From d9235c688f5447981a6f91673e6fa227e9ed111a Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 31 Aug 2022 08:49:29 -0700 Subject: [PATCH 01/27] Add an overlap checker to ISLE --- cranelift/isle/isle/Cargo.toml | 1 + cranelift/isle/isle/src/compile.rs | 4 + cranelift/isle/isle/src/error.rs | 23 + cranelift/isle/isle/src/error_miette.rs | 2 + cranelift/isle/isle/src/lib.rs | 1 + cranelift/isle/isle/src/overlap.rs | 808 ++++++++++++++++++++++++ 6 files changed, 839 insertions(+) create mode 100644 cranelift/isle/isle/src/overlap.rs diff --git a/cranelift/isle/isle/Cargo.toml b/cranelift/isle/isle/Cargo.toml index 09cff1d75efe..134788c05301 100644 --- a/cranelift/isle/isle/Cargo.toml +++ b/cranelift/isle/isle/Cargo.toml @@ -18,5 +18,6 @@ tempfile = "3" [features] default = [] +check-overlap = [] logging = ["log"] miette-errors = ["miette"] diff --git a/cranelift/isle/isle/src/compile.rs b/cranelift/isle/isle/src/compile.rs index 877b8a5cec80..97c9b3478ca1 100644 --- a/cranelift/isle/isle/src/compile.rs +++ b/cranelift/isle/isle/src/compile.rs @@ -7,6 +7,10 @@ use crate::{ast, codegen, sema, trie}; pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result { let mut typeenv = sema::TypeEnv::from_ast(defs)?; let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?; + + #[cfg(feature = "check-overlap")] + crate::overlap::check(&mut typeenv, &termenv)?; + let tries = trie::build_tries(&typeenv, &termenv); Ok(codegen::codegen(&typeenv, &termenv, &tries, options)) } diff --git a/cranelift/isle/isle/src/error.rs b/cranelift/isle/isle/src/error.rs index 39b6adcf8777..8ca4a86a7484 100644 --- a/cranelift/isle/isle/src/error.rs +++ b/cranelift/isle/isle/src/error.rs @@ -42,6 +42,15 @@ pub enum Error { span: Span, }, + /// The rules mentioned by the two spans overlap in the inputs they accept. + OverlapError { + /// The error message. + msg: String, + + /// The locations of all the rules that overlap. + rules: Vec<(Source, Span)>, + }, + /// Multiple errors. Errors(Vec), } @@ -108,6 +117,10 @@ impl std::fmt::Display for Error { #[cfg(feature = "miette-errors")] Error::TypeError { msg, .. } => write!(f, "type error: {}", msg), + Error::OverlapError { msg, rules, .. } => { + writeln!(f, "overlap error: {}\n{}", msg, OverlappingRules(&rules)) + } + Error::Errors(_) => write!( f, "found {} errors:\n\n{}", @@ -128,6 +141,16 @@ impl std::fmt::Display for DisplayErrors<'_> { } } +struct OverlappingRules<'a>(&'a [(Source, Span)]); +impl std::fmt::Display for OverlappingRules<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (src, span) in self.0 { + writeln!(f, " {}", span.from.pretty_print_with_filename(&*src.name))?; + } + Ok(()) + } +} + /// A source file and its contents. #[derive(Clone)] pub struct Source { diff --git a/cranelift/isle/isle/src/error_miette.rs b/cranelift/isle/isle/src/error_miette.rs index 555b4e4acae4..840d2be7928c 100644 --- a/cranelift/isle/isle/src/error_miette.rs +++ b/cranelift/isle/isle/src/error_miette.rs @@ -45,12 +45,14 @@ impl miette::Diagnostic for Error { .into_iter(), )) } + _ => None, } } fn source_code(&self) -> std::option::Option<&dyn miette::SourceCode> { match self { Self::ParseError { src, .. } | Self::TypeError { src, .. } => Some(src), + _ => None, } } diff --git a/cranelift/isle/isle/src/lib.rs b/cranelift/isle/isle/src/lib.rs index 1164dafc3d89..140ba0aff102 100644 --- a/cranelift/isle/isle/src/lib.rs +++ b/cranelift/isle/isle/src/lib.rs @@ -98,6 +98,7 @@ pub mod error; pub mod ir; pub mod lexer; mod log; +pub mod overlap; pub mod parser; pub mod sema; pub mod trie; diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs new file mode 100644 index 000000000000..5de408ad6ef3 --- /dev/null +++ b/cranelift/isle/isle/src/overlap.rs @@ -0,0 +1,808 @@ +//! Overlap detection for rules in ISLE. + +use std::collections::HashSet; + +use crate::error::{Error, Result, Source, Span}; +use crate::sema::{ + self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, Type, TypeEnv, + TypeId, VarId, +}; + +/// Check for overlap. +pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { + + + let env = Env::new(tyenv, termenv); + let mut errors = Errors::new(termenv.rules.len()); + for term in termenv.terms.iter() { + // The only isle declaration that currently produces overlap is constructors whose + // definition is entirely in isle. + if !env.is_internal_constructor(term.id) { + continue; + } + + check_overlap_groups(&mut errors, &env, term); + } + + + if !errors.is_empty() { + let mut errors = errors.report(&env); + return match errors.len() { + 1 => Err(errors.pop().unwrap()), + _ => Err(Error::Errors(std::mem::take(&mut errors))), + }; + } + + Ok(()) +} + +struct Node { + degree: usize, + edges: Vec, +} + +impl Node { + fn new(len: usize) -> Self { + let mut edges = Vec::with_capacity(len); + edges.resize(len, false); + Self { degree: 0, edges } + } + + fn add_edge(&mut self, other: RuleId) { + if self.edges[other.0] { + return; + } + + self.degree += 1; + self.edges[other.0] = true; + } + + fn remove_edge(&mut self, other: RuleId) { + if !self.edges[other.0] { + return; + } + + self.degree -= 1; + self.edges[other.0] = false; + } +} + +struct Errors { + /// Edges between rules indicating overlap. As the edges are not directed, the edges are + /// normalized by ordering the rule ids. + nodes: Vec, +} + +impl Errors { + fn new(len: usize) -> Self { + let mut nodes = Vec::with_capacity(len); + nodes.resize_with(len, || Node::new(len)); + Self { nodes } + } + + fn is_empty(&self) -> bool { + self.nodes.iter().all(|node| node.degree == 0) + } + + fn report(&mut self, env: &Env) -> Vec { + let mut rules: Vec = self + .nodes + .iter() + .enumerate() + .filter_map(|(id, node)| { + if node.degree == 0 { + None + } else { + Some(id) + } + }) + .collect(); + + rules.sort_by_cached_key(|id| self.nodes[*id].degree); + + let mut errors = Vec::new(); + + let get_info = |id| { + let rule = env.get_rule(id); + + let src = env.get_source(rule.pos.file); + let span = Span::new_single(rule.pos); + (src, span) + }; + + // Work backwards through the ids to find the nodes with the largest conflict first. + for id in rules.into_iter().rev() { + let node = self.remove_node(RuleId(id)); + if node.degree == 0 { + continue; + } + + // build the real error + let mut rules = vec![get_info(RuleId(id))]; + + rules.extend(node.edges.into_iter().enumerate().filter_map(|(ix, present)| { + if present { + Some(get_info(RuleId(ix))) + } else { + None + } + })); + + errors.push(Error::OverlapError { + msg: String::from("rules are overlapping"), + rules, + }); + } + + errors + } + + fn remove_node(&mut self, id: RuleId) -> Node { + let mut node = Node::new(self.nodes.len()); + std::mem::swap(&mut self.nodes[id.0], &mut node); + + for (ix, other) in node.edges.iter().copied().enumerate() { + if other { + self.nodes[ix].remove_edge(id); + } + } + + node + } + + fn add_edge(&mut self, a: RuleId, b: RuleId) { + // edges are undirected + self.nodes[a.0].add_edge(b); + self.nodes[b.0].add_edge(a); + } +} + +fn overlap_error(errs: &mut Errors, matrix: Matrix) { + for (ix, rule) in matrix.rows.iter().enumerate() { + for other in &matrix.rows[ix + 1..] { + errs.add_edge(rule.rule, other.rule); + } + } +} + +fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { + for matrix in Matrix::from_priority_groups(env, term.id) { + check_overlap(errs, env, matrix); + } +} + +fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { + if matrix.is_unique() { + return; + } + + matrix.normalize(); + if matrix.cols_empty() { + overlap_error(errs, matrix); + return; + } + + let mut work = Vec::new(); + work.push(matrix); + + while let Some(mut matrix) = work.pop() { + let pat = matrix.leading_pattern(); + let remainder = matrix.specialize(env, &pat); + + if !remainder.is_empty() && !remainder.is_unique() { + if remainder.cols_empty() { + overlap_error(errs, remainder); + } else if !remainder.is_unique() { + work.push(remainder); + } + } + + if !matrix.is_empty() && !matrix.is_unique() { + if matrix.cols_empty() { + overlap_error(errs, matrix); + } else if !matrix.is_unique() { + work.push(matrix); + } + } + } +} + +struct Env<'a> { + tyenv: &'a TypeEnv, + termenv: &'a TermEnv, +} + +impl<'a> Env<'a> { + /// Construct a new [`Env`]. + fn new(tyenv: &'a TypeEnv, termenv: &'a TermEnv) -> Self { + Self { tyenv, termenv } + } + + /// Fetch the string associated with a symbol. + fn get_sym(&self, id: Sym) -> &str { + &self.tyenv.syms[id.0] + } + + /// Fetch the rule associated with this id. + fn get_rule(&self, id: RuleId) -> &Rule { + &self.termenv.rules[id.0] + } + + /// Fetch the term associated with this id. + fn get_term(&self, id: TermId) -> &Term { + &self.termenv.terms[id.0] + } + + /// Fetch the tyep associated with this id. + fn get_type(&self, id: TypeId) -> &Type { + &self.tyenv.types[id.0] + } + + fn get_source(&self, file: usize) -> Source { + Source::new( + self.tyenv.filenames[file].clone(), + self.tyenv.file_texts[file].clone(), + ) + } + + /// True when this term represents a constructor implemented in isle. + fn is_internal_constructor(&self, id: TermId) -> bool { + match self.get_term(id).kind { + TermKind::Decl { + constructor_kind: Some(ConstructorKind::InternalConstructor), + .. + } => true, + _ => false, + } + } + + /// The ids of all [`Rule`]s defined for this term. + fn rules_for_term(&self, id: TermId) -> Vec { + self.termenv + .rules + .iter() + .filter_map(|rule| { + if let sema::Pattern::Term(_, tid, _) = rule.lhs { + if tid == id { + return Some(rule.id); + } + } + None + }) + .collect() + } + + /// Returns true when this type is an enum with only a single constructor. + fn is_single_constructor_enum(&self, ty: TypeId) -> bool { + match self.get_type(ty) { + Type::Primitive(_, _, _) => false, + Type::Enum { variants, .. } => variants.len() == 1, + } + } +} + +/// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. +/// TODO: location information for reporting errors. +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +enum Pattern { + Int { + value: i128, + }, + + Const { + name: Sym, + }, + + Variant { + id: TermId, + single_case: bool, + pats: Vec, + }, + + And { + pats: Vec, + }, + + Extractor { + id: TermId, + pats: Vec, + }, + + Wildcard, +} + +impl Pattern { + /// Create a [`Pattern`] from a [`sema::Pattern`]. The major differences between these two + /// representations are as follows: + /// 1. Variable bindings are removed and turned into wildcards + /// 2. Equality constraints are removed and turned into inlined versions of the patterns they + /// would have introduced equalities with + /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or + /// [`Pattern::Extractor`] cases depending on their term kind. + fn from_sema(env: &Env, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { + match pat { + sema::Pattern::BindPattern(_, id, pat) => { + let pat = Self::from_sema(env, binds, pat); + binds.push((*id, pat.clone())); + pat + } + + sema::Pattern::Var(_, id) => { + for (vid, pat) in binds.iter().rev() { + if vid == id { + // TODO: explain why we inline equality constraint + return pat.clone(); + } + } + + binds.push((*id, Pattern::Wildcard)); + Pattern::Wildcard + } + + sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value }, + sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name }, + + sema::Pattern::Term(ty, id, pats) => { + let pats = pats + .iter() + .map(|pat| Pattern::from_sema(env, binds, pat)) + .collect(); + + match &env.get_term(*id).kind { + TermKind::EnumVariant { .. } => Pattern::Variant { + id: *id, + single_case: env.is_single_constructor_enum(*ty), + pats, + }, + + TermKind::Decl { .. } => Pattern::Extractor { id: *id, pats }, + } + } + + sema::Pattern::Wildcard(_) => Pattern::Wildcard, + + sema::Pattern::And(_, pats) => { + let mut pats: Vec = pats + .iter() + .map(|pat| Pattern::from_sema(env, binds, pat)) + .collect(); + + if pats.len() == 1 { + pats.pop().unwrap() + } else { + pats.sort_unstable(); + Pattern::And { pats } + } + } + } + } + + fn is_wildcard(&self) -> bool { + match self { + Pattern::Wildcard => true, + _ => false, + } + } + + fn is_extractor(&self) -> Option { + match self { + Pattern::Extractor { id, .. } => Some(*id), + _ => None, + } + } + + fn is_and(&self) -> bool { + match self { + Pattern::And { .. } => true, + _ => false, + } + } + + /// For `Variant` and `Extractor`, the number of arguments. + fn arity(&self) -> usize { + match self { + Pattern::Variant { pats, .. } => pats.len(), + Pattern::Extractor { pats, .. } => pats.len(), + _ => 1, + } + } + + /// Returns `true` for `Variant` or `Extractor`. + fn can_expand(&self) -> bool { + match self { + Pattern::Variant { .. } => true, + Pattern::Extractor { .. } => true, + _ => false, + } + } + + /// Returns `true` if this pattern could match one of the concrete patterns specified by + /// `other`. + fn match_concrete(&self, other: &Pattern) -> bool { + match (self, other) { + // these are the cases where we know enough to say definitively yes or no + (Pattern::Int { value: left }, Pattern::Int { value: right }) => left == right, + (Pattern::Const { name: left }, Pattern::Const { name: right }) => left == right, + (Pattern::Variant { id: left, .. }, Pattern::Variant { id: right, .. }) => { + left == right + } + + (Pattern::Extractor { id: left, .. }, Pattern::Extractor { id: right, .. }) => { + left == right + } + + (Pattern::And { pats }, _) => pats.iter().rev().any(|pat| pat.match_concrete(other)), + + _ => false, + } + } + + /// Extracts the that matches the template from this pattern, assuming that it's an `And`. + /// Updates the `And` to no longer include the pattern. + fn extract_matching(&mut self, template: &Pattern) -> Option { + if let Pattern::And { pats } = self { + for i in 0..pats.len() { + if pats[i].match_concrete(template) { + let res = pats.remove(i); + + // if the and has only a single element left, collapse it + if pats.len() == 1 { + *self = pats.remove(0); + } + + return Some(res); + } + } + } + + None + } +} + +/// A single row in the pattern matrix. +#[derive(Debug, Clone)] +struct Row { + pats: Vec, + rule: RuleId, +} + +impl Row { + fn from_rule(env: &Env, rule: RuleId) -> Row { + if let sema::Pattern::Term(_, _, vars) = &env.get_rule(rule).lhs { + let mut binds = Vec::new(); + Self { + // NOTE: the patterns are reversed so that it's easier to manipulate the leading + // column of the row. + pats: vars + .iter() + .rev() + .map(|pat| Pattern::from_sema(env, &mut binds, pat)) + .collect(), + rule, + } + } else { + panic!("Constructing a Row from a malformed rule") + } + } + + /// A row is empty when its pattern vector is empty. + fn is_empty(&self) -> bool { + self.pats.is_empty() + } + + /// The pattern from the first column of this row. + fn front(&self) -> &Pattern { + assert!(!self.pats.is_empty()); + self.pats.last().unwrap() + } + + /// A mutable reference to the pattern from the first column of this row. + fn front_mut(&mut self) -> &mut Pattern { + assert!(!self.pats.is_empty()); + self.pats.last_mut().unwrap() + } + + /// Push a new pattern on the front of this row. + fn push(&mut self, pat: Pattern) { + self.pats.push(pat); + } + + /// Pop the pattern from the front of the row. + fn pop(&mut self) -> Pattern { + assert!(!self.pats.is_empty()); + self.pats.pop().unwrap() + } +} + +#[derive(Debug, Clone)] +struct Matrix { + rows: Vec, + + /// The term that this matrix represents. + term: TermId, + + /// The priority of this group of rules matrix. + prio: i64, +} + +impl Matrix { + fn new(term: TermId, prio: i64, rows: Vec) -> Self { + Self { rows, prio, term } + } + + fn from_priority_groups(env: &Env, term: TermId) -> Vec { + let mut matrices = Vec::new(); + + let mut rules: Vec<(i64, RuleId)> = env + .rules_for_term(term) + .into_iter() + .map(|id| (env.get_rule(id).prio.unwrap_or(0), id)) + .collect(); + + if rules.is_empty() { + return matrices; + } + + rules.sort_by_key(|(prio, _)| *prio); + + let mut current = { + let (prio, _) = rules.first().unwrap(); + matrices.push(Matrix::new(term, *prio, Vec::new())); + matrices.last_mut().unwrap() + }; + + for (p, id) in rules { + if p != current.prio { + matrices.push(Matrix::new(term, p, Vec::new())); + current = matrices.last_mut().unwrap(); + } + current.rows.push(Row::from_rule(env, id)); + } + + matrices + } + + /// Normalizing the matrix by removing leading columns that consist of only wildcards, and then + /// sorting the remaining rows to put those with fallible leading patterns first. + fn normalize(&mut self) { + while !self.cols_empty() && self.rows.iter().all(|row| row.front().is_wildcard()) { + self.drop_leading(); + } + + if !self.cols_empty() { + self.rows.sort_unstable_by(|a, b| a.front().cmp(b.front())); + } + } + + /// Returns true if there are no rows in the matrix. + fn is_empty(&self) -> bool { + self.rows.first().is_none() + } + + /// Returns true if there are no rows, or those that exist have no columns. + fn cols_empty(&self) -> bool { + self.rows.first().map_or(true, |row| row.is_empty()) + } + + /// Returns true if there is exactly one row. + fn is_unique(&self) -> bool { + self.rows.len() == 1 + } + + /// Specialize the matrix according to the pattern in the first column of the first row. This + /// assumes that the matrix has already been normalized. + fn specialize(&mut self, env: &Env, pat: &Pattern) -> Self { + assert!(!self.cols_empty()); + + let spec_extractor = pat.is_extractor(); + + // we start by specializing and patterns, so that we don't have to consider them when + // deciding which rows go to which matrix. + self.specialize_and_patterns(&pat); + + // remove rows from self that we know couldn't possibly match this pattern + let mut other = Matrix::new(self.term, self.prio, Vec::new()); + let mut i = 0; + while i < self.rows.len() { + let row = &mut self.rows[i]; + match row.front() { + Pattern::Wildcard => { + // wildcards always match, and go into both matrices as a result + other.rows.push(row.clone()); + i += 1; + } + + Pattern::Extractor { id, .. } => { + // if we're specializing on this extractor then it shouldn't be duplicated over + // to the other matrix. otherwise, we copy the row over to the other matrix and + // rewrite this one to a wildcard to model the fact that we can't determine if + // the extractor will actually match. + if spec_extractor != Some(*id) { + other.rows.push(row.clone()); + *row.front_mut() = Pattern::Wildcard; + } + + i += 1; + } + + // rows that don't match this pattern get moved to the other matrix + col if !col.match_concrete(&pat) => { + other.rows.push(self.rows.swap_remove(i)); + // note that we don't increment the index here + } + + // and all other rows stay in this matrix + _ => i += 1, + } + } + + if pat.can_expand() { + self.expand_leading(env, &pat); + } else { + self.drop_leading(); + } + + self.normalize(); + other.normalize(); + other + } + + /// Returns a copy of the first pattern for the first row of the matrix, as a candidate for + /// specialization. + fn leading_pattern(&self) -> Pattern { + assert!( + !self.cols_empty(), + "leading_pattern called on a matrix with no patterns" + ); + + match self.rows[0].front() { + Pattern::And { pats } => pats.first().unwrap().clone(), + pat => pat.clone(), + } + } + + /// If there are any `and` patterns in the leading column, extract out the sub-pattern that + /// matches `pat` and leave the rest in a fresh column. Insert wildcards for other columns + /// that have no `and` patterns. + fn specialize_and_patterns(&mut self, template: &Pattern) { + if !self.rows.iter().any(|row| row.front().is_and()) { + return; + } + + for row in self.rows.iter_mut() { + let mut pat = row.pop(); + if let Some(p) = pat.extract_matching(template) { + row.push(pat); + row.push(p); + } else { + row.push(Pattern::Wildcard); + row.push(pat); + } + } + } + + /// Expand leading patterns. + fn expand_leading(&mut self, env: &Env, template: &Pattern) { + let arity = template.arity(); + for row in self.rows.iter_mut() { + let pat = row.pop(); + match pat { + Pattern::Variant { pats, .. } | Pattern::Extractor { pats, .. } + if pat.match_concrete(template) => + { + row.pats.extend(pats.into_iter().rev()) + } + + Pattern::Wildcard => row + .pats + .extend(std::iter::repeat(Pattern::Wildcard).take(arity)), + + _ => panic!( + "incorrect leading expansion:\nfound: {}\n expected: {}", + WithEnv::new(env, &pat), + WithEnv::new(env, template) + ), + } + } + } + + // Drop leading column from the matrix. + fn drop_leading(&mut self) { + for row in self.rows.iter_mut() { + row.pop(); + } + } +} + +struct WithEnv<'env, T> { + env: &'env Env<'env>, + value: T, +} + +impl<'env, T> WithEnv<'env, T> { + fn new(env: &'env Env, value: T) -> Self { + Self { env, value } + } + + fn with_value(&self, value: U) -> WithEnv<'env, U> { + WithEnv { + env: self.env, + value, + } + } +} + +impl std::fmt::Display for WithEnv<'_, &Pattern> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.value { + Pattern::Int { value } => write!(f, "{}", value), + + Pattern::Const { name } => write!(f, "${}", self.env.get_sym(*name)), + + Pattern::Variant { id, pats, .. } => { + write!(f, "({}", self.env.get_sym(self.env.get_term(*id).name))?; + for pat in pats { + write!(f, " {}", self.with_value(pat))?; + } + write!(f, ")") + } + + Pattern::And { pats } => { + write!(f, "(and")?; + for pat in pats { + write!(f, " {}", self.with_value(pat))?; + } + write!(f, ")") + } + + Pattern::Extractor { id, pats } => { + write!(f, "({}", self.env.get_sym(self.env.get_term(*id).name))?; + for pat in pats { + write!(f, " {}", self.with_value(pat))?; + } + write!(f, ")") + } + + Pattern::Wildcard => write!(f, "_"), + } + } +} + +impl std::fmt::Display for WithEnv<'_, &Matrix> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.value.rows.is_empty() { + return writeln!(f, ""); + } + + let mut lens = vec![0; self.value.rows.first().unwrap().pats.len()]; + let mut rows: Vec<(RuleId, Vec)> = Vec::with_capacity(self.value.rows.len()); + + for row in self.value.rows.iter() { + rows.push(( + row.rule, + row.pats + .iter() + .rev() + .enumerate() + .map(|(col, pat)| { + let str = format!("{}", self.with_value(pat)); + lens[col] = lens[col].max(str.len()); + str + }) + .collect(), + )); + } + + for (rule, row) in rows.into_iter() { + write!(f, "[")?; + let mut sep = ""; + for (col, width) in row.into_iter().zip(lens.iter()) { + write!(f, "{} {:width$} ", sep, col)?; + sep = "|"; + } + writeln!(f, "] = {}", rule.0)? + } + + Ok(()) + } +} From 992ce4616a0ec05b8e0e99ce92211b2f29de3c5a Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Tue, 13 Sep 2022 09:21:35 -0700 Subject: [PATCH 02/27] Fix comments --- cranelift/isle/isle/src/compile.rs | 2 ++ cranelift/isle/isle/src/error.rs | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cranelift/isle/isle/src/compile.rs b/cranelift/isle/isle/src/compile.rs index 97c9b3478ca1..090d5e3159c1 100644 --- a/cranelift/isle/isle/src/compile.rs +++ b/cranelift/isle/isle/src/compile.rs @@ -8,6 +8,8 @@ pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result, }, From 2871a9c0ca39285809b855ec0efd425ecd7eb0f1 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Tue, 13 Sep 2022 09:22:27 -0700 Subject: [PATCH 03/27] Minimize diff --- cranelift/isle/isle/src/error_miette.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/cranelift/isle/isle/src/error_miette.rs b/cranelift/isle/isle/src/error_miette.rs index 840d2be7928c..555b4e4acae4 100644 --- a/cranelift/isle/isle/src/error_miette.rs +++ b/cranelift/isle/isle/src/error_miette.rs @@ -45,14 +45,12 @@ impl miette::Diagnostic for Error { .into_iter(), )) } - _ => None, } } fn source_code(&self) -> std::option::Option<&dyn miette::SourceCode> { match self { Self::ParseError { src, .. } | Self::TypeError { src, .. } => Some(src), - _ => None, } } From 9906df02cc23828aba30bc4a5329cd39854c90d0 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Tue, 13 Sep 2022 10:02:41 -0700 Subject: [PATCH 04/27] Add more comments --- cranelift/isle/isle/src/overlap.rs | 115 +++++++++++++++++++---------- 1 file changed, 77 insertions(+), 38 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 5de408ad6ef3..109f236aa898 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -1,7 +1,5 @@ //! Overlap detection for rules in ISLE. -use std::collections::HashSet; - use crate::error::{Error, Result, Source, Span}; use crate::sema::{ self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, Type, TypeEnv, @@ -10,8 +8,6 @@ use crate::sema::{ /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { - - let env = Env::new(tyenv, termenv); let mut errors = Errors::new(termenv.rules.len()); for term in termenv.terms.iter() { @@ -24,7 +20,6 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { check_overlap_groups(&mut errors, &env, term); } - if !errors.is_empty() { let mut errors = errors.report(&env); return match errors.len() { @@ -36,18 +31,24 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { Ok(()) } +/// A node in the error graph. struct Node { + /// The number of other rules this node overlaps with. degree: usize, + + /// `true` entries where an edge exists to the node at that index. edges: Vec, } impl Node { + /// Make a new `Node` in the error graph with the given number of total nodes. fn new(len: usize) -> Self { let mut edges = Vec::with_capacity(len); edges.resize(len, false); Self { degree: 0, edges } } + /// Add an edge between this node and the other node. fn add_edge(&mut self, other: RuleId) { if self.edges[other.0] { return; @@ -57,6 +58,7 @@ impl Node { self.edges[other.0] = true; } + /// Remove an edge between this node and another node. fn remove_edge(&mut self, other: RuleId) { if !self.edges[other.0] { return; @@ -67,6 +69,8 @@ impl Node { } } +/// A graph of all the rules in the isle source, with bi-directional edges between rules that are +/// discovered to have overlap problems. struct Errors { /// Edges between rules indicating overlap. As the edges are not directed, the edges are /// normalized by ordering the rule ids. @@ -74,28 +78,25 @@ struct Errors { } impl Errors { + /// Make a new `Errors` graph for collecting overlap information. fn new(len: usize) -> Self { let mut nodes = Vec::with_capacity(len); nodes.resize_with(len, || Node::new(len)); Self { nodes } } + /// True when there are no edges in the graph. fn is_empty(&self) -> bool { self.nodes.iter().all(|node| node.degree == 0) } + /// Condense the overlap information down into individual errors. fn report(&mut self, env: &Env) -> Vec { let mut rules: Vec = self .nodes .iter() .enumerate() - .filter_map(|(id, node)| { - if node.degree == 0 { - None - } else { - Some(id) - } - }) + .filter_map(|(id, node)| if node.degree == 0 { None } else { Some(id) }) .collect(); rules.sort_by_cached_key(|id| self.nodes[*id].degree); @@ -112,7 +113,7 @@ impl Errors { // Work backwards through the ids to find the nodes with the largest conflict first. for id in rules.into_iter().rev() { - let node = self.remove_node(RuleId(id)); + let node = self.remove_edges(RuleId(id)); if node.degree == 0 { continue; } @@ -120,13 +121,18 @@ impl Errors { // build the real error let mut rules = vec![get_info(RuleId(id))]; - rules.extend(node.edges.into_iter().enumerate().filter_map(|(ix, present)| { - if present { - Some(get_info(RuleId(ix))) - } else { - None - } - })); + rules.extend( + node.edges + .into_iter() + .enumerate() + .filter_map(|(ix, present)| { + if present { + Some(get_info(RuleId(ix))) + } else { + None + } + }), + ); errors.push(Error::OverlapError { msg: String::from("rules are overlapping"), @@ -137,7 +143,9 @@ impl Errors { errors } - fn remove_node(&mut self, id: RuleId) -> Node { + /// Remove all the edges for this rule in the graph, returning the original `Node` contents for + /// further processing. + fn remove_edges(&mut self, id: RuleId) -> Node { let mut node = Node::new(self.nodes.len()); std::mem::swap(&mut self.nodes[id.0], &mut node); @@ -150,27 +158,31 @@ impl Errors { node } + /// Add a bidirectional edge between two rules in the graph. fn add_edge(&mut self, a: RuleId, b: RuleId) { // edges are undirected self.nodes[a.0].add_edge(b); self.nodes[b.0].add_edge(a); } -} -fn overlap_error(errs: &mut Errors, matrix: Matrix) { - for (ix, rule) in matrix.rows.iter().enumerate() { - for other in &matrix.rows[ix + 1..] { - errs.add_edge(rule.rule, other.rule); + /// Register all of the rules in the matrix as overlapping. + fn overlap_error(&mut self, matrix: Matrix) { + for (ix, rule) in matrix.rows.iter().enumerate() { + for other in &matrix.rows[ix + 1..] { + self.add_edge(rule.rule, other.rule); + } } } } +/// Check for overlapping rules within individual priority groups. fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { for matrix in Matrix::from_priority_groups(env, term.id) { check_overlap(errs, env, matrix); } } +/// Check for overlapping rules within a single prioirty group. fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { if matrix.is_unique() { return; @@ -178,7 +190,7 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { matrix.normalize(); if matrix.cols_empty() { - overlap_error(errs, matrix); + errs.overlap_error(matrix); return; } @@ -191,7 +203,7 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { if !remainder.is_empty() && !remainder.is_unique() { if remainder.cols_empty() { - overlap_error(errs, remainder); + errs.overlap_error(remainder); } else if !remainder.is_unique() { work.push(remainder); } @@ -199,7 +211,7 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { if !matrix.is_empty() && !matrix.is_unique() { if matrix.cols_empty() { - overlap_error(errs, matrix); + errs.overlap_error(matrix); } else if !matrix.is_unique() { work.push(matrix); } @@ -207,6 +219,7 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { } } +/// A convenience wrapper around the `TypeEnv` and `TermEnv` environments. struct Env<'a> { tyenv: &'a TypeEnv, termenv: &'a TermEnv, @@ -238,6 +251,7 @@ impl<'a> Env<'a> { &self.tyenv.types[id.0] } + /// Fetch source information for a file id. fn get_source(&self, file: usize) -> Source { Source::new( self.tyenv.filenames[file].clone(), @@ -282,27 +296,31 @@ impl<'a> Env<'a> { } /// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. -/// TODO: location information for reporting errors. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] enum Pattern { + /// Integer literal patterns. Int { value: i128, }, + /// Constant literal patterns, such as `$F32`. Const { name: Sym, }, + /// Enum variant constructors. Variant { id: TermId, single_case: bool, pats: Vec, }, + /// And patterns, with their sub-patterns sorted. And { pats: Vec, }, + /// Extractor uses (both fallible and infallible). Extractor { id: TermId, pats: Vec, @@ -319,6 +337,8 @@ impl Pattern { /// would have introduced equalities with /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or /// [`Pattern::Extractor`] cases depending on their term kind. + /// 4. [`sema::Pattern::And`] instances are sorted to ensure that we can traverse them quickly + /// when specializing the matrix. fn from_sema(env: &Env, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { match pat { sema::Pattern::BindPattern(_, id, pat) => { @@ -377,6 +397,7 @@ impl Pattern { } } + /// True when this pattern is a wildcard. fn is_wildcard(&self) -> bool { match self { Pattern::Wildcard => true, @@ -384,6 +405,7 @@ impl Pattern { } } + /// True when this pattern is an extractor. fn is_extractor(&self) -> Option { match self { Pattern::Extractor { id, .. } => Some(*id), @@ -391,6 +413,7 @@ impl Pattern { } } + /// True when this pattern is an and-pattern. fn is_and(&self) -> bool { match self { Pattern::And { .. } => true, @@ -398,7 +421,7 @@ impl Pattern { } } - /// For `Variant` and `Extractor`, the number of arguments. + /// For `Variant` and `Extractor` this is the number of arguments, otherwise it is `1`. fn arity(&self) -> usize { match self { Pattern::Variant { pats, .. } => pats.len(), @@ -407,7 +430,8 @@ impl Pattern { } } - /// Returns `true` for `Variant` or `Extractor`. + /// Returns `true` for `Variant` or `Extractor`, as these are the two cases that can be + /// expanded into sub-patterns. fn can_expand(&self) -> bool { match self { Pattern::Variant { .. } => true, @@ -417,7 +441,9 @@ impl Pattern { } /// Returns `true` if this pattern could match one of the concrete patterns specified by - /// `other`. + /// `other`. NOTE: this is intentionally a shallow match, and any sub-patterns of `other` are + /// intentionally ignored. We're only interested in the most top-level overlap between these + /// patterns here. fn match_concrete(&self, other: &Pattern) -> bool { match (self, other) { // these are the cases where we know enough to say definitively yes or no @@ -437,8 +463,10 @@ impl Pattern { } } - /// Extracts the that matches the template from this pattern, assuming that it's an `And`. - /// Updates the `And` to no longer include the pattern. + /// Assuming that this is an and-pattern, extract the sub-pattern that matches the template and + /// remove it from `self`. This operation is used when specializing columns that contain + /// and-patterns to another pattern, leaning on the assumption that the and-pattern matches if + /// any of its sub-patterns also match. fn extract_matching(&mut self, template: &Pattern) -> Option { if let Pattern::And { pats } = self { for i in 0..pats.len() { @@ -467,12 +495,13 @@ struct Row { } impl Row { + /// Construct a rule from this rule id. fn from_rule(env: &Env, rule: RuleId) -> Row { if let sema::Pattern::Term(_, _, vars) = &env.get_rule(rule).lhs { let mut binds = Vec::new(); Self { // NOTE: the patterns are reversed so that it's easier to manipulate the leading - // column of the row. + // column of the row by pushing/popping the pats vector. pats: vars .iter() .rev() @@ -514,8 +543,11 @@ impl Row { } } +/// A matrix whose rows consist rules that rewrite the same terms, and whose columns are the +/// positional arguments to those rules. #[derive(Debug, Clone)] struct Matrix { + /// The rows of the rule matrix. rows: Vec, /// The term that this matrix represents. @@ -526,10 +558,12 @@ struct Matrix { } impl Matrix { + /// Construct a new matrix with the given rows. fn new(term: TermId, prio: i64, rows: Vec) -> Self { Self { rows, prio, term } } + /// Construct one matrix for each priority group defined for a given term. fn from_priority_groups(env: &Env, term: TermId) -> Vec { let mut matrices = Vec::new(); @@ -590,7 +624,7 @@ impl Matrix { } /// Specialize the matrix according to the pattern in the first column of the first row. This - /// assumes that the matrix has already been normalized. + /// assumes that the matrix has already been normalized, and will return normalized results. fn specialize(&mut self, env: &Env, pat: &Pattern) -> Self { assert!(!self.cols_empty()); @@ -681,7 +715,9 @@ impl Matrix { } } - /// Expand leading patterns. + /// Expand the patterns of the leading column according to the given template. This function + /// will only handle cases where the leading column is a variant, extractor, or wildcard, as + /// all other cases could not reasonably introduce sub-patterns. fn expand_leading(&mut self, env: &Env, template: &Pattern) { let arity = template.arity(); for row in self.rows.iter_mut() { @@ -714,16 +750,19 @@ impl Matrix { } } +/// A convenience struct for pretty-printing values that need an environment. struct WithEnv<'env, T> { env: &'env Env<'env>, value: T, } impl<'env, T> WithEnv<'env, T> { + /// Construct a new `WithEnv` for the given environment and value. fn new(env: &'env Env, value: T) -> Self { Self { env, value } } + /// Construct a new `WithEnv` with the same environment, but a different value. fn with_value(&self, value: U) -> WithEnv<'env, U> { WithEnv { env: self.env, From 15283d324840305943c4f8f06cabf5086d0aeb48 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Tue, 13 Sep 2022 10:38:05 -0700 Subject: [PATCH 05/27] Expand comments about handling non-linear patterns --- cranelift/isle/isle/src/overlap.rs | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 109f236aa898..22f1b3017bb1 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -350,7 +350,36 @@ impl Pattern { sema::Pattern::Var(_, id) => { for (vid, pat) in binds.iter().rev() { if vid == id { - // TODO: explain why we inline equality constraint + // We inline equality constraints for two reasons: we specialize on the + // spine of related patterns only, so more specific information about + // individual values isn't necessarily helpful; we consider overlap + // checking to be an over-approximation of overlapping rules, so handling + // equalies ends up being best-effort. As an approximation, we use whatever + // pattern happened to be at the binding of the variable for all of the + // cases where it's used for equality. For example, in the following rule: + // + // > (rule (example x @ (Enum.Variant y) x) ...) + // + // we will only specialize up to `(Enum.Variant _)`, so any more specific + // runtime values of `y` won't end up helping to identify overlap. As a + // result, we rewrite the patterns in the rule to look more like the + // following, as it greatly simplifies overlap checking. + // + // > (rule (example (Enum.Variant _) (Enum.Variant _)) ...) + // + // Cases that this scheme won't handle look like the following: + // + // > (rule (example2 2 3) ...) + // > (rule (example2 x x) ...) + // + // As in this case we'll not make use of the information that `2` and `3` + // aren't equal to know that the rules don't overlap. One approach that we + // could take here is delaying substitution to the point where a variable + // binding has been specialized, turning the rules into the following once + // specialization had occurred for `2`: + // + // > (rule (example2 2 3) ...) + // > (rule (example2 2 2) ...) return pat.clone(); } } From 84de3dddaaa9cbd0d8c386bd49857da5ca3bdbd6 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 10:43:27 -0700 Subject: [PATCH 06/27] Refactor the overlap checker to process pairs of rules --- Cargo.lock | 1 + cranelift/isle/isle/Cargo.toml | 1 + cranelift/isle/isle/src/overlap.rs | 89 ++++++++++++++++-------------- 3 files changed, 51 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a813d5f88e1..678c3c834224 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -629,6 +629,7 @@ version = "0.89.0" dependencies = [ "log", "miette", + "rayon", "tempfile", ] diff --git a/cranelift/isle/isle/Cargo.toml b/cranelift/isle/isle/Cargo.toml index 134788c05301..72d5866afce4 100644 --- a/cranelift/isle/isle/Cargo.toml +++ b/cranelift/isle/isle/Cargo.toml @@ -11,6 +11,7 @@ version = "0.89.0" [dependencies] log = { version = "0.4", optional = true } miette = { version = "5.1.0", optional = true } +rayon = "^1.5" [dev-dependencies] tempfile = "3" diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 22f1b3017bb1..e6d2fa271d2c 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -1,5 +1,7 @@ //! Overlap detection for rules in ISLE. +use rayon::prelude::*; + use crate::error::{Error, Result, Source, Span}; use crate::sema::{ self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, Type, TypeEnv, @@ -22,6 +24,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { if !errors.is_empty() { let mut errors = errors.report(&env); + return Ok(()); return match errors.len() { 1 => Err(errors.pop().unwrap()), _ => Err(Error::Errors(std::mem::take(&mut errors))), @@ -177,21 +180,40 @@ impl Errors { /// Check for overlapping rules within individual priority groups. fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { - for matrix in Matrix::from_priority_groups(env, term.id) { - check_overlap(errs, env, matrix); + let pairs = Matrix::rule_pairs(env, term.id); + if pairs.is_empty() { + return; + } + + println!("checking {} ({} pairs)", env.get_sym(term.name), pairs.len()); + let conflicts: Vec<_> = pairs + .into_par_iter() + .filter_map(|(id, left, right)| { + let lid = left.rule; + let rid = right.rule; + if check_overlap(env, id, left, right) { + Some((lid, rid)) + } else { + None + } + }).collect(); + + // Process conflicts sequentially. + if !conflicts.is_empty() { + println!("merging conflicts"); + for (l, r) in conflicts { + errs.add_edge(l, r); + } } } /// Check for overlapping rules within a single prioirty group. -fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { - if matrix.is_unique() { - return; - } +fn check_overlap(env: &Env, id: TermId, left: Row, right: Row) -> bool { + let mut matrix = Matrix::new(id, vec![left, right]); matrix.normalize(); if matrix.cols_empty() { - errs.overlap_error(matrix); - return; + return true; } let mut work = Vec::new(); @@ -203,7 +225,7 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { if !remainder.is_empty() && !remainder.is_unique() { if remainder.cols_empty() { - errs.overlap_error(remainder); + return true; } else if !remainder.is_unique() { work.push(remainder); } @@ -211,12 +233,14 @@ fn check_overlap(errs: &mut Errors, env: &Env, mut matrix: Matrix) { if !matrix.is_empty() && !matrix.is_unique() { if matrix.cols_empty() { - errs.overlap_error(matrix); + return true; } else if !matrix.is_unique() { work.push(matrix); } } } + + return false; } /// A convenience wrapper around the `TypeEnv` and `TermEnv` environments. @@ -581,48 +605,33 @@ struct Matrix { /// The term that this matrix represents. term: TermId, - - /// The priority of this group of rules matrix. - prio: i64, } impl Matrix { /// Construct a new matrix with the given rows. - fn new(term: TermId, prio: i64, rows: Vec) -> Self { - Self { rows, prio, term } + fn new(term: TermId, rows: Vec) -> Self { + Self { rows, term } } - /// Construct one matrix for each priority group defined for a given term. - fn from_priority_groups(env: &Env, term: TermId) -> Vec { - let mut matrices = Vec::new(); + /// Construct a rule matrix for each pair of rules in a term, that have the same priority. + fn rule_pairs(env: &Env, term: TermId) -> Vec<(TermId, Row, Row)> { - let mut rules: Vec<(i64, RuleId)> = env + let mut rows: Vec = env .rules_for_term(term) .into_iter() - .map(|id| (env.get_rule(id).prio.unwrap_or(0), id)) + .map(|id| Row::from_rule(env, id)) .collect(); - if rules.is_empty() { - return matrices; - } - - rules.sort_by_key(|(prio, _)| *prio); - - let mut current = { - let (prio, _) = rules.first().unwrap(); - matrices.push(Matrix::new(term, *prio, Vec::new())); - matrices.last_mut().unwrap() - }; - - for (p, id) in rules { - if p != current.prio { - matrices.push(Matrix::new(term, p, Vec::new())); - current = matrices.last_mut().unwrap(); - } - current.rows.push(Row::from_rule(env, id)); + let mut pairs = Vec::new(); + let mut cursor = &rows[..]; + while let Some((row, rest)) = cursor.split_first() { + cursor = rest; + pairs.extend(rest + .iter() + .map(|other| (term, row.clone(), other.clone()))); } - matrices + pairs } /// Normalizing the matrix by removing leading columns that consist of only wildcards, and then @@ -664,7 +673,7 @@ impl Matrix { self.specialize_and_patterns(&pat); // remove rows from self that we know couldn't possibly match this pattern - let mut other = Matrix::new(self.term, self.prio, Vec::new()); + let mut other = Matrix::new(self.term, Vec::new()); let mut i = 0; while i < self.rows.len() { let row = &mut self.rows[i]; From 1e7ef9019a0f925ba8cd8eee8718b4f25ff82fc0 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 16:56:36 -0700 Subject: [PATCH 07/27] Checkpoint --- cranelift/isle/isle/src/overlap.rs | 500 ++++++++++++----------------- 1 file changed, 200 insertions(+), 300 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index e6d2fa271d2c..4592f523b570 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -1,6 +1,7 @@ //! Overlap detection for rules in ISLE. use rayon::prelude::*; +use std::collections::{HashMap, HashSet}; use crate::error::{Error, Result, Source, Span}; use crate::sema::{ @@ -11,7 +12,7 @@ use crate::sema::{ /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let env = Env::new(tyenv, termenv); - let mut errors = Errors::new(termenv.rules.len()); + let mut errors = Errors::new(); for term in termenv.terms.iter() { // The only isle declaration that currently produces overlap is constructors whose // definition is entirely in isle. @@ -23,8 +24,9 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { } if !errors.is_empty() { + errors.graphviz(&env); + panic!(""); let mut errors = errors.report(&env); - return Ok(()); return match errors.len() { 1 => Err(errors.pop().unwrap()), _ => Err(Error::Errors(std::mem::take(&mut errors))), @@ -36,39 +38,29 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { /// A node in the error graph. struct Node { - /// The number of other rules this node overlaps with. - degree: usize, + component: Option, /// `true` entries where an edge exists to the node at that index. - edges: Vec, + edges: HashSet, } impl Node { /// Make a new `Node` in the error graph with the given number of total nodes. - fn new(len: usize) -> Self { - let mut edges = Vec::with_capacity(len); - edges.resize(len, false); - Self { degree: 0, edges } + fn new() -> Self { + Self { + component: None, + edges: HashSet::new(), + } } /// Add an edge between this node and the other node. fn add_edge(&mut self, other: RuleId) { - if self.edges[other.0] { - return; - } - - self.degree += 1; - self.edges[other.0] = true; + self.edges.insert(other); } /// Remove an edge between this node and another node. fn remove_edge(&mut self, other: RuleId) { - if !self.edges[other.0] { - return; - } - - self.degree -= 1; - self.edges[other.0] = false; + self.edges.remove(&other); } } @@ -77,32 +69,102 @@ impl Node { struct Errors { /// Edges between rules indicating overlap. As the edges are not directed, the edges are /// normalized by ordering the rule ids. - nodes: Vec, + nodes: HashMap, } impl Errors { /// Make a new `Errors` graph for collecting overlap information. - fn new(len: usize) -> Self { - let mut nodes = Vec::with_capacity(len); - nodes.resize_with(len, || Node::new(len)); - Self { nodes } + fn new() -> Self { + Self { + nodes: HashMap::new(), + } + } + + fn graphviz(&mut self, env: &Env) { + let mut comps = self.components(env); + + comps.sort_by_key(|(_, comp)| std::cmp::Reverse(comp.len())); + + for (_, comp) in comps.into_iter() { + println!("graph {{"); + for a in comp { + let node = &self.nodes[&a]; + for b in node.edges.iter() { + if *b > a { + println!( + "{} -- {} // {} -- {}", + env.get_rule(a).pos.line, + env.get_rule(*b).pos.line, + a.0, + b.0 + ); + } + } + } + println!("}}"); + } + } + + /// Label components + fn components(&mut self, env: &Env) -> Vec<(usize, Vec)> { + let mut components = Vec::new(); + let mut work = Vec::new(); + + let keys: Vec<_> = self.nodes.keys().copied().collect(); + + for id in keys { + if self.nodes[&id].component.is_some() { + continue; + } + + if self.nodes[&id].edges.is_empty() { + continue; + } + + let comp = components.len(); + let mut component = Vec::new(); + + work.clear(); + work.push(id); + while let Some(other) = work.pop() { + if let Some(other_comp) = self.nodes[&other].component { + assert!(comp == other_comp); + continue; + } + + self.nodes.get_mut(&other).unwrap().component = Some(comp); + component.push(other); + + work.extend(self.nodes[&other].edges.iter()); + } + + component.sort(); + components.push((comp, component)); + } + + components } /// True when there are no edges in the graph. fn is_empty(&self) -> bool { - self.nodes.iter().all(|node| node.degree == 0) + self.nodes.is_empty() } /// Condense the overlap information down into individual errors. fn report(&mut self, env: &Env) -> Vec { - let mut rules: Vec = self + let mut rules: Vec<_> = self .nodes .iter() - .enumerate() - .filter_map(|(id, node)| if node.degree == 0 { None } else { Some(id) }) + .filter_map(|(id, node)| { + if node.edges.is_empty() { + None + } else { + Some(*id) + } + }) .collect(); - rules.sort_by_cached_key(|id| self.nodes[*id].degree); + rules.sort_by_cached_key(|id| self.nodes[id].edges.len()); let mut errors = Vec::new(); @@ -116,26 +178,15 @@ impl Errors { // Work backwards through the ids to find the nodes with the largest conflict first. for id in rules.into_iter().rev() { - let node = self.remove_edges(RuleId(id)); - if node.degree == 0 { + let node = self.remove_edges(id); + if node.edges.is_empty() { continue; } // build the real error - let mut rules = vec![get_info(RuleId(id))]; - - rules.extend( - node.edges - .into_iter() - .enumerate() - .filter_map(|(ix, present)| { - if present { - Some(get_info(RuleId(ix))) - } else { - None - } - }), - ); + let mut rules = vec![get_info(id)]; + + rules.extend(node.edges.into_iter().map(get_info)); errors.push(Error::OverlapError { msg: String::from("rules are overlapping"), @@ -149,12 +200,11 @@ impl Errors { /// Remove all the edges for this rule in the graph, returning the original `Node` contents for /// further processing. fn remove_edges(&mut self, id: RuleId) -> Node { - let mut node = Node::new(self.nodes.len()); - std::mem::swap(&mut self.nodes[id.0], &mut node); + let node = self.nodes.remove(&id).unwrap(); - for (ix, other) in node.edges.iter().copied().enumerate() { - if other { - self.nodes[ix].remove_edge(id); + for other in node.edges.iter() { + if let Some(other) = self.nodes.get_mut(&other) { + other.remove_edge(id); } } @@ -164,83 +214,82 @@ impl Errors { /// Add a bidirectional edge between two rules in the graph. fn add_edge(&mut self, a: RuleId, b: RuleId) { // edges are undirected - self.nodes[a.0].add_edge(b); - self.nodes[b.0].add_edge(a); - } - - /// Register all of the rules in the matrix as overlapping. - fn overlap_error(&mut self, matrix: Matrix) { - for (ix, rule) in matrix.rows.iter().enumerate() { - for other in &matrix.rows[ix + 1..] { - self.add_edge(rule.rule, other.rule); - } - } + self.nodes.entry(a).or_insert_with(Node::new).add_edge(b); + self.nodes.entry(b).or_insert_with(Node::new).add_edge(a); } } /// Check for overlapping rules within individual priority groups. fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { - let pairs = Matrix::rule_pairs(env, term.id); + let rows: Vec = env + .rules_for_term(term.id) + .into_iter() + .map(|id| Row::from_rule(env, id)) + .collect(); + + let mut pairs = Vec::new(); + let mut cursor = &rows[..]; + while let Some((row, rest)) = cursor.split_first() { + cursor = rest; + pairs.extend(rest.iter().map(|other| (row, other))); + } + if pairs.is_empty() { return; } - println!("checking {} ({} pairs)", env.get_sym(term.name), pairs.len()); + // Process rule pairs in parallel let conflicts: Vec<_> = pairs .into_par_iter() - .filter_map(|(id, left, right)| { + .filter_map(|(left, right)| { let lid = left.rule; let rid = right.rule; - if check_overlap(env, id, left, right) { + if check_overlap(env, left.clone(), right.clone()) { Some((lid, rid)) } else { None } - }).collect(); + }) + .collect(); // Process conflicts sequentially. - if !conflicts.is_empty() { - println!("merging conflicts"); - for (l, r) in conflicts { - errs.add_edge(l, r); - } + for (l, r) in conflicts { + errs.add_edge(l, r); } } /// Check for overlapping rules within a single prioirty group. -fn check_overlap(env: &Env, id: TermId, left: Row, right: Row) -> bool { - let mut matrix = Matrix::new(id, vec![left, right]); +fn check_overlap(env: &Env, mut left: Row, mut right: Row) -> bool { + while !left.is_empty() { + // drop leading wildcards from both + while !left.is_empty() && left.front().is_wildcard() && right.front().is_wildcard() { + left.pop(); + right.pop(); + } - matrix.normalize(); - if matrix.cols_empty() { - return true; - } + if left.is_empty() { + break; + } - let mut work = Vec::new(); - work.push(matrix); + // pick the best pattern from the leading column of the two rows + let lr = left.leading_pattern(); + let rr = right.leading_pattern(); - while let Some(mut matrix) = work.pop() { - let pat = matrix.leading_pattern(); - let remainder = matrix.specialize(env, &pat); + let pat = if lr < rr { lr.1.clone() } else { rr.1.clone() }; - if !remainder.is_empty() && !remainder.is_unique() { - if remainder.cols_empty() { - return true; - } else if !remainder.is_unique() { - work.push(remainder); - } + if lr.0 || rr.0 { + left.specialize_and_patterns(&pat); + right.specialize_and_patterns(&pat); } - if !matrix.is_empty() && !matrix.is_unique() { - if matrix.cols_empty() { - return true; - } else if !matrix.is_unique() { - work.push(matrix); - } + // specialize both rows on that pattern, and if specialization fails we know the two don't + // overlap. + if !left.specialize(env, &pat) || !right.specialize(env, &pat) { + return false; } } - return false; + return true; } /// A convenience wrapper around the `TypeEnv` and `TermEnv` environments. @@ -594,197 +643,77 @@ impl Row { assert!(!self.pats.is_empty()); self.pats.pop().unwrap() } -} - -/// A matrix whose rows consist rules that rewrite the same terms, and whose columns are the -/// positional arguments to those rules. -#[derive(Debug, Clone)] -struct Matrix { - /// The rows of the rule matrix. - rows: Vec, - /// The term that this matrix represents. - term: TermId, -} - -impl Matrix { - /// Construct a new matrix with the given rows. - fn new(term: TermId, rows: Vec) -> Self { - Self { rows, term } - } - - /// Construct a rule matrix for each pair of rules in a term, that have the same priority. - fn rule_pairs(env: &Env, term: TermId) -> Vec<(TermId, Row, Row)> { - - let mut rows: Vec = env - .rules_for_term(term) - .into_iter() - .map(|id| Row::from_rule(env, id)) - .collect(); + fn leading_pattern(&self) -> (bool, &Pattern) { + assert!(!self.is_empty(), "leading_pattern called on an emtpy row"); - let mut pairs = Vec::new(); - let mut cursor = &rows[..]; - while let Some((row, rest)) = cursor.split_first() { - cursor = rest; - pairs.extend(rest - .iter() - .map(|other| (term, row.clone(), other.clone()))); + match self.front() { + Pattern::And { pats } => (true, pats.first().unwrap()), + pat => (false, pat), } - - pairs } - /// Normalizing the matrix by removing leading columns that consist of only wildcards, and then - /// sorting the remaining rows to put those with fallible leading patterns first. - fn normalize(&mut self) { - while !self.cols_empty() && self.rows.iter().all(|row| row.front().is_wildcard()) { - self.drop_leading(); - } - - if !self.cols_empty() { - self.rows.sort_unstable_by(|a, b| a.front().cmp(b.front())); + /// Specialize any leading and-patterns to this template. + fn specialize_and_patterns(&mut self, template: &Pattern) { + let mut pat = self.pop(); + if let Some(p) = pat.extract_matching(template) { + self.push(pat); + self.push(p); + } else { + self.push(Pattern::Wildcard); + self.push(pat); } } - /// Returns true if there are no rows in the matrix. - fn is_empty(&self) -> bool { - self.rows.first().is_none() - } - - /// Returns true if there are no rows, or those that exist have no columns. - fn cols_empty(&self) -> bool { - self.rows.first().map_or(true, |row| row.is_empty()) - } - - /// Returns true if there is exactly one row. - fn is_unique(&self) -> bool { - self.rows.len() == 1 - } - - /// Specialize the matrix according to the pattern in the first column of the first row. This - /// assumes that the matrix has already been normalized, and will return normalized results. - fn specialize(&mut self, env: &Env, pat: &Pattern) -> Self { - assert!(!self.cols_empty()); - - let spec_extractor = pat.is_extractor(); - - // we start by specializing and patterns, so that we don't have to consider them when - // deciding which rows go to which matrix. - self.specialize_and_patterns(&pat); - - // remove rows from self that we know couldn't possibly match this pattern - let mut other = Matrix::new(self.term, Vec::new()); - let mut i = 0; - while i < self.rows.len() { - let row = &mut self.rows[i]; - match row.front() { - Pattern::Wildcard => { - // wildcards always match, and go into both matrices as a result - other.rows.push(row.clone()); - i += 1; - } - - Pattern::Extractor { id, .. } => { - // if we're specializing on this extractor then it shouldn't be duplicated over - // to the other matrix. otherwise, we copy the row over to the other matrix and - // rewrite this one to a wildcard to model the fact that we can't determine if - // the extractor will actually match. - if spec_extractor != Some(*id) { - other.rows.push(row.clone()); - *row.front_mut() = Pattern::Wildcard; - } - - i += 1; - } - - // rows that don't match this pattern get moved to the other matrix - col if !col.match_concrete(&pat) => { - other.rows.push(self.rows.swap_remove(i)); - // note that we don't increment the index here - } + /// Expand the leading pattern of this row according to the template. + fn expand_leading(&mut self, env: &Env, template: &Pattern) { + assert!(!self.front().is_and()); - // and all other rows stay in this matrix - _ => i += 1, + let arity = template.arity(); + match self.pop() { + Pattern::Variant { pats, .. } | Pattern::Extractor { pats, .. } => { + self.pats.extend(pats.into_iter().rev()); } - } - if pat.can_expand() { - self.expand_leading(env, &pat); - } else { - self.drop_leading(); - } + Pattern::Wildcard => self + .pats + .extend(std::iter::repeat(Pattern::Wildcard).take(arity)), - self.normalize(); - other.normalize(); - other - } - - /// Returns a copy of the first pattern for the first row of the matrix, as a candidate for - /// specialization. - fn leading_pattern(&self) -> Pattern { - assert!( - !self.cols_empty(), - "leading_pattern called on a matrix with no patterns" - ); - - match self.rows[0].front() { - Pattern::And { pats } => pats.first().unwrap().clone(), - pat => pat.clone(), + pat => panic!( + "incorrect leading expansion:\nfound: {}\n expected: {}", + WithEnv::new(env, &pat), + WithEnv::new(env, template) + ), } } - /// If there are any `and` patterns in the leading column, extract out the sub-pattern that - /// matches `pat` and leave the rest in a fresh column. Insert wildcards for other columns - /// that have no `and` patterns. - fn specialize_and_patterns(&mut self, template: &Pattern) { - if !self.rows.iter().any(|row| row.front().is_and()) { - return; - } - - for row in self.rows.iter_mut() { - let mut pat = row.pop(); - if let Some(p) = pat.extract_matching(template) { - row.push(pat); - row.push(p); - } else { - row.push(Pattern::Wildcard); - row.push(pat); - } + /// Expand the leading pattern of this row according to the template. + fn expand(&mut self, env: &Env, template: &Pattern) { + if template.can_expand() { + self.expand_leading(env, template); + } else { + self.pop(); } } - /// Expand the patterns of the leading column according to the given template. This function - /// will only handle cases where the leading column is a variant, extractor, or wildcard, as - /// all other cases could not reasonably introduce sub-patterns. - fn expand_leading(&mut self, env: &Env, template: &Pattern) { - let arity = template.arity(); - for row in self.rows.iter_mut() { - let pat = row.pop(); - match pat { - Pattern::Variant { pats, .. } | Pattern::Extractor { pats, .. } - if pat.match_concrete(template) => - { - row.pats.extend(pats.into_iter().rev()) - } - - Pattern::Wildcard => row - .pats - .extend(std::iter::repeat(Pattern::Wildcard).take(arity)), + /// Returns true if it was possible to specialize this row to the pattern template provided. + fn specialize(&mut self, env: &Env, template: &Pattern) -> bool { + assert!(!self.is_empty()); - _ => panic!( - "incorrect leading expansion:\nfound: {}\n expected: {}", - WithEnv::new(env, &pat), - WithEnv::new(env, template) - ), - } + if self.front().is_wildcard() || self.front().match_concrete(template) { + self.expand(env, template); + return true; } - } - // Drop leading column from the matrix. - fn drop_leading(&mut self) { - for row in self.rows.iter_mut() { - row.pop(); + // If this is an extractor, we already know that it doesn't match the template exactly, so + // we'll treat it like a wildcard. + if self.front().is_extractor().is_some() { + *self.front_mut() = Pattern::Wildcard; + self.expand(env, template); + return true; } + + return false; } } @@ -845,41 +774,12 @@ impl std::fmt::Display for WithEnv<'_, &Pattern> { } } -impl std::fmt::Display for WithEnv<'_, &Matrix> { +impl std::fmt::Display for WithEnv<'_, &Row> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.value.rows.is_empty() { - return writeln!(f, ""); + write!(f, "[")?; + for pat in self.value.pats.iter().rev() { + write!(f, " {}", self.with_value(pat))?; } - - let mut lens = vec![0; self.value.rows.first().unwrap().pats.len()]; - let mut rows: Vec<(RuleId, Vec)> = Vec::with_capacity(self.value.rows.len()); - - for row in self.value.rows.iter() { - rows.push(( - row.rule, - row.pats - .iter() - .rev() - .enumerate() - .map(|(col, pat)| { - let str = format!("{}", self.with_value(pat)); - lens[col] = lens[col].max(str.len()); - str - }) - .collect(), - )); - } - - for (rule, row) in rows.into_iter() { - write!(f, "[")?; - let mut sep = ""; - for (col, width) in row.into_iter().zip(lens.iter()) { - write!(f, "{} {:width$} ", sep, col)?; - sep = "|"; - } - writeln!(f, "] = {}", rule.0)? - } - - Ok(()) + write!(f, " ] -> {}", self.value.rule.0) } } From e9f26d229a763a810ce291ef787f91f9b2a0955d Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 17:01:48 -0700 Subject: [PATCH 08/27] Improve errors consumption --- cranelift/isle/isle/src/overlap.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 4592f523b570..13c42e081926 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -23,17 +23,13 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { check_overlap_groups(&mut errors, &env, term); } - if !errors.is_empty() { - errors.graphviz(&env); - panic!(""); - let mut errors = errors.report(&env); - return match errors.len() { - 1 => Err(errors.pop().unwrap()), - _ => Err(Error::Errors(std::mem::take(&mut errors))), - }; + errors.graphviz(&env); + let mut errors = errors.report(&env); + match errors.len() { + 0 => Ok(()), + 1 => Err(errors.pop().unwrap()), + _ => Err(Error::Errors(errors)), } - - Ok(()) } /// A node in the error graph. @@ -145,11 +141,6 @@ impl Errors { components } - /// True when there are no edges in the graph. - fn is_empty(&self) -> bool { - self.nodes.is_empty() - } - /// Condense the overlap information down into individual errors. fn report(&mut self, env: &Env) -> Vec { let mut rules: Vec<_> = self From 443066da034f9b37b970dda058ab1de0bc563ebd Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 17:41:04 -0700 Subject: [PATCH 09/27] Collect errors from the highest degree node at each iteration --- cranelift/isle/isle/src/overlap.rs | 135 ++++++----------------------- 1 file changed, 27 insertions(+), 108 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 13c42e081926..22a3e408187e 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -12,18 +12,20 @@ use crate::sema::{ /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let env = Env::new(tyenv, termenv); - let mut errors = Errors::new(); - for term in termenv.terms.iter() { - // The only isle declaration that currently produces overlap is constructors whose - // definition is entirely in isle. - if !env.is_internal_constructor(term.id) { - continue; - } + let mut errors = termenv + .terms + .par_iter() + .fold(Errors::default, |mut errs, term| { + // The only isle declaration that currently produces overlap is constructors whose + // definition is entirely in isle. + if env.is_internal_constructor(term.id) { + check_overlap_groups(&mut errs, &env, term); + } - check_overlap_groups(&mut errors, &env, term); - } + errs + }) + .reduce(Errors::default, Errors::union); - errors.graphviz(&env); let mut errors = errors.report(&env); match errors.len() { 0 => Ok(()), @@ -33,22 +35,13 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { } /// A node in the error graph. +#[derive(Default)] struct Node { - component: Option, - /// `true` entries where an edge exists to the node at that index. edges: HashSet, } impl Node { - /// Make a new `Node` in the error graph with the given number of total nodes. - fn new() -> Self { - Self { - component: None, - edges: HashSet::new(), - } - } - /// Add an edge between this node and the other node. fn add_edge(&mut self, other: RuleId) { self.edges.insert(other); @@ -62,6 +55,7 @@ impl Node { /// A graph of all the rules in the isle source, with bi-directional edges between rules that are /// discovered to have overlap problems. +#[derive(Default)] struct Errors { /// Edges between rules indicating overlap. As the edges are not directed, the edges are /// normalized by ordering the rule ids. @@ -69,94 +63,15 @@ struct Errors { } impl Errors { - /// Make a new `Errors` graph for collecting overlap information. - fn new() -> Self { - Self { - nodes: HashMap::new(), - } - } - - fn graphviz(&mut self, env: &Env) { - let mut comps = self.components(env); - - comps.sort_by_key(|(_, comp)| std::cmp::Reverse(comp.len())); - - for (_, comp) in comps.into_iter() { - println!("graph {{"); - for a in comp { - let node = &self.nodes[&a]; - for b in node.edges.iter() { - if *b > a { - println!( - "{} -- {} // {} -- {}", - env.get_rule(a).pos.line, - env.get_rule(*b).pos.line, - a.0, - b.0 - ); - } - } - } - println!("}}"); - } - } - - /// Label components - fn components(&mut self, env: &Env) -> Vec<(usize, Vec)> { - let mut components = Vec::new(); - let mut work = Vec::new(); - - let keys: Vec<_> = self.nodes.keys().copied().collect(); - - for id in keys { - if self.nodes[&id].component.is_some() { - continue; - } - - if self.nodes[&id].edges.is_empty() { - continue; - } - - let comp = components.len(); - let mut component = Vec::new(); - - work.clear(); - work.push(id); - while let Some(other) = work.pop() { - if let Some(other_comp) = self.nodes[&other].component { - assert!(comp == other_comp); - continue; - } - - self.nodes.get_mut(&other).unwrap().component = Some(comp); - component.push(other); - - work.extend(self.nodes[&other].edges.iter()); - } - - component.sort(); - components.push((comp, component)); + fn union(mut self, other: Self) -> Self { + for (id, node) in other.nodes { + self.nodes.entry(id).or_default().edges.extend(node.edges); } - - components + self } /// Condense the overlap information down into individual errors. fn report(&mut self, env: &Env) -> Vec { - let mut rules: Vec<_> = self - .nodes - .iter() - .filter_map(|(id, node)| { - if node.edges.is_empty() { - None - } else { - Some(*id) - } - }) - .collect(); - - rules.sort_by_cached_key(|id| self.nodes[id].edges.len()); - let mut errors = Vec::new(); let get_info = |id| { @@ -167,11 +82,15 @@ impl Errors { (src, span) }; - // Work backwards through the ids to find the nodes with the largest conflict first. - for id in rules.into_iter().rev() { + while let Some(id) = self + .nodes + .keys() + .copied() + .max_by_key(|id| self.nodes[id].edges.len()) + { let node = self.remove_edges(id); if node.edges.is_empty() { - continue; + break; } // build the real error @@ -205,8 +124,8 @@ impl Errors { /// Add a bidirectional edge between two rules in the graph. fn add_edge(&mut self, a: RuleId, b: RuleId) { // edges are undirected - self.nodes.entry(a).or_insert_with(Node::new).add_edge(b); - self.nodes.entry(b).or_insert_with(Node::new).add_edge(a); + self.nodes.entry(a).or_default().add_edge(b); + self.nodes.entry(b).or_default().add_edge(a); } } From 49424cc437a033fd04195cbbfbfe86e5432a5bf1 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 17:49:15 -0700 Subject: [PATCH 10/27] Don't register overlap for rules of different priorities --- cranelift/isle/isle/src/overlap.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 22a3e408187e..c56aec2b9c1d 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -131,7 +131,7 @@ impl Errors { /// Check for overlapping rules within individual priority groups. fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { - let rows: Vec = env + let rows: Vec<_> = env .rules_for_term(term.id) .into_iter() .map(|id| Row::from_rule(env, id)) @@ -164,7 +164,9 @@ fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { // Process conflicts sequentially. for (l, r) in conflicts { - errs.add_edge(l, r); + if env.get_rule(l).prio == env.get_rule(r).prio { + errs.add_edge(l, r); + } } } From 8e48ca2283ad23dfa27bb75ccaf4e94298863640 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 14 Sep 2022 17:59:11 -0700 Subject: [PATCH 11/27] More fold/reduce instead of sequential reduction --- cranelift/isle/isle/src/overlap.rs | 34 +++++++++++------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index c56aec2b9c1d..d05326be8689 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -15,14 +15,14 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let mut errors = termenv .terms .par_iter() - .fold(Errors::default, |mut errs, term| { + .fold(Errors::default, |errs, term| { // The only isle declaration that currently produces overlap is constructors whose // definition is entirely in isle. if env.is_internal_constructor(term.id) { - check_overlap_groups(&mut errs, &env, term); + errs.union(check_overlap_groups(&env, term)) + } else { + errs } - - errs }) .reduce(Errors::default, Errors::union); @@ -130,7 +130,7 @@ impl Errors { } /// Check for overlapping rules within individual priority groups. -fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { +fn check_overlap_groups(env: &Env, term: &Term) -> Errors { let rows: Vec<_> = env .rules_for_term(term.id) .into_iter() @@ -144,30 +144,20 @@ fn check_overlap_groups(errs: &mut Errors, env: &Env, term: &Term) { pairs.extend(rest.iter().map(|other| (row, other))); } - if pairs.is_empty() { - return; - } - // Process rule pairs in parallel - let conflicts: Vec<_> = pairs + pairs .into_par_iter() - .filter_map(|(left, right)| { + .fold(Errors::default, |mut errs, (left, right)| { let lid = left.rule; let rid = right.rule; if check_overlap(env, left.clone(), right.clone()) { - Some((lid, rid)) - } else { - None + if env.get_rule(lid).prio == env.get_rule(rid).prio { + errs.add_edge(lid, rid); + } } + errs }) - .collect(); - - // Process conflicts sequentially. - for (l, r) in conflicts { - if env.get_rule(l).prio == env.get_rule(r).prio { - errs.add_edge(l, r); - } - } + .reduce(Errors::default, Errors::union) } /// Check for overlapping rules within a single prioirty group. From 2c0ea5ae6bc7d502b96a1f3bb53f763484b1fc8d Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 11:41:11 -0700 Subject: [PATCH 12/27] Always run the overlap checker, but add a feature for raising errors --- cranelift/isle/isle/Cargo.toml | 2 +- cranelift/isle/isle/src/compile.rs | 5 ----- cranelift/isle/isle/src/overlap.rs | 21 ++++++++++++++++----- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cranelift/isle/isle/Cargo.toml b/cranelift/isle/isle/Cargo.toml index 72d5866afce4..6052fb567bc8 100644 --- a/cranelift/isle/isle/Cargo.toml +++ b/cranelift/isle/isle/Cargo.toml @@ -19,6 +19,6 @@ tempfile = "3" [features] default = [] -check-overlap = [] +overlap-errors = [] logging = ["log"] miette-errors = ["miette"] diff --git a/cranelift/isle/isle/src/compile.rs b/cranelift/isle/isle/src/compile.rs index 090d5e3159c1..de0a3348722b 100644 --- a/cranelift/isle/isle/src/compile.rs +++ b/cranelift/isle/isle/src/compile.rs @@ -7,12 +7,7 @@ use crate::{ast, codegen, sema, trie}; pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result { let mut typeenv = sema::TypeEnv::from_ast(defs)?; let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?; - - // As the overlap checker currently finds a lot of overlap errors in the lowerings, require it - // to be explicitly enabled while we work through them. - #[cfg(feature = "check-overlap")] crate::overlap::check(&mut typeenv, &termenv)?; - let tries = trie::build_tries(&typeenv, &termenv); Ok(codegen::codegen(&typeenv, &termenv, &tries, options)) } diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index d05326be8689..971d77270d06 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -26,11 +26,22 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { }) .reduce(Errors::default, Errors::union); - let mut errors = errors.report(&env); - match errors.len() { - 0 => Ok(()), - 1 => Err(errors.pop().unwrap()), - _ => Err(Error::Errors(errors)), + #[cfg(feature = "overlap-errors")] + { + let mut errors = errors.report(&env); + match errors.len() { + 0 => Ok(()), + 1 => Err(errors.pop().unwrap()), + _ => Err(Error::Errors(errors)), + } + } + + + #[cfg(not(feature = "overlap-errors"))] + { + use crate::log; + log!("found {} overlap errors", errors.report(&env).len()); + Ok(()) } } From ff6c081233aeb661a92493d228f7247dfbab9206 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 12:11:49 -0700 Subject: [PATCH 13/27] Fix spelling error --- cranelift/isle/isle/src/overlap.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 971d77270d06..77aa9e70a3cb 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -171,7 +171,7 @@ fn check_overlap_groups(env: &Env, term: &Term) -> Errors { .reduce(Errors::default, Errors::union) } -/// Check for overlapping rules within a single prioirty group. +/// Check for overlapping rules within a single priority group. fn check_overlap(env: &Env, mut left: Row, mut right: Row) -> bool { while !left.is_empty() { // drop leading wildcards from both @@ -478,7 +478,7 @@ impl Pattern { } } - /// Assuming that this is an and-pattern, extract the sub-pattern that matches the template and + /// If this pattern is an and-pattern, extract the sub-pattern that matches the template and /// remove it from `self`. This operation is used when specializing columns that contain /// and-patterns to another pattern, leaning on the assumption that the and-pattern matches if /// any of its sub-patterns also match. From f184e07272aeba968a718dc24b8cbb86547954d0 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 12:12:23 -0700 Subject: [PATCH 14/27] Format --- cranelift/isle/isle/src/overlap.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 77aa9e70a3cb..3d734a4fcab1 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -36,7 +36,6 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { } } - #[cfg(not(feature = "overlap-errors"))] { use crate::log; From 75ed9b90a9e17c8ead5d92837d1d8c85f5c1552b Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 12:25:53 -0700 Subject: [PATCH 15/27] Fix some comments and function names --- cranelift/isle/isle/src/overlap.rs | 32 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 3d734a4fcab1..2ea81323a808 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -19,7 +19,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { // The only isle declaration that currently produces overlap is constructors whose // definition is entirely in isle. if env.is_internal_constructor(term.id) { - errs.union(check_overlap_groups(&env, term)) + errs.union(check_overlap(&env, term)) } else { errs } @@ -47,7 +47,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { /// A node in the error graph. #[derive(Default)] struct Node { - /// `true` entries where an edge exists to the node at that index. + /// Other rules that this one overlaps with. edges: HashSet, } @@ -63,16 +63,15 @@ impl Node { } } -/// A graph of all the rules in the isle source, with bi-directional edges between rules that are -/// discovered to have overlap problems. +/// A graph of rules that overlap in the ISLE source. The edges are undirected. #[derive(Default)] struct Errors { - /// Edges between rules indicating overlap. As the edges are not directed, the edges are - /// normalized by ordering the rule ids. + /// Edges between rules indicating overlap. nodes: HashMap, } impl Errors { + /// Merge together two Error graphs. fn union(mut self, other: Self) -> Self { for (id, node) in other.nodes { self.nodes.entry(id).or_default().edges.extend(node.edges); @@ -80,7 +79,10 @@ impl Errors { self } - /// Condense the overlap information down into individual errors. + /// Condense the overlap information down into individual errors. We iteratively remove the + /// nodes from the graph with the highest degree, reporting errors for them and their direct + /// connections. The goal with reporting errors this way is to prefer reporting rules that + /// overlap with many others first, and then report other more targeted overlaps later. fn report(&mut self, env: &Env) -> Vec { let mut errors = Vec::new(); @@ -139,8 +141,10 @@ impl Errors { } } -/// Check for overlapping rules within individual priority groups. -fn check_overlap_groups(env: &Env, term: &Term) -> Errors { +/// Determine if any rules that rewrite the given term overlap in the input that they accept. This +/// checkes every unique pair of rules, as checking rules in aggregate tends to suffer from +/// exponential explosion in the presence of wildcard patterns. +fn check_overlap(env: &Env, term: &Term) -> Errors { let rows: Vec<_> = env .rules_for_term(term.id) .into_iter() @@ -160,7 +164,7 @@ fn check_overlap_groups(env: &Env, term: &Term) -> Errors { .fold(Errors::default, |mut errs, (left, right)| { let lid = left.rule; let rid = right.rule; - if check_overlap(env, left.clone(), right.clone()) { + if check_overlap_pair(env, left.clone(), right.clone()) { if env.get_rule(lid).prio == env.get_rule(rid).prio { errs.add_edge(lid, rid); } @@ -170,8 +174,8 @@ fn check_overlap_groups(env: &Env, term: &Term) -> Errors { .reduce(Errors::default, Errors::union) } -/// Check for overlapping rules within a single priority group. -fn check_overlap(env: &Env, mut left: Row, mut right: Row) -> bool { +/// Check if two rules overlap in the inputs they accept. +fn check_overlap_pair(env: &Env, mut left: Row, mut right: Row) -> bool { while !left.is_empty() { // drop leading wildcards from both while !left.is_empty() && left.front().is_wildcard() && right.front().is_wildcard() { @@ -323,7 +327,7 @@ impl Pattern { /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or /// [`Pattern::Extractor`] cases depending on their term kind. /// 4. [`sema::Pattern::And`] instances are sorted to ensure that we can traverse them quickly - /// when specializing the matrix. + /// when specializing them. fn from_sema(env: &Env, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { match pat { sema::Pattern::BindPattern(_, id, pat) => { @@ -501,7 +505,7 @@ impl Pattern { } } -/// A single row in the pattern matrix. +/// The patterns of a rule turned into a work-queue for overlap checking. #[derive(Debug, Clone)] struct Row { pats: Vec, From 717309760f135368e0bc1261cd94fa0093b82b0d Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 12:28:01 -0700 Subject: [PATCH 16/27] Remove the `remove_node` function on `Errors` --- cranelift/isle/isle/src/overlap.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 2ea81323a808..859703c88369 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -100,11 +100,18 @@ impl Errors { .copied() .max_by_key(|id| self.nodes[id].edges.len()) { - let node = self.remove_edges(id); + let node = self.nodes.remove(&id).unwrap(); + if node.edges.is_empty() { break; } + for other in node.edges.iter() { + if let Some(other) = self.nodes.get_mut(&other) { + other.remove_edge(id); + } + } + // build the real error let mut rules = vec![get_info(id)]; @@ -119,20 +126,6 @@ impl Errors { errors } - /// Remove all the edges for this rule in the graph, returning the original `Node` contents for - /// further processing. - fn remove_edges(&mut self, id: RuleId) -> Node { - let node = self.nodes.remove(&id).unwrap(); - - for other in node.edges.iter() { - if let Some(other) = self.nodes.get_mut(&other) { - other.remove_edge(id); - } - } - - node - } - /// Add a bidirectional edge between two rules in the graph. fn add_edge(&mut self, a: RuleId, b: RuleId) { // edges are undirected From b42eab955dae0f45dda7cdd39a963ca2566d4cef Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Thu, 15 Sep 2022 12:32:42 -0700 Subject: [PATCH 17/27] The collected errors no longer need to be mutable --- cranelift/isle/isle/src/overlap.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 859703c88369..fa2bbfab270f 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -12,7 +12,7 @@ use crate::sema::{ /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let env = Env::new(tyenv, termenv); - let mut errors = termenv + let errors = termenv .terms .par_iter() .fold(Errors::default, |errs, term| { @@ -26,20 +26,16 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { }) .reduce(Errors::default, Errors::union); - #[cfg(feature = "overlap-errors")] - { - let mut errors = errors.report(&env); + let mut errors = errors.report(&env); + if cfg!(feature = "overlap-errors") { match errors.len() { 0 => Ok(()), 1 => Err(errors.pop().unwrap()), _ => Err(Error::Errors(errors)), } - } - - #[cfg(not(feature = "overlap-errors"))] - { + } else { use crate::log; - log!("found {} overlap errors", errors.report(&env).len()); + log!("found {} overlap errors", errors.len()); Ok(()) } } @@ -83,7 +79,7 @@ impl Errors { /// nodes from the graph with the highest degree, reporting errors for them and their direct /// connections. The goal with reporting errors this way is to prefer reporting rules that /// overlap with many others first, and then report other more targeted overlaps later. - fn report(&mut self, env: &Env) -> Vec { + fn report(mut self, env: &Env) -> Vec { let mut errors = Vec::new(); let get_info = |id| { From 353880b1bb97de64614f14ad09117de19d338000 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Fri, 16 Sep 2022 11:47:30 -0700 Subject: [PATCH 18/27] Reverse the row patterns after constructing them --- cranelift/isle/isle/src/overlap.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index fa2bbfab270f..f8ba72ac60d9 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -506,16 +506,16 @@ impl Row { fn from_rule(env: &Env, rule: RuleId) -> Row { if let sema::Pattern::Term(_, _, vars) = &env.get_rule(rule).lhs { let mut binds = Vec::new(); - Self { - // NOTE: the patterns are reversed so that it's easier to manipulate the leading - // column of the row by pushing/popping the pats vector. - pats: vars - .iter() - .rev() - .map(|pat| Pattern::from_sema(env, &mut binds, pat)) - .collect(), - rule, - } + let mut pats: Vec<_> = vars + .iter() + .map(|pat| Pattern::from_sema(env, &mut binds, pat)) + .collect(); + + // NOTE: the patterns are reversed so that it's easier to manipulate the leading + // column of the row by pushing/popping the pats vector. + pats.reverse(); + + Self { pats, rule } } else { panic!("Constructing a Row from a malformed rule") } From ac09ba22ca5d0490e8694ca125101ed9d9afce9e Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Fri, 16 Sep 2022 14:01:58 -0700 Subject: [PATCH 19/27] Remove the single_case field on Pattern::Variant --- cranelift/isle/isle/src/overlap.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index f8ba72ac60d9..29cc039cf9ed 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -289,7 +289,6 @@ enum Pattern { /// Enum variant constructors. Variant { id: TermId, - single_case: bool, pats: Vec, }, @@ -369,19 +368,14 @@ impl Pattern { sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value }, sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name }, - sema::Pattern::Term(ty, id, pats) => { + sema::Pattern::Term(_, id, pats) => { let pats = pats .iter() .map(|pat| Pattern::from_sema(env, binds, pat)) .collect(); match &env.get_term(*id).kind { - TermKind::EnumVariant { .. } => Pattern::Variant { - id: *id, - single_case: env.is_single_constructor_enum(*ty), - pats, - }, - + TermKind::EnumVariant { .. } => Pattern::Variant { id: *id, pats }, TermKind::Decl { .. } => Pattern::Extractor { id: *id, pats }, } } From ae7cb60edce54b82fa8af2a1dd3dc1d83a17a9f9 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Fri, 16 Sep 2022 18:14:31 -0700 Subject: [PATCH 20/27] Simplify by removing some abstractions --- cranelift/isle/isle/src/overlap.rs | 488 +++++++---------------------- 1 file changed, 111 insertions(+), 377 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 29cc039cf9ed..9f75acaa6202 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -5,8 +5,7 @@ use std::collections::{HashMap, HashSet}; use crate::error::{Error, Result, Source, Span}; use crate::sema::{ - self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, Type, TypeEnv, - TypeId, VarId, + self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, TypeEnv, VarId, }; /// Check for overlap. @@ -15,15 +14,7 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let errors = termenv .terms .par_iter() - .fold(Errors::default, |errs, term| { - // The only isle declaration that currently produces overlap is constructors whose - // definition is entirely in isle. - if env.is_internal_constructor(term.id) { - errs.union(check_overlap(&env, term)) - } else { - errs - } - }) + .map(|term| check_overlap(&env, term)) .reduce(Errors::default, Errors::union); let mut errors = errors.report(&env); @@ -134,11 +125,19 @@ impl Errors { /// checkes every unique pair of rules, as checking rules in aggregate tends to suffer from /// exponential explosion in the presence of wildcard patterns. fn check_overlap(env: &Env, term: &Term) -> Errors { - let rows: Vec<_> = env - .rules_for_term(term.id) - .into_iter() - .map(|id| Row::from_rule(env, id)) - .collect(); + // The only isle declaration that currently produces overlap is constructors whose definition + // is entirely in isle. + if !matches!( + term.kind, + TermKind::Decl { + constructor_kind: Some(ConstructorKind::InternalConstructor), + .. + } + ) { + return Errors::default(); + } + + let rows = env.rules_for_term(term.id); let mut pairs = Vec::new(); let mut cursor = &rows[..]; @@ -150,12 +149,10 @@ fn check_overlap(env: &Env, term: &Term) -> Errors { // Process rule pairs in parallel pairs .into_par_iter() - .fold(Errors::default, |mut errs, (left, right)| { - let lid = left.rule; - let rid = right.rule; - if check_overlap_pair(env, left.clone(), right.clone()) { - if env.get_rule(lid).prio == env.get_rule(rid).prio { - errs.add_edge(lid, rid); + .fold(Errors::default, |mut errs, ((lid, left), (rid, right))| { + if env.get_rule(*lid).prio == env.get_rule(*rid).prio { + if check_overlap_pair(left, right) { + errs.add_edge(*lid, *rid); } } errs @@ -164,37 +161,77 @@ fn check_overlap(env: &Env, term: &Term) -> Errors { } /// Check if two rules overlap in the inputs they accept. -fn check_overlap_pair(env: &Env, mut left: Row, mut right: Row) -> bool { - while !left.is_empty() { - // drop leading wildcards from both - while !left.is_empty() && left.front().is_wildcard() && right.front().is_wildcard() { - left.pop(); - right.pop(); - } +fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool { + debug_assert_eq!(a.len(), b.len()); + let mut worklist: Vec<_> = a.iter().zip(b.iter()).collect(); + + while let Some((a, b)) = worklist.pop() { + // Checking the cross-product of two and-patterns is O(n*m). Merging sorted lists or + // hash-maps might be faster in practice, but: + // - The alternatives are not asymptotically faster, because in theory all the subpatterns + // might have the same extractor or enum variant, and in that case any approach has to + // check all of the cross-product combinations anyway. + // - It's easier to reason about this doubly-nested loop than about merging sorted lists or + // picking the right hash keys. + // - These lists are always so small that performance doesn't matter. + for a in a.as_and_subpatterns() { + for b in b.as_and_subpatterns() { + let overlap = match (a, b) { + (Pattern::Int { value: a }, Pattern::Int { value: b }) => a == b, + (Pattern::Const { name: a }, Pattern::Const { name: b }) => a == b, + + // if it's the same variant or same extractor, check all pairs of subterms + ( + Pattern::Variant { + id: a, + pats: a_pats, + }, + Pattern::Variant { + id: b, + pats: b_pats, + }, + ) + | ( + Pattern::Extractor { + id: a, + pats: a_pats, + }, + Pattern::Extractor { + id: b, + pats: b_pats, + }, + ) if a == b => { + debug_assert_eq!(a_pats.len(), b_pats.len()); + worklist.extend(a_pats.iter().zip(b_pats.iter())); + true + } - if left.is_empty() { - break; - } + // different variants of the same enum definitely do not overlap + (Pattern::Variant { .. }, Pattern::Variant { .. }) => false, - // pick the best pattern from the leading column of the two rows - let lr = left.leading_pattern(); - let rr = right.leading_pattern(); + // an extractor which does not exactly match the other pattern might overlap + (Pattern::Extractor { .. }, _) | (_, Pattern::Extractor { .. }) => true, - let pat = if lr < rr { lr.1.clone() } else { rr.1.clone() }; + // a wildcard definitely overlaps + (Pattern::Wildcard, _) | (_, Pattern::Wildcard) => true, - if lr.0 || rr.0 { - left.specialize_and_patterns(&pat); - right.specialize_and_patterns(&pat); - } + // these patterns can only be paired with patterns of the same type, or + // wildcards or extractors, and all those cases are covered above + (Pattern::Int { .. } | Pattern::Const { .. } | Pattern::Variant { .. }, _) => { + unreachable!() + } - // specialize both rows on that pattern, and if specialization fails we know the two don't - // overlap. - if !left.specialize(env, &pat) || !right.specialize(env, &pat) { - return false; + // and-patterns don't reach here due to as_and_subpatterns + (Pattern::And { .. }, _) => unreachable!(), + }; + + if !overlap { + return false; + } + } } } - - return true; + true } /// A convenience wrapper around the `TypeEnv` and `TermEnv` environments. @@ -209,11 +246,6 @@ impl<'a> Env<'a> { Self { tyenv, termenv } } - /// Fetch the string associated with a symbol. - fn get_sym(&self, id: Sym) -> &str { - &self.tyenv.syms[id.0] - } - /// Fetch the rule associated with this id. fn get_rule(&self, id: RuleId) -> &Rule { &self.termenv.rules[id.0] @@ -224,11 +256,6 @@ impl<'a> Env<'a> { &self.termenv.terms[id.0] } - /// Fetch the tyep associated with this id. - fn get_type(&self, id: TypeId) -> &Type { - &self.tyenv.types[id.0] - } - /// Fetch source information for a file id. fn get_source(&self, file: usize) -> Source { Source::new( @@ -237,44 +264,31 @@ impl<'a> Env<'a> { ) } - /// True when this term represents a constructor implemented in isle. - fn is_internal_constructor(&self, id: TermId) -> bool { - match self.get_term(id).kind { - TermKind::Decl { - constructor_kind: Some(ConstructorKind::InternalConstructor), - .. - } => true, - _ => false, - } - } - /// The ids of all [`Rule`]s defined for this term. - fn rules_for_term(&self, id: TermId) -> Vec { + fn rules_for_term(&self, id: TermId) -> Vec<(RuleId, Vec)> { self.termenv .rules .iter() .filter_map(|rule| { - if let sema::Pattern::Term(_, tid, _) = rule.lhs { - if tid == id { - return Some(rule.id); + if let sema::Pattern::Term(_, tid, vars) = &rule.lhs { + if *tid == id { + let mut binds = Vec::new(); + return Some(( + rule.id, + vars.iter() + .map(|pat| Pattern::from_sema(self, &mut binds, pat)) + .collect(), + )); } } None }) .collect() } - - /// Returns true when this type is an enum with only a single constructor. - fn is_single_constructor_enum(&self, ty: TypeId) -> bool { - match self.get_type(ty) { - Type::Primitive(_, _, _) => false, - Type::Enum { variants, .. } => variants.len() == 1, - } - } } /// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +#[derive(Debug, Clone)] enum Pattern { /// Integer literal patterns. Int { @@ -292,7 +306,7 @@ enum Pattern { pats: Vec, }, - /// And patterns, with their sub-patterns sorted. + /// Conjunctions of patterns. And { pats: Vec, }, @@ -314,8 +328,6 @@ impl Pattern { /// would have introduced equalities with /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or /// [`Pattern::Extractor`] cases depending on their term kind. - /// 4. [`sema::Pattern::And`] instances are sorted to ensure that we can traverse them quickly - /// when specializing them. fn from_sema(env: &Env, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { match pat { sema::Pattern::BindPattern(_, id, pat) => { @@ -331,9 +343,10 @@ impl Pattern { // spine of related patterns only, so more specific information about // individual values isn't necessarily helpful; we consider overlap // checking to be an over-approximation of overlapping rules, so handling - // equalies ends up being best-effort. As an approximation, we use whatever - // pattern happened to be at the binding of the variable for all of the - // cases where it's used for equality. For example, in the following rule: + // equalities ends up being best-effort. As an approximation, we use + // whatever pattern happened to be at the binding of the variable for all + // of the cases where it's used for equality. For example, in the following + // rule: // // > (rule (example x @ (Enum.Variant y) x) ...) // @@ -368,317 +381,38 @@ impl Pattern { sema::Pattern::ConstInt(_, value) => Pattern::Int { value: *value }, sema::Pattern::ConstPrim(_, name) => Pattern::Const { name: *name }, - sema::Pattern::Term(_, id, pats) => { + &sema::Pattern::Term(_, id, ref pats) => { let pats = pats .iter() .map(|pat| Pattern::from_sema(env, binds, pat)) .collect(); - match &env.get_term(*id).kind { - TermKind::EnumVariant { .. } => Pattern::Variant { id: *id, pats }, - TermKind::Decl { .. } => Pattern::Extractor { id: *id, pats }, + match &env.get_term(id).kind { + TermKind::EnumVariant { .. } => Pattern::Variant { id, pats }, + TermKind::Decl { .. } => Pattern::Extractor { id, pats }, } } sema::Pattern::Wildcard(_) => Pattern::Wildcard, sema::Pattern::And(_, pats) => { - let mut pats: Vec = pats + let pats = pats .iter() .map(|pat| Pattern::from_sema(env, binds, pat)) .collect(); - - if pats.len() == 1 { - pats.pop().unwrap() - } else { - pats.sort_unstable(); - Pattern::And { pats } - } - } - } - } - - /// True when this pattern is a wildcard. - fn is_wildcard(&self) -> bool { - match self { - Pattern::Wildcard => true, - _ => false, - } - } - - /// True when this pattern is an extractor. - fn is_extractor(&self) -> Option { - match self { - Pattern::Extractor { id, .. } => Some(*id), - _ => None, - } - } - - /// True when this pattern is an and-pattern. - fn is_and(&self) -> bool { - match self { - Pattern::And { .. } => true, - _ => false, - } - } - - /// For `Variant` and `Extractor` this is the number of arguments, otherwise it is `1`. - fn arity(&self) -> usize { - match self { - Pattern::Variant { pats, .. } => pats.len(), - Pattern::Extractor { pats, .. } => pats.len(), - _ => 1, - } - } - - /// Returns `true` for `Variant` or `Extractor`, as these are the two cases that can be - /// expanded into sub-patterns. - fn can_expand(&self) -> bool { - match self { - Pattern::Variant { .. } => true, - Pattern::Extractor { .. } => true, - _ => false, - } - } - - /// Returns `true` if this pattern could match one of the concrete patterns specified by - /// `other`. NOTE: this is intentionally a shallow match, and any sub-patterns of `other` are - /// intentionally ignored. We're only interested in the most top-level overlap between these - /// patterns here. - fn match_concrete(&self, other: &Pattern) -> bool { - match (self, other) { - // these are the cases where we know enough to say definitively yes or no - (Pattern::Int { value: left }, Pattern::Int { value: right }) => left == right, - (Pattern::Const { name: left }, Pattern::Const { name: right }) => left == right, - (Pattern::Variant { id: left, .. }, Pattern::Variant { id: right, .. }) => { - left == right - } - - (Pattern::Extractor { id: left, .. }, Pattern::Extractor { id: right, .. }) => { - left == right + Pattern::And { pats } } - - (Pattern::And { pats }, _) => pats.iter().rev().any(|pat| pat.match_concrete(other)), - - _ => false, } } - /// If this pattern is an and-pattern, extract the sub-pattern that matches the template and - /// remove it from `self`. This operation is used when specializing columns that contain - /// and-patterns to another pattern, leaning on the assumption that the and-pattern matches if - /// any of its sub-patterns also match. - fn extract_matching(&mut self, template: &Pattern) -> Option { + /// If this is an and-pattern, return its subpatterns. Otherwise pretend like there's an + /// and-pattern which has this as its only subpattern, and return self as a single-element + /// slice. + fn as_and_subpatterns(&self) -> &[Pattern] { if let Pattern::And { pats } = self { - for i in 0..pats.len() { - if pats[i].match_concrete(template) { - let res = pats.remove(i); - - // if the and has only a single element left, collapse it - if pats.len() == 1 { - *self = pats.remove(0); - } - - return Some(res); - } - } - } - - None - } -} - -/// The patterns of a rule turned into a work-queue for overlap checking. -#[derive(Debug, Clone)] -struct Row { - pats: Vec, - rule: RuleId, -} - -impl Row { - /// Construct a rule from this rule id. - fn from_rule(env: &Env, rule: RuleId) -> Row { - if let sema::Pattern::Term(_, _, vars) = &env.get_rule(rule).lhs { - let mut binds = Vec::new(); - let mut pats: Vec<_> = vars - .iter() - .map(|pat| Pattern::from_sema(env, &mut binds, pat)) - .collect(); - - // NOTE: the patterns are reversed so that it's easier to manipulate the leading - // column of the row by pushing/popping the pats vector. - pats.reverse(); - - Self { pats, rule } + pats } else { - panic!("Constructing a Row from a malformed rule") - } - } - - /// A row is empty when its pattern vector is empty. - fn is_empty(&self) -> bool { - self.pats.is_empty() - } - - /// The pattern from the first column of this row. - fn front(&self) -> &Pattern { - assert!(!self.pats.is_empty()); - self.pats.last().unwrap() - } - - /// A mutable reference to the pattern from the first column of this row. - fn front_mut(&mut self) -> &mut Pattern { - assert!(!self.pats.is_empty()); - self.pats.last_mut().unwrap() - } - - /// Push a new pattern on the front of this row. - fn push(&mut self, pat: Pattern) { - self.pats.push(pat); - } - - /// Pop the pattern from the front of the row. - fn pop(&mut self) -> Pattern { - assert!(!self.pats.is_empty()); - self.pats.pop().unwrap() - } - - fn leading_pattern(&self) -> (bool, &Pattern) { - assert!(!self.is_empty(), "leading_pattern called on an emtpy row"); - - match self.front() { - Pattern::And { pats } => (true, pats.first().unwrap()), - pat => (false, pat), - } - } - - /// Specialize any leading and-patterns to this template. - fn specialize_and_patterns(&mut self, template: &Pattern) { - let mut pat = self.pop(); - if let Some(p) = pat.extract_matching(template) { - self.push(pat); - self.push(p); - } else { - self.push(Pattern::Wildcard); - self.push(pat); - } - } - - /// Expand the leading pattern of this row according to the template. - fn expand_leading(&mut self, env: &Env, template: &Pattern) { - assert!(!self.front().is_and()); - - let arity = template.arity(); - match self.pop() { - Pattern::Variant { pats, .. } | Pattern::Extractor { pats, .. } => { - self.pats.extend(pats.into_iter().rev()); - } - - Pattern::Wildcard => self - .pats - .extend(std::iter::repeat(Pattern::Wildcard).take(arity)), - - pat => panic!( - "incorrect leading expansion:\nfound: {}\n expected: {}", - WithEnv::new(env, &pat), - WithEnv::new(env, template) - ), - } - } - - /// Expand the leading pattern of this row according to the template. - fn expand(&mut self, env: &Env, template: &Pattern) { - if template.can_expand() { - self.expand_leading(env, template); - } else { - self.pop(); - } - } - - /// Returns true if it was possible to specialize this row to the pattern template provided. - fn specialize(&mut self, env: &Env, template: &Pattern) -> bool { - assert!(!self.is_empty()); - - if self.front().is_wildcard() || self.front().match_concrete(template) { - self.expand(env, template); - return true; - } - - // If this is an extractor, we already know that it doesn't match the template exactly, so - // we'll treat it like a wildcard. - if self.front().is_extractor().is_some() { - *self.front_mut() = Pattern::Wildcard; - self.expand(env, template); - return true; - } - - return false; - } -} - -/// A convenience struct for pretty-printing values that need an environment. -struct WithEnv<'env, T> { - env: &'env Env<'env>, - value: T, -} - -impl<'env, T> WithEnv<'env, T> { - /// Construct a new `WithEnv` for the given environment and value. - fn new(env: &'env Env, value: T) -> Self { - Self { env, value } - } - - /// Construct a new `WithEnv` with the same environment, but a different value. - fn with_value(&self, value: U) -> WithEnv<'env, U> { - WithEnv { - env: self.env, - value, - } - } -} - -impl std::fmt::Display for WithEnv<'_, &Pattern> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.value { - Pattern::Int { value } => write!(f, "{}", value), - - Pattern::Const { name } => write!(f, "${}", self.env.get_sym(*name)), - - Pattern::Variant { id, pats, .. } => { - write!(f, "({}", self.env.get_sym(self.env.get_term(*id).name))?; - for pat in pats { - write!(f, " {}", self.with_value(pat))?; - } - write!(f, ")") - } - - Pattern::And { pats } => { - write!(f, "(and")?; - for pat in pats { - write!(f, " {}", self.with_value(pat))?; - } - write!(f, ")") - } - - Pattern::Extractor { id, pats } => { - write!(f, "({}", self.env.get_sym(self.env.get_term(*id).name))?; - for pat in pats { - write!(f, " {}", self.with_value(pat))?; - } - write!(f, ")") - } - - Pattern::Wildcard => write!(f, "_"), - } - } -} - -impl std::fmt::Display for WithEnv<'_, &Row> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[")?; - for pat in self.value.pats.iter().rev() { - write!(f, " {}", self.with_value(pat))?; + std::slice::from_ref(self) } - write!(f, " ] -> {}", self.value.rule.0) } } From b702e4697e2de676a4f6f1b24b732922e6920bc7 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 15:28:36 -0700 Subject: [PATCH 21/27] Only loop over rule list once This also puts all rule-pairs in a single list that rayon can efficiently partition across multiple CPUs. I previously suggested two layers of par_iters and relying on work-stealing to balance things out, but this is simpler to read and also ought to work more smoothly. --- cranelift/isle/isle/src/overlap.rs | 90 ++++++++++-------------------- 1 file changed, 31 insertions(+), 59 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 9f75acaa6202..3078595dd35d 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -4,20 +4,12 @@ use rayon::prelude::*; use std::collections::{HashMap, HashSet}; use crate::error::{Error, Result, Source, Span}; -use crate::sema::{ - self, ConstructorKind, Rule, RuleId, Sym, Term, TermEnv, TermId, TermKind, TypeEnv, VarId, -}; +use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, VarId}; /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { let env = Env::new(tyenv, termenv); - let errors = termenv - .terms - .par_iter() - .map(|term| check_overlap(&env, term)) - .reduce(Errors::default, Errors::union); - - let mut errors = errors.report(&env); + let mut errors = check_overlaps(termenv).report(&env); if cfg!(feature = "overlap-errors") { match errors.len() { 0 => Ok(()), @@ -124,35 +116,42 @@ impl Errors { /// Determine if any rules that rewrite the given term overlap in the input that they accept. This /// checkes every unique pair of rules, as checking rules in aggregate tends to suffer from /// exponential explosion in the presence of wildcard patterns. -fn check_overlap(env: &Env, term: &Term) -> Errors { - // The only isle declaration that currently produces overlap is constructors whose definition - // is entirely in isle. - if !matches!( - term.kind, - TermKind::Decl { - constructor_kind: Some(ConstructorKind::InternalConstructor), - .. +fn check_overlaps(env: &TermEnv) -> Errors { + struct RulePatterns<'a> { + rule: &'a Rule, + pats: Box<[Pattern]>, + } + let mut by_term = HashMap::new(); + for rule in env.rules.iter() { + if let sema::Pattern::Term(_, tid, ref vars) = rule.lhs { + let mut binds = Vec::new(); + let rule = RulePatterns { + rule, + pats: vars + .iter() + .map(|pat| Pattern::from_sema(env, &mut binds, pat)) + .collect(), + }; + by_term.entry(tid).or_insert_with(Vec::new).push(rule); } - ) { - return Errors::default(); } - let rows = env.rules_for_term(term.id); - let mut pairs = Vec::new(); - let mut cursor = &rows[..]; - while let Some((row, rest)) = cursor.split_first() { - cursor = rest; - pairs.extend(rest.iter().map(|other| (row, other))); + for rows in by_term.values() { + let mut cursor = &rows[..]; + while let Some((row, rest)) = cursor.split_first() { + cursor = rest; + pairs.extend(rest.iter().map(|other| (row, other))); + } } // Process rule pairs in parallel pairs .into_par_iter() - .fold(Errors::default, |mut errs, ((lid, left), (rid, right))| { - if env.get_rule(*lid).prio == env.get_rule(*rid).prio { - if check_overlap_pair(left, right) { - errs.add_edge(*lid, *rid); + .fold(Errors::default, |mut errs, (left, right)| { + if left.rule.prio == right.rule.prio { + if check_overlap_pair(&left.pats, &right.pats) { + errs.add_edge(left.rule.id, right.rule.id); } } errs @@ -251,11 +250,6 @@ impl<'a> Env<'a> { &self.termenv.rules[id.0] } - /// Fetch the term associated with this id. - fn get_term(&self, id: TermId) -> &Term { - &self.termenv.terms[id.0] - } - /// Fetch source information for a file id. fn get_source(&self, file: usize) -> Source { Source::new( @@ -263,28 +257,6 @@ impl<'a> Env<'a> { self.tyenv.file_texts[file].clone(), ) } - - /// The ids of all [`Rule`]s defined for this term. - fn rules_for_term(&self, id: TermId) -> Vec<(RuleId, Vec)> { - self.termenv - .rules - .iter() - .filter_map(|rule| { - if let sema::Pattern::Term(_, tid, vars) = &rule.lhs { - if *tid == id { - let mut binds = Vec::new(); - return Some(( - rule.id, - vars.iter() - .map(|pat| Pattern::from_sema(self, &mut binds, pat)) - .collect(), - )); - } - } - None - }) - .collect() - } } /// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. @@ -328,7 +300,7 @@ impl Pattern { /// would have introduced equalities with /// 3. [`sema::Pattern::Term`] instances are turned into either [`Pattern::Variant`] or /// [`Pattern::Extractor`] cases depending on their term kind. - fn from_sema(env: &Env, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { + fn from_sema(env: &TermEnv, binds: &mut Vec<(VarId, Pattern)>, pat: &sema::Pattern) -> Self { match pat { sema::Pattern::BindPattern(_, id, pat) => { let pat = Self::from_sema(env, binds, pat); @@ -387,7 +359,7 @@ impl Pattern { .map(|pat| Pattern::from_sema(env, binds, pat)) .collect(); - match &env.get_term(id).kind { + match &env.terms[id.0].kind { TermKind::EnumVariant { .. } => Pattern::Variant { id, pats }, TermKind::Decl { .. } => Pattern::Extractor { id, pats }, } From 6534a84413473a82ff7024af96261a926be779a6 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 15:44:10 -0700 Subject: [PATCH 22/27] Remove nodes as soon as they have no edges left --- cranelift/isle/isle/src/overlap.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 3078595dd35d..69319f6a0bab 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -1,6 +1,7 @@ //! Overlap detection for rules in ISLE. use rayon::prelude::*; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use crate::error::{Error, Result, Source, Span}; @@ -80,14 +81,13 @@ impl Errors { .max_by_key(|id| self.nodes[id].edges.len()) { let node = self.nodes.remove(&id).unwrap(); - - if node.edges.is_empty() { - break; - } - for other in node.edges.iter() { - if let Some(other) = self.nodes.get_mut(&other) { + if let Entry::Occupied(mut entry) = self.nodes.entry(*other) { + let other = entry.get_mut(); other.remove_edge(id); + if other.edges.is_empty() { + entry.remove(); + } } } From c97fe05e5a148d5b777cecbb07c5b38c879557a2 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 15:50:39 -0700 Subject: [PATCH 23/27] Iterate over values instead of looking them up by key --- cranelift/isle/isle/src/overlap.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 69319f6a0bab..aeb847a0b14a 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -74,12 +74,7 @@ impl Errors { (src, span) }; - while let Some(id) = self - .nodes - .keys() - .copied() - .max_by_key(|id| self.nodes[id].edges.len()) - { + while let Some((&id, _)) = self.nodes.iter().max_by_key(|(_, node)| node.edges.len()) { let node = self.nodes.remove(&id).unwrap(); for other in node.edges.iter() { if let Entry::Occupied(mut entry) = self.nodes.entry(*other) { From e0339a942f4d40df5363196a6574ef704a37cd12 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 16:00:09 -0700 Subject: [PATCH 24/27] Update comments --- cranelift/isle/isle/src/overlap.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index aeb847a0b14a..81f5104291c1 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -108,9 +108,9 @@ impl Errors { } } -/// Determine if any rules that rewrite the given term overlap in the input that they accept. This -/// checkes every unique pair of rules, as checking rules in aggregate tends to suffer from -/// exponential explosion in the presence of wildcard patterns. +/// Determine if any rules overlap in the input that they accept. This checkes every unique pair of +/// rules, as checking rules in aggregate tends to suffer from exponential explosion in the +/// presence of wildcard patterns. fn check_overlaps(env: &TermEnv) -> Errors { struct RulePatterns<'a> { rule: &'a Rule, @@ -131,6 +131,10 @@ fn check_overlaps(env: &TermEnv) -> Errors { } } + // Sequentially identify all rule pairs which are in the same term. We could make this a + // parallel iterator, but that's harder to read and this loop is fast. Also, Rayon can + // efficiently partition a vector across multiple CPUs, which it might have more trouble with + // if this were an iterator. let mut pairs = Vec::new(); for rows in by_term.values() { let mut cursor = &rows[..]; @@ -140,7 +144,8 @@ fn check_overlaps(env: &TermEnv) -> Errors { } } - // Process rule pairs in parallel + // Process rule pairs in parallel. Rayon makes this easy and we have independent bite-sized + // chunks of work, so we might as well take advantage of multiple CPUs if they're available. pairs .into_par_iter() .fold(Errors::default, |mut errs, (left, right)| { From c5490c0d018ee96776c23aea53435088b891a8ab Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 16:06:34 -0700 Subject: [PATCH 25/27] Remove `Env` wrapper in favor of TypeEnv/TermEnv --- cranelift/isle/isle/src/overlap.rs | 42 +++++++----------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 81f5104291c1..551943a51654 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -9,8 +9,7 @@ use crate::sema::{self, Rule, RuleId, Sym, TermEnv, TermId, TermKind, TypeEnv, V /// Check for overlap. pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { - let env = Env::new(tyenv, termenv); - let mut errors = check_overlaps(termenv).report(&env); + let mut errors = check_overlaps(termenv).report(tyenv, termenv); if cfg!(feature = "overlap-errors") { match errors.len() { 0 => Ok(()), @@ -63,13 +62,16 @@ impl Errors { /// nodes from the graph with the highest degree, reporting errors for them and their direct /// connections. The goal with reporting errors this way is to prefer reporting rules that /// overlap with many others first, and then report other more targeted overlaps later. - fn report(mut self, env: &Env) -> Vec { + fn report(mut self, tyenv: &TypeEnv, termenv: &TermEnv) -> Vec { let mut errors = Vec::new(); - let get_info = |id| { - let rule = env.get_rule(id); - - let src = env.get_source(rule.pos.file); + let get_info = |id: RuleId| { + let rule = &termenv.rules[id.0]; + let file = rule.pos.file; + let src = Source::new( + tyenv.filenames[file].clone(), + tyenv.file_texts[file].clone(), + ); let span = Span::new_single(rule.pos); (src, span) }; @@ -233,32 +235,6 @@ fn check_overlap_pair(a: &[Pattern], b: &[Pattern]) -> bool { true } -/// A convenience wrapper around the `TypeEnv` and `TermEnv` environments. -struct Env<'a> { - tyenv: &'a TypeEnv, - termenv: &'a TermEnv, -} - -impl<'a> Env<'a> { - /// Construct a new [`Env`]. - fn new(tyenv: &'a TypeEnv, termenv: &'a TermEnv) -> Self { - Self { tyenv, termenv } - } - - /// Fetch the rule associated with this id. - fn get_rule(&self, id: RuleId) -> &Rule { - &self.termenv.rules[id.0] - } - - /// Fetch source information for a file id. - fn get_source(&self, file: usize) -> Source { - Source::new( - self.tyenv.filenames[file].clone(), - self.tyenv.file_texts[file].clone(), - ) - } -} - /// A version of [`sema::Pattern`] with some simplifications to make overlap checking easier. #[derive(Debug, Clone)] enum Pattern { From 569b3f562e7fdf2179bc27d9c7a0fff4994b2fd1 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 16:35:19 -0700 Subject: [PATCH 26/27] Remove `Node` abstraction --- cranelift/isle/isle/src/overlap.rs | 44 ++++++++++-------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 551943a51654..6b839f73feb8 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -23,37 +23,21 @@ pub fn check(tyenv: &TypeEnv, termenv: &TermEnv) -> Result<()> { } } -/// A node in the error graph. -#[derive(Default)] -struct Node { - /// Other rules that this one overlaps with. - edges: HashSet, -} - -impl Node { - /// Add an edge between this node and the other node. - fn add_edge(&mut self, other: RuleId) { - self.edges.insert(other); - } - - /// Remove an edge between this node and another node. - fn remove_edge(&mut self, other: RuleId) { - self.edges.remove(&other); - } -} - /// A graph of rules that overlap in the ISLE source. The edges are undirected. #[derive(Default)] struct Errors { /// Edges between rules indicating overlap. - nodes: HashMap, + nodes: HashMap>, } impl Errors { /// Merge together two Error graphs. fn union(mut self, other: Self) -> Self { - for (id, node) in other.nodes { - self.nodes.entry(id).or_default().edges.extend(node.edges); + for (id, edges) in other.nodes { + match self.nodes.entry(id) { + Entry::Occupied(entry) => entry.into_mut().extend(edges), + Entry::Vacant(entry) => _ = entry.insert(edges), + } } self } @@ -76,13 +60,13 @@ impl Errors { (src, span) }; - while let Some((&id, _)) = self.nodes.iter().max_by_key(|(_, node)| node.edges.len()) { + while let Some((&id, _)) = self.nodes.iter().max_by_key(|(_, edges)| edges.len()) { let node = self.nodes.remove(&id).unwrap(); - for other in node.edges.iter() { + for other in node.iter() { if let Entry::Occupied(mut entry) = self.nodes.entry(*other) { - let other = entry.get_mut(); - other.remove_edge(id); - if other.edges.is_empty() { + let back_edges = entry.get_mut(); + back_edges.remove(&id); + if back_edges.is_empty() { entry.remove(); } } @@ -91,7 +75,7 @@ impl Errors { // build the real error let mut rules = vec![get_info(id)]; - rules.extend(node.edges.into_iter().map(get_info)); + rules.extend(node.into_iter().map(get_info)); errors.push(Error::OverlapError { msg: String::from("rules are overlapping"), @@ -105,8 +89,8 @@ impl Errors { /// Add a bidirectional edge between two rules in the graph. fn add_edge(&mut self, a: RuleId, b: RuleId) { // edges are undirected - self.nodes.entry(a).or_default().add_edge(b); - self.nodes.entry(b).or_default().add_edge(a); + self.nodes.entry(a).or_default().insert(b); + self.nodes.entry(b).or_default().insert(a); } } From 01b078b2dfe921b1852e1e452ee90a368218bc29 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 19 Sep 2022 16:53:45 -0700 Subject: [PATCH 27/27] Use boxed slices instead of Vec for immutable arrays --- cranelift/isle/isle/src/overlap.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cranelift/isle/isle/src/overlap.rs b/cranelift/isle/isle/src/overlap.rs index 6b839f73feb8..fb826f68fcb5 100644 --- a/cranelift/isle/isle/src/overlap.rs +++ b/cranelift/isle/isle/src/overlap.rs @@ -235,18 +235,18 @@ enum Pattern { /// Enum variant constructors. Variant { id: TermId, - pats: Vec, + pats: Box<[Pattern]>, }, /// Conjunctions of patterns. And { - pats: Vec, + pats: Box<[Pattern]>, }, /// Extractor uses (both fallible and infallible). Extractor { id: TermId, - pats: Vec, + pats: Box<[Pattern]>, }, Wildcard,