diff --git a/src/librustc/back/svh.rs b/src/librustc/back/svh.rs index 8a2f7ff197650..2b8594364dae9 100644 --- a/src/librustc/back/svh.rs +++ b/src/librustc/back/svh.rs @@ -51,6 +51,7 @@ use std::hash::Hash; use std::hash::sip::SipState; use std::iter::range_step; use syntax::ast; +use syntax::visit; #[deriving(Clone, Eq)] pub struct Svh { @@ -68,25 +69,28 @@ impl Svh { } pub fn calculate(krate: &ast::Crate) -> Svh { - // FIXME: see above for why this is wrong, it shouldn't just hash the - // crate. Fixing this would require more in-depth analysis in - // this function about what portions of the crate are reachable - // in tandem with bug fixes throughout the rest of the compiler. - // - // Note that for now we actually exclude some top-level things - // from the crate like the CrateConfig/span. The CrateConfig - // contains command-line `--cfg` flags, so this means that the - // stage1/stage2 AST for libstd and such is different hash-wise - // when it's actually the exact same representation-wise. - // - // As a first stab at only hashing the relevant parts of the - // AST, this only hashes the module/attrs, not the CrateConfig - // field. - // + // FIXME (#14132): This is better than it used to be, but it still not + // ideal. We now attempt to hash only the relevant portions of the + // Crate AST as well as the top-level crate attributes. (However, + // the hashing of the crate attributes should be double-checked + // to ensure it is not incorporating implementation artifacts into + // the hash that are not otherwise visible.) + // FIXME: this should use SHA1, not SipHash. SipHash is not built to // avoid collisions. let mut state = SipState::new(); - krate.module.hash(&mut state); + + { + let mut visit = svh_visitor::make(&mut state); + visit::walk_crate(&mut visit, krate, ()); + } + + // FIXME (#14132): This hash is still sensitive to e.g. the + // spans of the crate Attributes and their underlying + // MetaItems; we should make ContentHashable impl for those + // types and then use hash_content. But, since all crate + // attributes should appear near beginning of the file, it is + // not such a big deal to be sensitive to their spans for now. krate.attrs.hash(&mut state); let hash = state.result(); @@ -110,3 +114,391 @@ impl fmt::Show for Svh { f.pad(self.as_str()) } } + +// FIXME (#14132): Even this SVH computation still has implementation +// artifacts: namely, the order of item declaration will affect the +// hash computation, but for many kinds of items the order of +// declaration should be irrelevant to the ABI. + +mod svh_visitor { + use syntax::ast; + use syntax::ast::*; + use syntax::codemap::Span; + use syntax::parse::token; + use syntax::print::pprust; + use syntax::visit; + use syntax::visit::{Visitor, FnKind}; + + use std::hash::Hash; + use std::hash::sip::SipState; + + pub struct StrictVersionHashVisitor<'a> { + pub st: &'a mut SipState, + } + + pub fn make<'a>(st: &'a mut SipState) -> StrictVersionHashVisitor<'a> { + StrictVersionHashVisitor { st: st } + } + + // To off-load the bulk of the hash-computation on deriving(Hash), + // we define a set of enums corresponding to the content that our + // crate visitor will encounter as it traverses the ast. + // + // The important invariant is that all of the Saw*Component enums + // do not carry any Spans, Names, or Idents. + // + // Not carrying any Names/Idents is the important fix for problem + // noted on PR #13948: using the ident.name as the basis for a + // hash leads to unstable SVH, because ident.name is just an index + // into intern table (i.e. essentially a random address), not + // computed from the name content. + // + // With the below enums, the SVH computation is not sensitive to + // artifacts of how rustc was invoked nor of how the source code + // was laid out. (Or at least it is *less* sensitive.) + + // This enum represents the different potential bits of code the + // visitor could encounter that could affect the ABI for the crate, + // and assigns each a distinct tag to feed into the hash computation. + #[deriving(Hash)] + enum SawAbiComponent<'a> { + + // FIXME (#14132): should we include (some function of) + // ident.ctxt as well? + SawIdent(token::InternedString), + SawStructDef(token::InternedString), + + SawLifetimeRef(token::InternedString), + SawLifetimeDecl(token::InternedString), + + SawMod, + SawViewItem, + SawForeignItem, + SawItem, + SawDecl, + SawTy, + SawGenerics, + SawFn, + SawTyMethod, + SawTraitMethod, + SawStructField, + SawVariant, + SawExplicitSelf, + SawPath, + SawOptLifetimeRef, + SawBlock, + SawPat, + SawLocal, + SawArm, + SawExpr(SawExprComponent<'a>), + SawStmt(SawStmtComponent), + } + + /// SawExprComponent carries all of the information that we want + /// to include in the hash that *won't* be covered by the + /// subsequent recursive traversal of the expression's + /// substructure by the visitor. + /// + /// We know every Expr_ variant is covered by a variant because + /// `fn saw_expr` maps each to some case below. Ensuring that + /// each variant carries an appropriate payload has to be verified + /// by hand. + /// + /// (However, getting that *exactly* right is not so important + /// because the SVH is just a developer convenience; there is no + /// guarantee of collision-freedom, hash collisions are just + /// (hopefully) unlikely.) + #[deriving(Hash)] + pub enum SawExprComponent<'a> { + + SawExprLoop(Option), + SawExprField(token::InternedString), + SawExprBreak(Option), + SawExprAgain(Option), + + SawExprVstore, + SawExprBox, + SawExprVec, + SawExprCall, + SawExprMethodCall, + SawExprTup, + SawExprBinary(ast::BinOp), + SawExprUnary(ast::UnOp), + SawExprLit(ast::Lit_), + SawExprCast, + SawExprIf, + SawExprWhile, + SawExprMatch, + SawExprFnBlock, + SawExprProc, + SawExprBlock, + SawExprAssign, + SawExprAssignOp(ast::BinOp), + SawExprIndex, + SawExprPath, + SawExprAddrOf(ast::Mutability), + SawExprRet, + SawExprInlineAsm(&'a ast::InlineAsm), + SawExprStruct, + SawExprRepeat, + SawExprParen, + } + + fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> { + match *node { + ExprVstore(..) => SawExprVstore, + ExprBox(..) => SawExprBox, + ExprVec(..) => SawExprVec, + ExprCall(..) => SawExprCall, + ExprMethodCall(..) => SawExprMethodCall, + ExprTup(..) => SawExprTup, + ExprBinary(op, _, _) => SawExprBinary(op), + ExprUnary(op, _) => SawExprUnary(op), + ExprLit(lit) => SawExprLit(lit.node.clone()), + ExprCast(..) => SawExprCast, + ExprIf(..) => SawExprIf, + ExprWhile(..) => SawExprWhile, + ExprLoop(_, id) => SawExprLoop(id.map(content)), + ExprMatch(..) => SawExprMatch, + ExprFnBlock(..) => SawExprFnBlock, + ExprProc(..) => SawExprProc, + ExprBlock(..) => SawExprBlock, + ExprAssign(..) => SawExprAssign, + ExprAssignOp(op, _, _) => SawExprAssignOp(op), + ExprField(_, id, _) => SawExprField(content(id)), + ExprIndex(..) => SawExprIndex, + ExprPath(..) => SawExprPath, + ExprAddrOf(m, _) => SawExprAddrOf(m), + ExprBreak(id) => SawExprBreak(id.map(content)), + ExprAgain(id) => SawExprAgain(id.map(content)), + ExprRet(..) => SawExprRet, + ExprInlineAsm(ref asm) => SawExprInlineAsm(asm), + ExprStruct(..) => SawExprStruct, + ExprRepeat(..) => SawExprRepeat, + ExprParen(..) => SawExprParen, + + // just syntactic artifacts, expanded away by time of SVH. + ExprForLoop(..) => unreachable!(), + ExprMac(..) => unreachable!(), + } + } + + /// SawStmtComponent is analogous to SawExprComponent, but for statements. + #[deriving(Hash)] + pub enum SawStmtComponent { + SawStmtDecl, + SawStmtExpr, + SawStmtSemi, + } + + fn saw_stmt(node: &Stmt_) -> SawStmtComponent { + match *node { + StmtDecl(..) => SawStmtDecl, + StmtExpr(..) => SawStmtExpr, + StmtSemi(..) => SawStmtSemi, + StmtMac(..) => unreachable!(), + } + } + + // Ad-hoc overloading between Ident and Name to their intern table lookups. + trait InternKey { fn get_content(self) -> token::InternedString; } + impl InternKey for Ident { + fn get_content(self) -> token::InternedString { token::get_ident(self) } + } + impl InternKey for Name { + fn get_content(self) -> token::InternedString { token::get_name(self) } + } + fn content(k: K) -> token::InternedString { k.get_content() } + + // local short-hand eases writing signatures of syntax::visit mod. + type E = (); + + impl<'a> Visitor for StrictVersionHashVisitor<'a> { + + fn visit_mac(&mut self, macro: &Mac, e: E) { + // macro invocations, namely macro_rules definitions, + // *can* appear as items, even in the expanded crate AST. + + if macro_name(macro).get() == "macro_rules" { + // Pretty-printing definition to a string strips out + // surface artifacts (currently), such as the span + // information, yielding a content-based hash. + + // FIXME (#14132): building temporary string is + // expensive; a direct content-based hash on token + // trees might be faster. Implementing this is far + // easier in short term. + let macro_defn_as_string = + pprust::to_str(|pp_state| pp_state.print_mac(macro)); + macro_defn_as_string.hash(self.st); + } else { + // It is not possible to observe any kind of macro + // invocation at this stage except `macro_rules!`. + fail!("reached macro somehow: {}", + pprust::to_str(|pp_state| pp_state.print_mac(macro))); + } + + visit::walk_mac(self, macro, e); + + fn macro_name(macro: &Mac) -> token::InternedString { + match ¯o.node { + &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => { + let s = path.segments.as_slice(); + assert_eq!(s.len(), 1); + content(s[0].identifier) + } + } + } + } + + fn visit_struct_def(&mut self, s: &StructDef, ident: Ident, + g: &Generics, n: NodeId, e: E) { + SawStructDef(content(ident)).hash(self.st); + // For some reason walk_struct_def does not call walk_generics. + visit::walk_generics(self, g, e.clone()); + visit::walk_struct_def(self, s, ident, g, n, e) + } + + fn visit_variant(&mut self, v: &Variant, g: &Generics, e: E) { + SawVariant.hash(self.st); + // For some reason walk_variant does not call walk_generics. + visit::walk_generics(self, g, e.clone()); + visit::walk_variant(self, v, g, e) + } + + fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option, env: E) { + SawOptLifetimeRef.hash(self.st); + // (This is a strange method in the visitor trait, in that + // it does not expose a walk function to do the subroutine + // calls.) + match *l { + Some(ref l) => self.visit_lifetime_ref(l, env), + None => () + } + } + + // All of the remaining methods just record (in the hash + // SipState) that the visitor saw that particular variant + // (with its payload), and continue walking as the default + // visitor would. + // + // Some of the implementations have some notes as to how one + // might try to make their SVH computation less discerning + // (e.g. by incorporating reachability analysis). But + // currently all of their implementations are uniform and + // uninteresting. + // + // (If you edit a method such that it deviates from the + // pattern, please move that method up above this comment.) + + fn visit_ident(&mut self, _: Span, ident: Ident, _: E) { + SawIdent(content(ident)).hash(self.st); + } + + fn visit_lifetime_ref(&mut self, l: &Lifetime, _: E) { + SawLifetimeRef(content(l.name)).hash(self.st); + } + + fn visit_lifetime_decl(&mut self, l: &Lifetime, _: E) { + SawLifetimeDecl(content(l.name)).hash(self.st); + } + + // We do recursively walk the bodies of functions/methods + // (rather than omitting their bodies from the hash) since + // monomorphization and cross-crate inlining generally implies + // that a change to a crate body will require downstream + // crates to be recompiled. + fn visit_expr(&mut self, ex: &Expr, e: E) { + SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex, e) + } + + fn visit_stmt(&mut self, s: &Stmt, e: E) { + SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s, e) + } + + fn visit_view_item(&mut self, i: &ViewItem, e: E) { + // Two kinds of view items can affect the ABI for a crate: + // exported `pub use` view items (since that may expose + // items that downstream crates can call), and `use + // foo::Trait`, since changing that may affect method + // resolution. + // + // The simplest approach to handling both of the above is + // just to adopt the same simple-minded (fine-grained) + // hash that I am deploying elsewhere here. + SawViewItem.hash(self.st); visit::walk_view_item(self, i, e) + } + + fn visit_foreign_item(&mut self, i: &ForeignItem, e: E) { + // FIXME (#14132) ideally we would incorporate privacy (or + // perhaps reachability) somewhere here, so foreign items + // that do not leak into downstream crates would not be + // part of the ABI. + SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i, e) + } + + fn visit_item(&mut self, i: &Item, e: E) { + // FIXME (#14132) ideally would incorporate reachability + // analysis somewhere here, so items that never leak into + // downstream crates (e.g. via monomorphisation or + // inlining) would not be part of the ABI. + SawItem.hash(self.st); visit::walk_item(self, i, e) + } + + fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId, e: E) { + SawMod.hash(self.st); visit::walk_mod(self, m, e) + } + + fn visit_decl(&mut self, d: &Decl, e: E) { + SawDecl.hash(self.st); visit::walk_decl(self, d, e) + } + + fn visit_ty(&mut self, t: &Ty, e: E) { + SawTy.hash(self.st); visit::walk_ty(self, t, e) + } + + fn visit_generics(&mut self, g: &Generics, e: E) { + SawGenerics.hash(self.st); visit::walk_generics(self, g, e) + } + + fn visit_fn(&mut self, fk: &FnKind, fd: &FnDecl, b: &Block, s: Span, n: NodeId, e: E) { + SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s, n, e) + } + + fn visit_ty_method(&mut self, t: &TypeMethod, e: E) { + SawTyMethod.hash(self.st); visit::walk_ty_method(self, t, e) + } + + fn visit_trait_method(&mut self, t: &TraitMethod, e: E) { + SawTraitMethod.hash(self.st); visit::walk_trait_method(self, t, e) + } + + fn visit_struct_field(&mut self, s: &StructField, e: E) { + SawStructField.hash(self.st); visit::walk_struct_field(self, s, e) + } + + fn visit_explicit_self(&mut self, es: &ExplicitSelf, e: E) { + SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es, e) + } + + fn visit_path(&mut self, path: &Path, _: ast::NodeId, e: E) { + SawPath.hash(self.st); visit::walk_path(self, path, e) + } + + fn visit_block(&mut self, b: &Block, e: E) { + SawBlock.hash(self.st); visit::walk_block(self, b, e) + } + + fn visit_pat(&mut self, p: &Pat, e: E) { + SawPat.hash(self.st); visit::walk_pat(self, p, e) + } + + fn visit_local(&mut self, l: &Local, e: E) { + SawLocal.hash(self.st); visit::walk_local(self, l, e) + } + + fn visit_arm(&mut self, a: &Arm, e: E) { + SawArm.hash(self.st); visit::walk_arm(self, a, e) + } + } +}