diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 0018d3094636..99a58d1e751f 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1346,21 +1346,21 @@ ;; result = [ vec[3] vec[2] tmp[0] tmp[2] ] (rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 1) (let ((tmp Xmm (x64_movlhps val vec))) - (x64_shufps tmp vec 0xe2))) ;; 0xe2 == 0b11_10_00_10 + (x64_shufps tmp vec 0b11_10_00_10))) ;; f32x4.replace_lane 2 - without insertps ;; tmp = [ vec[0] vec[3] val[0] val[0] ] ;; result = [ tmp[2] tmp[0] vec[1] vec[0] ] (rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 2) - (let ((tmp Xmm (x64_shufps val vec 0x30))) ;; 0x30 == 0b00_11_00_00 - (x64_shufps vec tmp 0x84))) ;; 0x84 == 0b10_00_01_00 + (let ((tmp Xmm (x64_shufps val vec 0b00_11_00_00))) + (x64_shufps vec tmp 0b10_00_01_00))) ;; f32x4.replace_lane 3 - without insertps ;; tmp = [ vec[3] vec[2] val[1] val[0] ] ;; result = [ tmp[0] tmp[2] vec[1] vec[0] ] (rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 3) - (let ((tmp Xmm (x64_shufps val vec 0xe4))) ;; 0xe4 == 0b11_10_01_00 - (x64_shufps vec tmp 0x24))) ;; 0x24 == 0b00_10_01_00 + (let ((tmp Xmm (x64_shufps val vec 0b11_10_01_00))) + (x64_shufps vec tmp 0b00_10_01_00))) ;; Recursively delegate to the above rules by loading from memory first. 
(rule (vec_insert_lane $F32X4 vec (RegMem.Mem addr) idx) @@ -3422,7 +3422,7 @@ (a Xmm a) (libcall LibCall (round_libcall $F64 imm)) (result Xmm (libcall_1 libcall a)) - (a1 Xmm (libcall_1 libcall (x64_pshufd a 0x0e))) ;; 0x0e == 0b00_00_11_10 + (a1 Xmm (libcall_1 libcall (x64_pshufd a 0b00_00_11_10))) (result Xmm (vec_insert_lane $F64X2 result a1 1)) ) result)) @@ -3818,8 +3818,7 @@ ;; This is the only remaining case for F64X2 (rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty)) (u8_from_uimm8 1)))) - ;; 0xee == 0b11_10_11_10 - (x64_pshufd val 0xee)) + (x64_pshufd val 0b11_10_11_10)) ;; Note that the `pextrb` lowering here is relied upon by the `extend_to_gpr` ;; helper because it will elide a `uextend` operation when `extractlane` is the diff --git a/cranelift/isle/docs/language-reference.md b/cranelift/isle/docs/language-reference.md index 0f430e411515..cb3cadbe267a 100644 --- a/cranelift/isle/docs/language-reference.md +++ b/cranelift/isle/docs/language-reference.md @@ -21,11 +21,11 @@ instructions. For example: - An `iadd` (integer add) operator can always be lowered to an x86 `ADD` instruction with two register sources. - + - An `iadd` operator with one `iconst` (integer-constant) argument can be lowered to an x86 `ADD` instruction with a register and an immediate. - + One could write something like the following in ISLE (simplified from the real code [here](../codegen/src/isa/x64/lower.isle)): @@ -43,7 +43,7 @@ the real code [here](../codegen/src/isa/x64/lower.isle)): ;; `y` is a `RegMemImm.Imm`. 
y))) ``` - + ISLE lets the compiler backend developer express this information in a declarative way -- i.e., just write down a list of patterns, without worrying how the compilation process tries them out -- and the ISLE @@ -68,16 +68,16 @@ This document is organized into the following sections: systems work, how to think about nested terms, patterns and rewrite rules, how they provide a general mechanism for computation, and how term-rewriting is often used in a compiler-implementation context. - + * Core ISLE: the foundational concepts of the ISLE DSL, building upon a general-purpose term-rewriting base. Covers the type system (typed terms) and how rules are written. - + * ISLE with Rust: covers how ISLE provides an "FFI" (foreign function interface) of sorts to allow interaction with Rust code, and describes the scheme by which ISLE execution is mapped onto Rust (data structures and control flow).[^1] - + * ISLE Internals: describes how the ISLE compiler works. Provides insight into how an unordered collection of rewrite rules are combined into executable Rust code that efficiently traverses the @@ -161,7 +161,7 @@ form, or at least can be interpreted that way.[^3] structure that is present in those symbols in any well-formed sequence. For example, we can define a TRS that only operates on terms with balanced parentheses; then we have our tree. - + In ISLE and hence in this document, we operate on terms that are written in an [S-expression](https://en.wikipedia.org/wiki/S-expression) syntax, @@ -239,9 +239,9 @@ right-hand nomenclature comes from a common way of writing rules as: ```plain A -> B ;; any term "A" is rewritten to "B" - + (A x) -> (B (C x)) ;; any term (A x), for some x, is rewritten to (B (C x)). - + (A _) -> (D) ;; any term (A _), where `_` is a wildcard (any subterm), ;; is rewritten to (D). ``` @@ -254,16 +254,16 @@ subterms: * `(A pat1 pat2 ...)` matches a constructor `A` with patterms for each of its arguments. 
- + * `x` matches any subterm and captures its value in a variable binding, which can be used later when we specify the right-hand side (so that the rewrite contains parts of the original term). - + * `_` is a wildcard and matches anything, without capturing it. * Primitive constant values, such as `42` or `$Symbol`, match only if the term is exactly equal to this constant. - + These pattern-matching operators can be combined, so we could write, for example, `(A (B x _) z)`. This pattern would match the term `(A (B 1 2) 3)` but not `(A (C 4 5) 6)`. @@ -388,7 +388,7 @@ clarity. separate Rust functions, so factoring rules to use intermediate terms can provide code-size and compile-time benefits for the ISLE-generated Rust code as well. - + [^6]: The [lambda calculus' reduction rules](https://en.wikipedia.org/wiki/Lambda_calculus#Reduction) are a good example of this. @@ -415,9 +415,9 @@ rewrites it into a machine-*dependent* instruction term. For example: ```plain (iadd a b) -> (isa.add_reg_reg a b) - + (iadd a (iconst 0)) -> a - + (iadd a (iconst n)) (isa.add_reg_imm a n) ``` @@ -494,8 +494,11 @@ The pattern (left-hand side) is made up of the following match operators: * Wildcards (`_`). -* Integer constants (decimal/hex, positive/negative: `1`, `-1`, - `0x80`, `-0x80`) and boolean constants (`#t`, `#f`). +* Integer constants (decimal/hex/binary/octal, positive/negative: `1`, `-1`, + `0x80`, `-0x80`, `0b1010`, `0o17`) and boolean constants (`#t`, `#f`). Hex + constants start with either `0x` or `0X`; binary constants start with `0b` + and octal constants with `0o` (lowercase only). Digits may be separated + with `_` for readability, for example `1_000` or `0x1234_5678`. * constants imported from the embedding, of arbitrary type (`$MyConst`). * Variable captures and matches (bare identifiers like `x`; an @@ -515,7 +518,7 @@ operators: defined term (type variant or constructor) and the subpatterns are applied to each argument value in turn. 
Note that `term` cannot be a wildcard; it must be a specific, concrete term. - + The expression (right-hand side) is made up of the following expression operators: @@ -530,7 +533,7 @@ expression operators: to the immediately previous variable bindings (i.e., this is like a `let*` in Scheme). `let`s are lexically-scoped, meaning that bound variables are available only within the body of the `let`. - + When multiple rules are applicable to rewrite a particular term, ISLE will choose the "more specific" rule according to a particular heuristic: in the lowered sequence of matching steps, when one @@ -573,14 +576,14 @@ of type definitions are: (type u32 (primitive u32)) ;; u32 is a primitive, and is ;; spelled `u32` in the generated Rust code. - + (type MyType (enum (A (x u32) (y u32)) (B (z u32)) (C))) ;; MyType is an enum, with variants ;; `MyType::A { x, y }`, `MyType::B { z }`, ;; and `MyType::C`. - + (type MyType2 extern (enum (A))) ;; MyType2 is an enum with variant `MyType2::A`. ;; Its type definition is not included in the @@ -611,11 +614,11 @@ automatically have the following constructors: ;; These definitions are implicit and do not need to be written (doing ;; so is a compile-time error, actually). We write them here just to ;; show what they would look like. - + (decl MyType.A (u32 u32) MyType) (decl MyType.B (u32) MyType) (decl MyType.C () MyType) - + (decl MyType2.A () MyType2) ``` @@ -671,39 +674,39 @@ The typing rules for patterns in ISLE are: constant, etc.). This is because compilation and dispatch into rules is organized by the top-level constructor of the term being rewritten. - + * At each part of the pattern except the root, there is an "expected type" that is inferred from the surrounding context. We check that this matches the actual type of the pattern. - + * A constructor pattern `(C x y z)`, given a constructor `(decl C (T1 T2 T2) R)`, has type `R` and provides expected types `T1`, `T2`, and `T3` to its subpatterns. 
- + * A variable capture pattern `x` is compatible with any expected type the first time it appears, and captures this expected type under the variable identifier `x` in the type environment. Subsequent appearances of `x` check that the expected type matches the already-captured type. - + * A conjunction `(and PAT1 PAT2 ...)` checks that each subpattern is compatible with the expected type. - + * Integer constants are compatible with any primitive expected type. (This may change in the future if we add non-numeric primitives, such as strings.) - + If we are able to typecheck the pattern, we have a type environment that is a map from variable bindings to types: e.g., `{ x: MyType, y: MyType2, z: u32 }`. We then typecheck the rewrite expression. * Every expression also has an expected type, from the surrounding context. We check that the provided expression matches this type. - + * The top-level rewrite expression must have the same type as the top-level constructor in the pattern. (In other words, a term can only be rewritten to another term of the same type.) - + * Constructors check their return values against the expected type, and typecheck their argument expressions against their parameter types. @@ -712,7 +715,7 @@ MyType2, z: u32 }`. We then typecheck the rewrite expression. these are added to the type environment while typechecking the body. The expected type for the body is the same as the expected type for the `let` itself. - + ### A Note on Heterogeneous Types We should illuminate one particular aspect of the ISLE type system @@ -726,9 +729,9 @@ a `T2`. Concretely: ```lisp (type T1 ...) (type T2 ...) - + (decl Translate (T1) T2) - + (rule (Translate (T1.A ...)) (T2.X ...)) (rule (Translate (T1.B ...)) @@ -760,10 +763,10 @@ one could have: (type IR ...) (type Machine1 ...) (type Machine2 ...) 
- + (decl TranslateToMachine1 (IR) Machine1) (decl TranslateToMachine2 (IR) Machine2) - + (rule (TranslateToMachine1 (IR.add a b)) (Machine1.add a b)) (rule (TranslateToMachine2 (IR.add a b)) (Machine2.weird_inst a b)) ``` @@ -872,7 +875,7 @@ For example, if one is writing a rule such as ```lisp (decl u_to_v (U) V) (rule ...) - + (decl MyTerm (T) V) (rule (MyTerm t) (u_to_v t)) @@ -885,7 +888,7 @@ its argument but `t` has type `T`. However, if we define ```lisp (convert T U t_to_u) - + ;; For the above to be valid, `t_to_u` should be declared with the ;; signature: (decl t_to_u (T) U) @@ -904,12 +907,12 @@ This also works in the extractor position: for example, if one writes ```lisp (decl defining_instruction (Inst) Value) (extern extractor definining_instruction ...) - + (decl iadd (Value Value) Inst) - + (rule (lower (iadd (iadd a b) c)) ...) - + (convert Inst Value defining_instruction) ``` @@ -933,12 +936,12 @@ A term can have: 1. A single internal extractor body, via a toplevel `(extractor ...)` form, OR - + 2. A single external extractor binding (see next section); AND - + 3. One or more `(rule (Term ...) ...)` toplevel forms, which together make up an internal constructor definition, OR - + 4. A single external constructor binding (see next section). ### If-Let Clauses @@ -1030,8 +1033,8 @@ for purity. #### `partial` Expressions -ISLE's `partial` keyword on a term indicates that the term's -constructors may fail to match, otherwise, the ISLE compiler assumes +ISLE's `partial` keyword on a term indicates that the term's +constructors may fail to match, otherwise, the ISLE compiler assumes the term's constructors are infallible. 
For example, the following term's constructor only matches if the value @@ -1043,7 +1046,7 @@ is zero: (extern constructor is_zero_value is_zero_value) ``` -Internal constructors without the `partial` keyword can +Internal constructors without the `partial` keyword can only use other constructors that also do not have the `partial` keyword. #### `if` Shorthand @@ -1104,7 +1107,7 @@ Rust. The basic principles are: side expression; this can invoke further constructors for its subparts, kicking off more rewrites, until eventually a value is returned. - + 4. This design means that "intermediate terms" -- constructed terms that are then further rewritten -- are never actually built as in-memory data-structures. Rather, they exist only as ephemeral @@ -1112,7 +1115,7 @@ Rust. The basic principles are: means that there is very little or no performance penalty to factoring code into many sub-rules (subject only to function-call overhead and/or the effectiveness of the Rust inliner). - + 5. Backtracking -- attempting to match rules, and backing up to follow a different path when a match fails -- exists, but is entirely internal to the generated Rust function for rewriting one @@ -1123,16 +1126,16 @@ Rust. The basic principles are: sides, trying to find a matching rule, and once we find one, we commit and start to invoke constructors to build the right-hand side. - + Said another way, the principle is that left-hand sides can be fallible, and have no side-effects as they execute; right-hand sides, in contrast, are infallible. This simplifies the control flow and makes reasoning about side-effects (especially with respect to external Rust actions) easier. - + This will become more clear as we look at how Rust interfaces are defined, and how the generated code appears, below. - + ### Extern Constructors and Extractors ISLE programs interact with the surrounding Rust code in which they @@ -1181,11 +1184,11 @@ and returns a `U`. 
External constructors are infallible: that is, they must succeed, and always return their return type. In contrast, internal constructors can be fallible because they are implemented by a list of rules whose -patterns may not cover the entire domain (in which case, the term -should be marked `partial`). If fallible behavior is needed when -invoking external Rust code, that behavior should occur in an extractor -(see below) instead: only pattern left-hand sides are meant to be -fallible. +patterns may not cover the entire domain (in which case, the term +should be marked `partial`). If fallible behavior is needed when +invoking external Rust code, that behavior should occur in an extractor +(see below) instead: only pattern left-hand sides are meant to be +fallible. #### Extractors @@ -1265,7 +1268,7 @@ This allows code to refer to `$I32` whenever a value of type `Type` is needed, in either a pattern (LHS) or an expression (RHS). These constants are pulled in via the same `use super::*` that imports all external types. - + ### Exported Interface: Functions and Context Trait The generated ISLE code provides an interface that is designed to be @@ -1310,7 +1313,7 @@ we have the following terms and declarations: ```lisp (decl A (u32 u32) T) (extern constructor A build_a) - + (decl B (T) U) (external extractor B disassemble_b) ``` @@ -1440,7 +1443,7 @@ newline). The grammar accepted by the parser is as follows: | "(" "rule" ")" | "(" "extractor" ")" | "(" "extern" ")" - + ::= [ "extern" ] ::= ( "A".."Z" | "a".."z" | "_" | "$" ) @@ -1449,6 +1452,8 @@ newline). The grammar accepted by the parser is as follows: ::= [ "-" ] ( "0".."9" )+ | [ "-" ] "0x" ( "0".."9" "A".."F" "a".."f" )+ + | [ "-" ] "0o" ( "0".."7" )+ + | [ "-" ] "0b" ( "0".."1" )+ ::= "(" "primitive" ")" | "(" "enum" * ")" @@ -1464,7 +1469,7 @@ newline). The grammar accepted by the parser is as follows: ::= | - + ::= ::= "(" * ")" @@ -1487,7 +1492,7 @@ newline). 
The grammar accepted by the parser is as follows: | | "(" "let" "(" * ")" ")" | "(" * ")" - + ::= "(" ")" ::= "constructor" diff --git a/cranelift/isle/isle/isle_examples/run/iconst.isle b/cranelift/isle/isle/isle_examples/run/iconst.isle index f59555d65674..84270607467d 100644 --- a/cranelift/isle/isle/isle_examples/run/iconst.isle +++ b/cranelift/isle/isle/isle_examples/run/iconst.isle @@ -15,3 +15,12 @@ (rule (Y -0x1000_0000_0000_0000_1234_5678_9abc_def0) 1) (rule (Y -0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) -3) + +;; Test decimal, hex, binary, and octal literals, with and without `_` separators +(type i32 (primitive i32)) +(decl partial Z (i32) i32) +(rule (Z 0) 0x01) +(rule (Z 0x01) 0x0_2) +(rule (Z 0b10) 3) +(rule (Z 0b1_1) 0o4) +(rule (Z 0o7654321) 0b11_00_11_00) diff --git a/cranelift/isle/isle/isle_examples/run/iconst_main.rs b/cranelift/isle/isle/isle_examples/run/iconst_main.rs index abce597a3392..d53edea43831 100644 --- a/cranelift/isle/isle/isle_examples/run/iconst_main.rs +++ b/cranelift/isle/isle/isle_examples/run/iconst_main.rs @@ -8,11 +8,44 @@ fn main() { assert_eq!(iconst::constructor_X(&mut ctx, -1), Some(-2)); assert_eq!(iconst::constructor_X(&mut ctx, -2), Some(-3)); - assert_eq!(iconst::constructor_X(&mut ctx, 0x7fff_ffff_ffff_ffff), Some(0x8000_0000_0000_0000u64 as i64)); - assert_eq!(iconst::constructor_X(&mut ctx, 0xffff_ffff_ffff_fff0_u64 as i64), Some(1)); + assert_eq!( + iconst::constructor_X(&mut ctx, 0x7fff_ffff_ffff_ffff), + Some(0x8000_0000_0000_0000u64 as i64) + ); + assert_eq!( + iconst::constructor_X(&mut ctx, 0xffff_ffff_ffff_fff0_u64 as i64), + Some(1) + ); - assert_eq!(iconst::constructor_Y(&mut ctx, 0x1000_0000_0000_0000_1234_5678_9abc_def0), Some(-1)); - assert_eq!(iconst::constructor_Y(&mut ctx, 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128), Some(3)); - assert_eq!(iconst::constructor_Y(&mut ctx, -0x1000_0000_0000_0000_1234_5678_9abc_def0), Some(1)); - assert_eq!(iconst::constructor_Y(&mut ctx, -(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128)), 
Some(-3)); + assert_eq!( + iconst::constructor_Y(&mut ctx, 0x1000_0000_0000_0000_1234_5678_9abc_def0), + Some(-1) + ); + assert_eq!( + iconst::constructor_Y( + &mut ctx, + 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128 + ), + Some(3) + ); + assert_eq!( + iconst::constructor_Y(&mut ctx, -0x1000_0000_0000_0000_1234_5678_9abc_def0), + Some(1) + ); + assert_eq!( + iconst::constructor_Y( + &mut ctx, + -(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128) + ), + Some(-3) + ); + + assert_eq!(iconst::constructor_Z(&mut ctx, 0), Some(1)); + assert_eq!(iconst::constructor_Z(&mut ctx, 1), Some(2)); + assert_eq!(iconst::constructor_Z(&mut ctx, 2), Some(3)); + assert_eq!(iconst::constructor_Z(&mut ctx, 3), Some(4)); + assert_eq!( + iconst::constructor_Z(&mut ctx, 0o7654321), + Some(0b11_00_11_00) + ); } diff --git a/cranelift/isle/isle/src/lexer.rs b/cranelift/isle/isle/src/lexer.rs index 3eb048748125..2c8ee2540a05 100644 --- a/cranelift/isle/isle/src/lexer.rs +++ b/cranelift/isle/isle/src/lexer.rs @@ -243,14 +243,27 @@ impl<'a> Lexer<'a> { let mut radix = 10; - // Check for hex literals. - if self.buf.get(self.pos.offset).copied() == Some(b'0') - && (self.buf.get(self.pos.offset + 1).copied() == Some(b'x') - || self.buf.get(self.pos.offset + 1).copied() == Some(b'X')) - { - self.advance_pos(); - self.advance_pos(); - radix = 16; + // Check for prefixed literals. + match ( + self.buf.get(self.pos.offset), + self.buf.get(self.pos.offset + 1), + ) { + (Some(b'0'), Some(b'x')) | (Some(b'0'), Some(b'X')) => { + self.advance_pos(); + self.advance_pos(); + radix = 16; + } + (Some(b'0'), Some(b'o')) => { + self.advance_pos(); + self.advance_pos(); + radix = 8; + } + (Some(b'0'), Some(b'b')) => { + self.advance_pos(); + self.advance_pos(); + radix = 2; + } + _ => {} } // Find the range in the buffer for this integer literal. We'll @@ -258,7 +271,7 @@ impl<'a> Lexer<'a> { // string-to-integer conversion. 
let mut s = vec![]; while self.pos.offset < self.buf.len() - && ((radix == 10 && self.buf[self.pos.offset].is_ascii_digit()) + && ((radix <= 10 && self.buf[self.pos.offset].is_ascii_digit()) || (radix == 16 && self.buf[self.pos.offset].is_ascii_hexdigit()) || self.buf[self.pos.offset] == b'_') {