Add support for non-decimal floating point literals.

Issue rust-lang#1433. The chosen syntax requires the radix prefix (0b, 0o, or 0x) to be repeated after the dot. If the literal is hexadecimal, an exponent is required.
rcxdude · Feb 16, 2014 · 16f5e9e · 16f5e9e
1 parent 0ba6d48
commit 16f5e9e
Show file tree

Hide file tree

Showing 15 changed files with 123 additions and 197 deletions.
diff --git a/src/doc/rust.md b/src/doc/rust.md
@@ -319,21 +319,23 @@ r##"foo #"# bar"##;                // foo #"# bar
 #### Number literals
 
 ~~~~ {.ebnf .gram}
-num_lit : nonzero_dec [ dec_digit | '_' ] * num_suffix ?
-        | '0' [       [ dec_digit | '_' ] * num_suffix ?
-              | 'b'   [ '1' | '0' | '_' ] + int_suffix ?
-              | 'o'   [ oct_digit | '_' ] + int_suffix ?
-              | 'x'   [ hex_digit | '_' ] + int_suffix ? ] ;
+num_lit : radix_lit num_suffix ; 
+
+radix_lit : nonzero_dec [ dec_digit | '_' ] *
+        | '0' [         [ dec_digit | '_' ] *
+               | 'b'    [ '1' | '0' | '_' ] +
+               | 'o'    [ oct_digit | '_' ] +
+               | 'x'    [ hex_digit | '_' ] + ] ;
 
 num_suffix : int_suffix | float_suffix ;
 
 int_suffix : 'u' int_suffix_size ?
            | 'i' int_suffix_size ? ;
 int_suffix_size : [ '8' | '1' '6' | '3' '2' | '6' '4' ] ;
 
-float_suffix : [ exponent | '.' dec_lit exponent ? ] ? float_suffix_ty ? ;
+float_suffix : [ exponent | '.' radix_lit exponent ? ] ? float_suffix_ty ? ;
 float_suffix_ty : 'f' [ '3' '2' | '6' '4' ] ;
-exponent : ['E' | 'e'] ['-' | '+' ] ? dec_lit ;
+exponent : ['E' | 'e' | 'p' | 'P'] ['-' | '+' ] ? dec_lit ;
 dec_lit : [ dec_digit | '_' ] + ;
 ~~~~
 
@@ -343,7 +345,7 @@ as they are differentiated by suffixes.
 
 ##### Integer literals
 
-An _integer literal_ has one of four forms:
+A _radix literal_ has one of four forms:
 
   * A _decimal literal_ starts with a *decimal digit* and continues with any
     mixture of *decimal digits* and _underscores_.
@@ -354,9 +356,9 @@ An _integer literal_ has one of four forms:
   * A _binary literal_ starts with the character sequence `U+0030` `U+0062`
     (`0b`) and continues as any mixture binary digits and underscores.
 
-An integer literal may be followed (immediately, without any spaces) by an
-_integer suffix_, which changes the type of the literal. There are two kinds
-of integer literal suffix:
+An _integer literal_ consists of a radix literal, which may be followed
+(immediately, without any spaces) by an _integer suffix_ that changes the
+type of the literal. There are two kinds of integer literal suffix:
 
   * The `i` and `u` suffixes give the literal type `int` or `uint`,
     respectively.
@@ -389,10 +391,11 @@ Examples of integer literals of various forms:
 
 A _floating-point literal_ has one of two forms:
 
-* Two _decimal literals_ separated by a period
+* Two _radix literals_ separated by a period
   character `U+002E` (`.`), with an optional _exponent_ trailing after the
-  second decimal literal.
-* A single _decimal literal_ followed by an _exponent_.
+  second radix literal. Both radix literals must have the same base.
+* A single _radix literal_ followed by an _exponent_.
+* If the float literal is hexadecimal, an _exponent_ must be supplied.
 
 By default, a floating-point literal has a generic type, but will fall back to
 `f64`. A floating-point literal may be followed (immediately, without any
@@ -406,6 +409,8 @@ Examples of floating-point literals of various forms:
 123.0;                             // type f64
 0.1;                               // type f64
 0.1f32;                            // type f32
+0x4.0x432p-4_f32;                  // type f32
+0b1.0b10111011011000;              // type f64
 12E+99_f64;                        // type f64
 ~~~~
 

diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
@@ -443,8 +443,7 @@ fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> {
     // \x00 hits the `return None` case immediately, so this is fine.
     let mut c = rdr.curr.get().unwrap_or('\x00');
     let mut rslt = ~"";
-    if c == 'e' || c == 'E' {
-        rslt.push_char(c);
+    if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
         bump(rdr);
         c = rdr.curr.get().unwrap_or('\x00');
         if c == '-' || c == '+' {
@@ -476,40 +475,32 @@ fn scan_digits(rdr: &StringReader, radix: uint) -> ~str {
     };
 }
 
-fn check_float_base(rdr: &StringReader, start_bpos: BytePos, last_bpos: BytePos,
-                    base: uint) {
-    match base {
-      16u => fatal_span(rdr, start_bpos, last_bpos,
-                      ~"hexadecimal float literal is not supported"),
-      8u => fatal_span(rdr, start_bpos, last_bpos,
-                     ~"octal float literal is not supported"),
-      2u => fatal_span(rdr, start_bpos, last_bpos,
-                     ~"binary float literal is not supported"),
-      _ => ()
-    }
-}
-
-fn scan_number(c: char, rdr: &StringReader) -> token::Token {
-    let mut num_str;
-    let mut base = 10u;
-    let mut c = c;
-    let mut n = nextch(rdr).unwrap_or('\x00');
-    let start_bpos = rdr.last_pos.get();
+fn scan_radix(rdr: &StringReader) -> uint {
+    let c = rdr.curr.get().unwrap_or('\x00');
+    let n = nextch(rdr).unwrap_or('\x00');
     if c == '0' && n == 'x' {
         bump(rdr);
         bump(rdr);
-        base = 16u;
+        return 16u;
     } else if c == '0' && n == 'o' {
         bump(rdr);
         bump(rdr);
-        base = 8u;
+        return 8u;
     } else if c == '0' && n == 'b' {
         bump(rdr);
         bump(rdr);
-        base = 2u;
+        return 2u;
     }
+    return 10u;
+}
+
+fn scan_number(rdr: &StringReader) -> token::Token {
+    let mut num_str;
+    let start_bpos = rdr.last_pos.get();
+    let mut base = scan_radix(rdr);
     num_str = scan_digits(rdr, base);
-    c = rdr.curr.get().unwrap_or('\x00');
+    let mut c = rdr.curr.get().unwrap_or('\x00');
+    let mut n:char;
     nextch(rdr);
     if c == 'u' || c == 'i' {
         enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
@@ -558,19 +549,71 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
         }
     }
     let mut is_float = false;
+    let mut dec_part = ~"";
     if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
         is_float = true;
         bump(rdr);
-        let dec_part = scan_digits(rdr, 10u);
-        num_str.push_char('.');
-        num_str.push_str(dec_part);
+        let mantissa_base = scan_radix(rdr);
+        if mantissa_base != base {
+            //The ability to switch base, while conceivably useful, is much more
+            //likely to be triggered by accident.
+            fatal_span(rdr, start_bpos, rdr.last_pos.get(),
+                       ~"float literals must have consistent base before and after decimal point");
+        }
+        base = mantissa_base;
+        dec_part = scan_digits(rdr, mantissa_base);
     }
+    let mut exp_part = ~"";
     match scan_exponent(rdr, start_bpos) {
-      Some(ref s) => {
+      Some(s) => {
         is_float = true;
-        num_str.push_str(*s);
+        exp_part = s;
       }
-      None => ()
+      None => {
+        if is_float && base > 10 {
+            //otherwise we have ambiguity: 0x1.0xffff_f32 gets parsed as
+            //0x1.fffff32, which will create confusing results.
+            fatal_span(rdr, start_bpos, rdr.last_pos.get(),
+                        ~"hexadecimal float literals must contain exponent");
+        }
+      }
+    }
+    if is_float {
+        if base == 10 || base == 16 {
+            num_str.push_char('.');
+            num_str.push_str( if dec_part.len() > 0 {dec_part} else {~"0"} );
+            if exp_part.len() != 0 {
+                num_str.push_char(if base == 10 {'e'} else {'p'});
+                num_str.push_str(exp_part);
+            }
+        } else {
+            num_str = from_str_radix::<u64>(num_str, base).unwrap().to_str_radix(16);
+            let mut i = 0;
+            let len = dec_part.len();
+            let step = match base { 8 => 2, 2 => 4, _ => fail!("Impossible base for float")};
+            let mut dec_str = ~"";
+            while i < len {
+                let chunk = if i + step > len {
+                    let mut chunk = dec_part.slice_from(i).to_str();
+                    for _ in range(0, i + step - len) {
+                        chunk.push_char('0');
+                    }
+                    chunk
+                } else {
+                    dec_part.slice(i, i + step).to_str()
+                };
+                dec_str.push_str(from_str_radix::<u8>(chunk, base).unwrap_or(0).to_str());
+                i += step;
+            }
+            num_str.push_char('.');
+            num_str.push_str(dec_str);
+            num_str.push_char('p');
+            num_str.push_str(if exp_part.len() > 0 {exp_part} else {~"0"});
+        }
+        if base != 10 {
+            num_str.unshift_char('x');
+            num_str.unshift_char('0');
+        }
     }
 
     if rdr.curr_is('f') {
@@ -580,12 +623,10 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
         if c == '3' && n == '2' {
             bump(rdr);
             bump(rdr);
-            check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
             return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF32);
         } else if c == '6' && n == '4' {
             bump(rdr);
             bump(rdr);
-            check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
             return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF64);
             /* FIXME (#2252): if this is out of range for either a
             32-bit or 64-bit float, it won't be noticed till the
@@ -596,7 +637,6 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
         }
     }
     if is_float {
-        check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
         return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
     } else {
         if num_str.len() == 0u {
@@ -687,7 +727,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
         })
     }
     if is_dec_digit(c) {
-        return scan_number(c.unwrap(), rdr);
+        return scan_number(rdr);
     }
     fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token {
         bump(rdr);
@@ -1005,6 +1045,7 @@ mod test {
     use diagnostic;
     use parse::token;
     use parse::token::{str_to_ident};
+    use ast;
 
     // represents a testing reader (incl. both reader and interner)
     struct Env {
@@ -1139,4 +1180,20 @@ mod test {
         assert_eq!(tok,token::LIT_CHAR('a' as u32));
     }
 
+    #[test] fn hex_floats() {
+        let env = setup(~"0x1.0xffffffp100_f32");
+        let TokenAndSpan {tok, sp: _} =
+            env.string_reader.next_token();
+        let id = token::str_to_ident("0x1.ffffffp100");
+        assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
+    }
+
+    #[test] fn bin_floats() {
+        let env = setup(~"0b1.0b0000_0001_0010_0011_1p100_f32");
+        let TokenAndSpan {tok, sp: _} =
+            env.string_reader.next_token();
+        let id = token::str_to_ident("0x1.01238p100");
+        assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
+    }
+
 }
diff --git a/src/test/compile-fail/lex-bad-fp-base-1.rs b/src/test/compile-fail/lex-bad-fp-base-1.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-2.rs b/src/test/compile-fail/lex-bad-fp-base-2.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-3.rs b/src/test/compile-fail/lex-bad-fp-base-3.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-4.rs b/src/test/compile-fail/lex-bad-fp-base-4.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-5.rs b/src/test/compile-fail/lex-bad-fp-base-5.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-6.rs b/src/test/compile-fail/lex-bad-fp-base-6.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-7.rs b/src/test/compile-fail/lex-bad-fp-base-7.rs
diff --git a/src/test/compile-fail/lex-bad-fp-base-8.rs b/src/test/compile-fail/lex-bad-fp-base-8.rs