Skip to content

Commit

Permalink
perf(es/parser): Reduce allocations while lexing numbers (#9057)
Browse files Browse the repository at this point in the history
  • Loading branch information
kdy1 committed Jun 16, 2024
1 parent 064af53 commit ca26eb7
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 77 deletions.
5 changes: 2 additions & 3 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};

use either::Either::{Left, Right};
use smallvec::{smallvec, SmallVec};
use smartstring::SmartString;
use swc_atoms::{Atom, AtomStoreCell};
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
use swc_ecma_ast::{op, AssignOp, EsVersion};
Expand Down Expand Up @@ -472,7 +471,7 @@ impl<'a> Lexer<'a> {
'x' => {
self.bump(); // 'x'

match self.read_int_u32::<16>(2, &mut Raw(None))? {
match self.read_int_u32::<16>(2)? {
Some(val) => return Ok(Some(vec![Char::from(val)])),
None => self.error(
start,
Expand Down Expand Up @@ -880,7 +879,7 @@ impl<'a> Lexer<'a> {
}

let state = self.input.cur_pos();
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }, &mut Raw(None)) {
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
Ok(Some(val)) => {
if 0x0010_ffff >= val {
char::from_u32(val)
Expand Down
82 changes: 30 additions & 52 deletions crates/swc_ecma_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
//!
//!
//! See https://tc39.github.io/ecma262/#sec-literals-numeric-literals
use std::{borrow::Cow, fmt::Write};
use std::borrow::Cow;

use either::Either;
use num_bigint::BigInt as BigIntValue;
use num_traits::{Num as NumTrait, ToPrimitive};
use smartstring::LazyCompact;
use swc_common::SyntaxContext;
use tracing::trace;

Expand Down Expand Up @@ -46,7 +45,6 @@ impl<'a> Lexer<'a> {
}

let start = self.cur_pos();
let mut raw_val = SmartString::<LazyCompact>::new();

let val = if starts_with_dot {
// first char is '.'
Expand All @@ -70,8 +68,6 @@ impl<'a> Lexer<'a> {
)));
}

write!(raw_val, "{}", &s.value).unwrap();

if starts_with_zero {
// TODO: I guess it would be okay if I don't use -ffast-math
// (or something like that), but needs review.
Expand Down Expand Up @@ -146,29 +142,28 @@ impl<'a> Lexer<'a> {
//
// `.1.a`, `.1e-4.a` are valid,
if self.cur() == Some('.') {
raw_val.push('.');

self.bump();

if starts_with_dot {
debug_assert!(self.cur().is_some());
debug_assert!(self.cur().unwrap().is_ascii_digit());
}

let mut raw = Raw(Some(Default::default()));
// Read numbers after dot
let dec_val = self.read_int::<10>(0, &mut raw)?;
self.read_int::<10>(0)?;

val = {
if dec_val.is_some() {
raw_val.push_str(raw.0.as_ref().unwrap());
}
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};

// Remove number separator from number
if raw_val.contains('_') {
Cow::Owned(raw_val.replace('_', ""))
if raw.contains('_') {
Cow::Owned(raw.replace('_', ""))
} else {
Cow::Borrowed(&*raw_val)
Cow::Borrowed(raw)
}
.parse()
.expect("failed to parse float using rust's impl")
Expand All @@ -193,8 +188,6 @@ impl<'a> Lexer<'a> {
}
};

raw_val.push('e');

let positive = if next == '+' || next == '-' {
self.bump(); // remove '+', '-'

Expand All @@ -203,8 +196,7 @@ impl<'a> Lexer<'a> {
true
};

let mut raw = Raw(Some(Default::default()));
let exp = self.read_number_no_dot::<10>(&mut raw)?;
let exp = self.read_number_no_dot::<10>()?;

val = if exp == f64::INFINITY {
if positive && val != 0.0 {
Expand All @@ -213,16 +205,16 @@ impl<'a> Lexer<'a> {
0.0
}
} else {
let flag = if positive { '+' } else { '-' };

raw_val.push(flag);

write!(raw_val, "{}", exp).unwrap();

if raw_val.contains('_') {
Cow::Owned(raw_val.replace('_', ""))
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};

if raw.contains('_') {
Cow::Owned(raw.replace('_', ""))
} else {
Cow::Borrowed(&*raw_val)
Cow::Borrowed(raw)
}
.parse()
.expect("failed to parse float literal")
Expand Down Expand Up @@ -293,7 +285,7 @@ impl<'a> Lexer<'a> {

/// This can read long integers like
/// "13612536612375123612312312312312312312312".
fn read_number_no_dot<const RADIX: u8>(&mut self, raw: &mut Raw) -> LexResult<f64> {
fn read_number_no_dot<const RADIX: u8>(&mut self) -> LexResult<f64> {
debug_assert!(
RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
"radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {}",
Expand All @@ -309,7 +301,6 @@ impl<'a> Lexer<'a> {

Ok((f64::mul_add(total, radix as f64, v as f64), true))
},
raw,
true,
);

Expand All @@ -336,8 +327,6 @@ impl<'a> Lexer<'a> {
let mut non_octal = false;
let mut read_any = false;

let mut raw = Raw(Some(Default::default()));

self.read_digits::<_, f64, RADIX>(
|total, radix, v| {
read_any = true;
Expand All @@ -348,17 +337,20 @@ impl<'a> Lexer<'a> {

Ok((f64::mul_add(total, radix as f64, v as f64), true))
},
&mut raw,
true,
)?;

if !read_any {
self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
}

let raw_str = raw.0.take().unwrap();
let end = self.cur_pos();
let raw = unsafe {
// Safety: We got both start and end position from `self.input`
self.input.slice(start, end)
};
// Remove number separator from number
let raw_number_str = raw_str.replace('_', "");
let raw_number_str = raw.replace('_', "");
let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32)
.expect("failed to parse float using BigInt")
.to_f64()
Expand All @@ -381,11 +373,7 @@ impl<'a> Lexer<'a> {
/// were read, the integer value otherwise.
/// When `len` is not zero, this
/// will return `None` unless the integer has exactly `len` digits.
pub(super) fn read_int<const RADIX: u8>(
&mut self,
len: u8,
raw: &mut Raw,
) -> LexResult<Option<f64>> {
pub(super) fn read_int<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<f64>> {
let mut count = 0u16;
let v = self.read_digits::<_, Option<f64>, RADIX>(
|opt: Option<f64>, radix, val| {
Expand All @@ -394,7 +382,6 @@ impl<'a> Lexer<'a> {

Ok((Some(total), count != len as u16))
},
raw,
true,
)?;
if len != 0 && count != len as u16 {
Expand All @@ -404,11 +391,7 @@ impl<'a> Lexer<'a> {
}
}

pub(super) fn read_int_u32<const RADIX: u8>(
&mut self,
len: u8,
raw: &mut Raw,
) -> LexResult<Option<u32>> {
pub(super) fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
let start = self.state.start;

let mut count = 0;
Expand All @@ -427,7 +410,6 @@ impl<'a> Lexer<'a> {

Ok((Some(total), count != len))
},
raw,
true,
)?;
if len != 0 && count != len {
Expand All @@ -441,7 +423,6 @@ impl<'a> Lexer<'a> {
fn read_digits<F, Ret, const RADIX: u8>(
&mut self,
mut op: F,
raw: &mut Raw,
allow_num_separator: bool,
) -> LexResult<Ret>
where
Expand Down Expand Up @@ -499,7 +480,6 @@ impl<'a> Lexer<'a> {
// Safety: cur() returns Some(c) where c is a valid char
self.input.bump();
}
raw.push(c);

continue;
}
Expand All @@ -511,8 +491,6 @@ impl<'a> Lexer<'a> {
return Ok(total);
};

raw.push(c);

self.bump();

let (t, cont) = op(total, RADIX, val)?;
Expand Down Expand Up @@ -574,7 +552,7 @@ mod tests {

fn int<const RADIX: u8>(s: &'static str) -> u32 {
lex(s, |l| {
l.read_int_u32::<RADIX>(0, &mut Raw(None))
l.read_int_u32::<RADIX>(0)
.unwrap()
.expect("read_int returned None")
})
Expand Down
22 changes: 0 additions & 22 deletions crates/swc_ecma_parser/src/lexer/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
//! [babylon/util/identifier.js]:https://github.com/babel/babel/blob/master/packages/babylon/src/util/identifier.js
use std::char;

use smartstring::{LazyCompact, SmartString};
use swc_common::{
comments::{Comment, CommentKind},
BytePos, Span, SyntaxContext,
Expand All @@ -22,27 +21,6 @@ use crate::{
Tokens,
};

/// Collector for raw string.
///
/// Methods of this struct is noop if the value is [None].
pub(super) struct Raw(pub Option<SmartString<LazyCompact>>);

impl Raw {
#[inline]
pub fn push(&mut self, c: char) {
if let Some(ref mut st) = self.0 {
st.push(c)
}
}
}

// pub const BACKSPACE: char = 8 as char;
// pub const SHIFT_OUT: char = 14 as char;
// pub const OGHAM_SPACE_MARK: char = '\u{1680}'; // ' '
// pub const LINE_FEED: char = '\n';
// pub const LINE_SEPARATOR: char = '\u{2028}';
// pub const PARAGRAPH_SEPARATOR: char = '\u{2029}';

impl<'a> Lexer<'a> {
pub(super) fn span(&self, start: BytePos) -> Span {
let end = self.last_pos();
Expand Down

0 comments on commit ca26eb7

Please sign in to comment.