From 82cd5ab4b4322f069c542475dae82f0a3ba82ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 13:28:11 +0900 Subject: [PATCH 1/7] `raw` --- crates/swc_ecma_parser/src/lexer/jsx.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/jsx.rs b/crates/swc_ecma_parser/src/lexer/jsx.rs index c597a9f8aecf..49c8d8181dae 100644 --- a/crates/swc_ecma_parser/src/lexer/jsx.rs +++ b/crates/swc_ecma_parser/src/lexer/jsx.rs @@ -211,9 +211,7 @@ impl<'a> Lexer<'a> { pub(super) fn read_jsx_str(&mut self, quote: char) -> LexResult { debug_assert!(self.syntax.jsx()); - let mut raw = String::new(); - - raw.push(quote); + let start = self.input.cur_pos(); unsafe { // Safety: cur() was Some(quote) @@ -243,8 +241,6 @@ impl<'a> Lexer<'a> { out.push_str(value); out.push('\\'); - raw.push_str(value); - raw.push('\\'); self.bump(); @@ -264,12 +260,10 @@ impl<'a> Lexer<'a> { }; out.push_str(value); - raw.push_str(value); let jsx_entity = self.read_jsx_entity()?; out.push(jsx_entity.0); - raw.push_str(&jsx_entity.1); chunk_start = self.input.cur_pos(); } else if ch.is_line_terminator() { @@ -279,16 +273,13 @@ impl<'a> Lexer<'a> { }; out.push_str(value); - raw.push_str(value); match self.read_jsx_new_line(false)? 
{ Either::Left(s) => { out.push_str(s); - raw.push_str(s); } Either::Right(c) => { out.push(c); - raw.push(c); } } @@ -308,7 +299,6 @@ impl<'a> Lexer<'a> { }; out.push_str(value); - raw.push_str(value); // it might be at the end of the file when // the string literal is unterminated @@ -319,7 +309,11 @@ impl<'a> Lexer<'a> { } } - raw.push(quote); + let end = self.input.cur_pos(); + let raw = unsafe { + // Safety: Both of `start` and `end` are generated from `cur_pos()` + self.input.slice(start, end) + }; Ok(Token::Str { value: self.atoms.atom(out), From 9eb0477cf96335bdd399af9e930769df92db4663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 13:30:18 +0900 Subject: [PATCH 2/7] `raw` --- crates/swc_ecma_parser/src/lexer/mod.rs | 33 +++++++------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 0c0d3e18766c..88c124f5f42f 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1160,19 +1160,7 @@ impl<'a> Lexer<'a> { let mut cooked = Ok(String::new()); let mut cooked_slice_start = start; - let mut raw = SmartString::new(); - let mut raw_slice_start = start; - - macro_rules! consume_raw { - () => {{ - let last_pos = self.cur_pos(); - raw.push_str(unsafe { - // Safety: Both of start and last_pos are valid position because we got them - // from `self.input` - self.input.slice(raw_slice_start, last_pos) - }); - }}; - } + let raw_slice_start = start; macro_rules! 
consume_cooked { () => {{ @@ -1201,21 +1189,24 @@ impl<'a> Lexer<'a> { } consume_cooked!(); - consume_raw!(); // TODO: Handle error + let end = self.input.cur_pos(); + let raw = unsafe { + // Safety: Both `raw_slice_start` and `end` are valid positions because we got them + // from `self.input` + self.input.slice(raw_slice_start, end) + }; return Ok(Token::Template { cooked: cooked.map(Atom::from), - raw: self.atoms.atom(&*raw), + raw: self.atoms.atom(raw), }); } if c == '\\' { consume_cooked!(); - consume_raw!(); - raw.push('\\'); - let mut wrapped = Raw(Some(raw)); + let mut wrapped = Raw(None); match self.read_escaped_char(&mut wrapped, true) { Ok(Some(chars)) => { @@ -1231,17 +1222,13 @@ impl<'a> Lexer<'a> { } } - raw = wrapped.0.unwrap(); - raw_slice_start = self.cur_pos(); cooked_slice_start = self.cur_pos(); } else if c.is_line_terminator() { self.state.had_line_break = true; consume_cooked!(); - consume_raw!(); let c = if c == '\r' && self.peek() == Some('\n') { - raw.push('\r'); self.bump(); // '\r' '\n' } else { @@ -1259,8 +1246,6 @@ impl<'a> Lexer<'a> { if let Ok(ref mut cooked) = cooked { cooked.push(c); } - raw.push(c); - raw_slice_start = self.cur_pos(); cooked_slice_start = self.cur_pos(); } else { self.bump(); From 744d6e276c5923ce5e1ecb00f924c46ebf90a459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 13:33:14 +0900 Subject: [PATCH 3/7] `raw` --- crates/swc_ecma_parser/src/lexer/number.rs | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/number.rs b/crates/swc_ecma_parser/src/lexer/number.rs index c093345a702a..809dfdd0bc5e 100644 --- a/crates/swc_ecma_parser/src/lexer/number.rs +++ b/crates/swc_ecma_parser/src/lexer/number.rs @@ -47,7 +47,6 @@ impl<'a> Lexer<'a> { let start = self.cur_pos(); let mut raw_val = SmartString::::new(); - let mut raw_str = SmartString::::new(); let val = if starts_with_dot { // first char is '.' 
@@ -69,8 +68,6 @@ impl<'a> Lexer<'a> { write!(raw_val, "{}", &s.value).unwrap(); - raw_str.push_str(&raw); - if starts_with_zero { // TODO: I guess it would be okay if I don't use -ffast-math // (or something like that), but needs review. @@ -132,7 +129,6 @@ impl<'a> Lexer<'a> { // `.1.a`, `.1e-4.a` are valid, if self.cur() == Some('.') { raw_val.push('.'); - raw_str.push('.'); self.bump(); @@ -145,8 +141,6 @@ impl<'a> Lexer<'a> { // Read numbers after dot let dec_val = self.read_int::<10>(0, &mut raw)?; - raw_str.push_str(raw.0.as_ref().unwrap()); - val = { if dec_val.is_some() { raw_val.push_str(raw.0.as_ref().unwrap()); @@ -170,7 +164,7 @@ impl<'a> Lexer<'a> { // 1e+2 = 100 // 1e-2 = 0.01 match self.cur() { - Some(e @ 'e') | Some(e @ 'E') => { + Some('e') | Some('E') => { self.bump(); let next = match self.cur() { @@ -182,13 +176,10 @@ impl<'a> Lexer<'a> { }; raw_val.push('e'); - raw_str.push(e); let positive = if next == '+' || next == '-' { self.bump(); // remove '+', '-' - raw_str.push(next); - next == '+' } else { true @@ -197,8 +188,6 @@ impl<'a> Lexer<'a> { let mut raw = Raw(Some(Default::default())); let exp = self.read_number_no_dot::<10>(&mut raw)?; - raw_str.push_str(&raw.0.take().unwrap()); - val = if exp == f64::INFINITY { if positive && val != 0.0 { f64::INFINITY @@ -226,7 +215,12 @@ impl<'a> Lexer<'a> { self.ensure_not_ident()?; - Ok(Either::Left((val, self.atoms.atom(&*raw_str)))) + let end = self.cur_pos(); + let raw_str = unsafe { + // Safety: We got both start and end position from `self.input` + self.input.slice(start, end) + }; + Ok(Either::Left((val, raw_str.into()))) } /// Returns `Left(value)` or `Right(BigInt)` From 820fbb62fce40cea472e9e0748e029aa78b00992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 13:35:09 +0900 Subject: [PATCH 4/7] `raw` --- crates/swc_ecma_parser/src/lexer/mod.rs | 28 +++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git 
a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 88c124f5f42f..41c16265e813 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1000,11 +1000,8 @@ impl<'a> Lexer<'a> { fn read_str_lit(&mut self) -> LexResult { debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"')); let start = self.cur_pos(); - let mut raw = String::new(); let quote = self.cur().unwrap(); - raw.push(quote); - self.bump(); // '"' self.with_buf(|l, out| { @@ -1015,24 +1012,27 @@ impl<'a> Lexer<'a> { .input .uncons_while(|c| c != quote && c != '\\' && !c.is_line_break()); out.push_str(s); - raw.push_str(s); } l.cur() } { match c { c if c == quote => { - raw.push(c); - l.bump(); + let end = l.cur_pos(); + + let raw = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input.slice(start, end) + }; + return Ok(Token::Str { value: l.atoms.atom(&*out), raw: l.atoms.atom(raw), }); } '\\' => { - raw.push(c); - let mut wrapped = Raw(Some(Default::default())); if let Some(chars) = l.read_escaped_char(&mut wrapped, false)? 
{ @@ -1040,17 +1040,12 @@ impl<'a> Lexer<'a> { out.extend(c); } } - - raw.push_str(&wrapped.0.unwrap()); } c if c.is_line_break() => { - raw.push(c); - break; } _ => { out.push(c); - raw.push(c); l.bump(); } @@ -1059,6 +1054,13 @@ impl<'a> Lexer<'a> { l.emit_error(start, SyntaxError::UnterminatedStrLit); + let end = l.cur_pos(); + + let raw = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input.slice(start, end) + }; Ok(Token::Str { value: l.atoms.atom(&*out), raw: l.atoms.atom(raw), From d95c8acc2c020cf4752603fd9366ef932f7772d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 13:58:03 +0900 Subject: [PATCH 5/7] Buf --- crates/swc_ecma_codegen/tests/test262.rs | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/crates/swc_ecma_codegen/tests/test262.rs b/crates/swc_ecma_codegen/tests/test262.rs index 6e2b8b68c3ee..9bd02b6ae1d6 100644 --- a/crates/swc_ecma_codegen/tests/test262.rs +++ b/crates/swc_ecma_codegen/tests/test262.rs @@ -1,9 +1,6 @@ use std::{ - env, fs::read_to_string, - io::{self, Write}, path::{Path, PathBuf}, - sync::{Arc, RwLock}, }; use swc_common::comments::SingleThreadedComments; @@ -105,7 +102,7 @@ fn do_test(entry: &Path, minify: bool) { "\n\n========== Running codegen test {}\nSource:\n{}\n", file_name, input ); - let mut wr = Buf(Arc::new(RwLock::new(vec![]))); + let mut wr = vec![]; ::testing::run_test(false, |cm, handler| { let src = cm.load_file(entry).expect("failed to load file"); @@ -168,7 +165,7 @@ fn do_test(entry: &Path, minify: bool) { } let ref_file = format!("{}", ref_dir.join(&file_name).display()); - let code_output = wr.0.read().unwrap(); + let code_output = wr; let with_srcmap = NormalizedOutput::from(String::from_utf8_lossy(&code_output).into_owned()); with_srcmap.compare_to_file(ref_file).unwrap(); @@ -176,15 +173,3 @@ fn do_test(entry: &Path, minify: bool) { }) .expect("failed to 
run test"); } - -#[derive(Debug, Clone)] -struct Buf(Arc<RwLock<Vec<u8>>>); -impl Write for Buf { - fn write(&mut self, data: &[u8]) -> io::Result<usize> { - self.0.write().unwrap().write(data) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.write().unwrap().flush() - } -} From d2998cef610b52d8d683a64e3baddd43e79e11ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 14:00:10 +0900 Subject: [PATCH 6/7] Doc --- crates/swc_ecma_ast/src/expr.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/swc_ecma_ast/src/expr.rs b/crates/swc_ecma_ast/src/expr.rs index 85c905a314a8..825014cc7a0f 100644 --- a/crates/swc_ecma_ast/src/expr.rs +++ b/crates/swc_ecma_ast/src/expr.rs @@ -1020,6 +1020,8 @@ pub struct TplElement { /// don't have to worry about this value. pub cooked: Option<Atom>, + /// You may need to perform `.replace("\r\n", "\n").replace('\r', "\n")` on + /// this value. pub raw: Atom, } From d8014094bc4f32a9cfecf806e91f842b5b8653b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Fri, 14 Jun 2024 14:00:12 +0900 Subject: [PATCH 7/7] codegen --- crates/swc_ecma_codegen/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/swc_ecma_codegen/src/lib.rs b/crates/swc_ecma_codegen/src/lib.rs index 648856c0e171..7c084e9f640f 100644 --- a/crates/swc_ecma_codegen/src/lib.rs +++ b/crates/swc_ecma_codegen/src/lib.rs @@ -1965,11 +1965,12 @@ where fn emit_quasi(&mut self, node: &TplElement) -> Result { srcmap!(node, true); + let raw = node.raw.replace("\r\n", "\n").replace('\r', "\n"); if self.cfg.minify || (self.cfg.ascii_only && !node.raw.is_ascii()) { - let v = get_template_element_from_raw(&node.raw, self.cfg.ascii_only); + let v = get_template_element_from_raw(&raw, self.cfg.ascii_only); self.wr.write_str_lit(DUMMY_SP, &v)?; } else { - self.wr.write_str_lit(DUMMY_SP, &node.raw)?; + self.wr.write_str_lit(DUMMY_SP, &raw)?; } srcmap!(node, false);