// Excerpt of the change to `src/parsing.rs` (365f711..7f99098), written out as
// plain Rust.
//
// Besides the helpers and tests below, the change touches one call site inside
// `parse`, where the incoming text is now passed through `strip_bom_from_arc`:
//
//     let source = strip_bom_from_arc(params.text, /* panic in debug */ true);
//
// (previously `let source = params.text;`).
//
// NOTE(review): `Arc` is assumed to be `std::sync::Arc`, imported elsewhere in
// the file -- confirm against the file's `use` block.

/// Removes a leading byte order mark (U+FEFF) from `s`, if there is one.
///
/// Only a BOM at the very start of the string is removed; any later occurrence
/// is left alone. The string is edited in place and then returned, so no new
/// buffer is created.
pub fn strip_bom(mut s: String) -> String {
    const BOM: char = '\u{FEFF}';
    if s.starts_with(BOM) {
        // Drain exactly the BOM's UTF-8 encoding rather than a hard-coded byte count.
        s.drain(..BOM.len_utf8());
    }
    s
}

/// Returns `s` without a leading byte order mark (U+FEFF).
///
/// When `s` has no BOM, the very same `Arc` is handed back. When it does, the
/// remainder of the text is copied into a freshly created `Arc<str>`.
///
/// # Panics
///
/// Panics if `s` starts with a BOM, `should_panic_in_debug` is `true`, and the
/// build has `debug_assertions` enabled. Builds without `debug_assertions`
/// never panic here.
fn strip_bom_from_arc(s: Arc<str>, should_panic_in_debug: bool) -> Arc<str> {
    if let Some(stripped_text) = s.strip_prefix('\u{FEFF}') {
        // this is only a perf concern, so don't crash in release
        if cfg!(debug_assertions) && should_panic_in_debug {
            panic!(
                "BOM should be stripped from text before providing it to deno_ast to avoid a file text allocation"
            );
        }
        // Building an `Arc<str>` from the borrowed remainder copies the text.
        Arc::from(stripped_text)
    } else {
        s
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_strip_bom() {
        let cases = [("\u{FEFF}test", "test"), ("test", "test"), ("", "")];
        for &(input, expected) in cases.iter() {
            assert_eq!(strip_bom(input.to_string()), expected);
        }
    }

    #[test]
    fn test_strip_bom_arc() {
        let cases = [("\u{FEFF}test", "test"), ("test", "test"), ("", "")];
        for &(input, expected) in cases.iter() {
            // `false`: these tests must not trip the debug-only panic.
            let stripped = strip_bom_from_arc(Arc::from(input), false);
            assert_eq!(stripped, Arc::<str>::from(expected));
        }
    }
}