fix: handle windows crlf with more brutality

Ref: warpfork/go-testmark#3
rvagg · Sep 10, 2021 · 805e176 · 805e176
1 parent 1c87017
commit 805e176
Show file tree

Hide file tree

Showing 5 changed files with 22 additions and 13 deletions.
diff --git a/.gitattributes b/.gitattributes
diff --git a/README.md b/README.md
@@ -179,14 +179,9 @@ export interface DirEnt {
 
 ## Note regarding Windows
 
-We're relying on text files and want to have byte-perfect representations of test fixtures, but this presents some problems for Windows. By default (unless the user has changed the settings), git on Windows will convert line endings to Windows style which include a carriage return (`\r`) character. This isn't good, because we don't know whether these characters are part of our fixture data or not!
+Ideally, when we're relying on text files for test data input, we'd want to have byte-perfect representations of fixtures, but this presents some problems for Windows. By default (unless the user has changed the settings), git on Windows will convert line endings to Windows style, which includes a carriage return (`\r`) character. This isn't good, because we don't know whether these characters are part of our fixture data or not!
 
-Our recommendation if you expect to support Windows users, or have Windows users try and run your tests, is to add a `.gitattributes` file to your Git project that uses testmark with something like the following:
-
-```
-* text=auto
-*.* text eol=lf
-```
+So, testmark takes a rather brute-force approach to this problem and just strips out carriage return characters when they appear as part of a line ending (that is, immediately before a `\n`). In practice this _may_ impact the byte-perfect requirements for test fixtures, so you should be careful when using data that strays outside of the standard printable character range, especially when control characters get involved. This is a text file format; if your data isn't text, then make it text by encoding it in hex or base64 or something that reduces the character set to the safe range.
 
 ## Note about the package name
 

diff --git a/parse.js b/parse.js
@@ -14,10 +14,13 @@ export function parse (original) {
     throw new TypeError('Expected a Markdown document string')
   }
 
+  // sorry windows users, we're even turning your original to unix
+  original = original.replace(/\r?\n/g, '\n')
+
   /** @type {Document & {original:string}} */
   const doc = {
     original,
-    lines: original.split('\n'), // can't split with \r? because we need offsets
+    lines: original.split('\n'),
     dataHunks: /** @type {DocHunk[]} */ ([]),
     hunksByName: new Map()
   }

diff --git a/test/test-parse.js b/test/test-parse.js
@@ -32,11 +32,24 @@ const exampleMdExpectedHunks = [
 ]
 
 describe('Read', () => {
-  it('can parse example.md', async () => {
+  /** @type {string} */
+  let exampleMdOriginal
+
+  before(async () => {
     const exampleMd = new URL('../example.md', import.meta.url)
-    const exampleMdOriginal = await fs.promises.readFile(exampleMd, 'utf8')
+    exampleMdOriginal = await fs.promises.readFile(exampleMd, 'utf8')
+  })
+
+  it('can parse example.md', async () => {
     const doc = parse(exampleMdOriginal)
     assert.deepStrictEqual(exampleMdExpectedHunks, doc.dataHunks)
     assert.deepStrictEqual(toString(doc), exampleMdOriginal)
   })
+
+  it('can parse example.md as windows', async () => {
+    const exampleMdOriginalWindows = exampleMdOriginal.replace(/\r?\n/g, '\r\n')
+    const doc = parse(exampleMdOriginalWindows)
+    assert.deepStrictEqual(exampleMdExpectedHunks, doc.dataHunks)
+    assert.deepStrictEqual(toString(doc), exampleMdOriginal)
+  })
 })
diff --git a/types/parse.d.ts.map b/types/parse.d.ts.map