diff --git a/ast/ast.go b/ast/ast.go index 67bb0da..7edd63b 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -379,6 +379,11 @@ func (n *BaseNode) Text(source []byte) []byte { var buf bytes.Buffer for c := n.firstChild; c != nil; c = c.NextSibling() { buf.Write(c.Text(source)) + if sb, ok := c.(interface { + SoftLineBreak() bool + }); ok && sb.SoftLineBreak() { + buf.WriteByte('\n') + } } return buf.Bytes() } diff --git a/ast/ast_test.go b/ast/ast_test.go index 7cee141..191fffd 100644 --- a/ast/ast_test.go +++ b/ast/ast_test.go @@ -1,28 +1,10 @@ package ast import ( - "bytes" "reflect" "testing" - - "github.com/yuin/goldmark/text" ) -func TestRemoveChildren(t *testing.T) { - root := NewDocument() - - node1 := NewDocument() - - node2 := NewDocument() - - root.AppendChild(root, node1) - root.AppendChild(root, node2) - - root.RemoveChildren(root) - - t.Logf("%+v", node2.PreviousSibling()) -} - func TestWalk(t *testing.T) { tests := []struct { name string @@ -76,48 +58,3 @@ func node(n Node, children ...Node) Node { } return n } - -func TestBaseBlock_Text(t *testing.T) { - source := []byte(`# Heading - - code block here - and also here - -A paragraph - -` + "```" + `somelang -fenced code block -` + "```" + ` - -The end`) - - t.Run("fetch text from code block", func(t *testing.T) { - block := NewCodeBlock() - block.lines = text.NewSegments() - block.lines.Append(text.Segment{Start: 15, Stop: 31}) - block.lines.Append(text.Segment{Start: 32, Stop: 46}) - - expected := []byte("code block here\nand also here\n") - if !bytes.Equal(expected, block.Text(source)) { - t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source))) - } - }) - - t.Run("fetch text from fenced code block", func(t *testing.T) { - block := NewFencedCodeBlock(&Text{ - Segment: text.Segment{Start: 63, Stop: 71}, - }) - block.lines = text.NewSegments() - block.lines.Append(text.Segment{Start: 72, Stop: 90}) - - expectedLang := []byte("somelang") - if !bytes.Equal(expectedLang, block.Language(source)) { - t.Errorf("Expected: %q, got: %q", string(expectedLang), string(block.Language(source))) - } - - expected := []byte("fenced code block\n") - if !bytes.Equal(expected, block.Text(source)) { - t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source))) - } - }) -} diff --git a/ast/block.go b/ast/block.go index 04d0d54..467819e 100644 --- a/ast/block.go +++ b/ast/block.go @@ -1,7 +1,6 @@ package ast import ( - "bytes" "fmt" "strings" @@ -48,15 +47,6 @@ func (b *BaseBlock) SetLines(v *textm.Segments) { b.lines = v } -// Text implements Node.Text. -func (b *BaseBlock) Text(source []byte) []byte { - var buf bytes.Buffer - for _, line := range b.Lines().Sliced(0, b.Lines().Len()) { - buf.Write(line.Value(source)) - } - return buf.Bytes() -} - // A Document struct is a root node of Markdown text. type Document struct { BaseBlock @@ -140,6 +130,11 @@ func (n *TextBlock) Kind() NodeKind { return KindTextBlock } +// Text implements Node.Text. +func (n *TextBlock) Text(source []byte) []byte { + return n.Lines().Value(source) +} + // NewTextBlock returns a new TextBlock node. func NewTextBlock() *TextBlock { return &TextBlock{ @@ -165,6 +160,11 @@ func (n *Paragraph) Kind() NodeKind { return KindParagraph } +// Text implements Node.Text. +func (n *Paragraph) Text(source []byte) []byte { + return n.Lines().Value(source) +} + // NewParagraph returns a new Paragraph node. func NewParagraph() *Paragraph { return &Paragraph{ @@ -259,6 +259,11 @@ func (n *CodeBlock) Kind() NodeKind { return KindCodeBlock } +// Text implements Node.Text. +func (n *CodeBlock) Text(source []byte) []byte { + return n.Lines().Value(source) +} + // NewCodeBlock returns a new CodeBlock node. func NewCodeBlock() *CodeBlock { return &CodeBlock{ @@ -314,6 +319,11 @@ func (n *FencedCodeBlock) Kind() NodeKind { return KindFencedCodeBlock } +// Text implements Node.Text. +func (n *FencedCodeBlock) Text(source []byte) []byte { + return n.Lines().Value(source) +} + // NewFencedCodeBlock return a new FencedCodeBlock node. func NewFencedCodeBlock(info *Text) *FencedCodeBlock { return &FencedCodeBlock{ @@ -508,6 +518,15 @@ func (n *HTMLBlock) Kind() NodeKind { return KindHTMLBlock } +// Text implements Node.Text. +func (n *HTMLBlock) Text(source []byte) []byte { + ret := n.Lines().Value(source) + if n.HasClosure() { + ret = append(ret, n.ClosureLine.Value(source)...) + } + return ret +} + // NewHTMLBlock returns a new HTMLBlock node. func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock { return &HTMLBlock{ diff --git a/ast/inline.go b/ast/inline.go index 7e4c51f..9df8470 100644 --- a/ast/inline.go +++ b/ast/inline.go @@ -503,6 +503,11 @@ func (n *AutoLink) Label(source []byte) []byte { return n.value.Text(source) } +// Text implements Node.Text. +func (n *AutoLink) Text(source []byte) []byte { + return n.value.Text(source) +} + // NewAutoLink returns a new AutoLink node. func NewAutoLink(typ AutoLinkType, value *Text) *AutoLink { return &AutoLink{ @@ -541,6 +546,11 @@ func (n *RawHTML) Kind() NodeKind { return KindRawHTML } +// Text implements Node.Text. +func (n *RawHTML) Text(source []byte) []byte { + return n.Segments.Value(source) +} + // NewRawHTML returns a new RawHTML node. func NewRawHTML() *RawHTML { return &RawHTML{ diff --git a/ast_test.go b/ast_test.go new file mode 100644 index 0000000..e5e6016 --- /dev/null +++ b/ast_test.go @@ -0,0 +1,200 @@ +package goldmark_test + +import ( + "bytes" + "testing" + + . "github.com/yuin/goldmark" + "github.com/yuin/goldmark/testutil" + "github.com/yuin/goldmark/text" +) + +func TestASTBlockNodeText(t *testing.T) { + var cases = []struct { + Name string + Source string + T1 string + T2 string + C bool + }{ + { + Name: "AtxHeading", + Source: `# l1 + +a + +# l2`, + T1: `l1`, + T2: `l2`, + }, + { + Name: "SetextHeading", + Source: `l1 +l2 +=============== + +a + +l3 +l4 +==============`, + T1: `l1 +l2`, + T2: `l3 +l4`, + }, + { + Name: "CodeBlock", + Source: ` l1 + l2 + +a + + l3 + l4`, + T1: `l1 +l2 +`, + T2: `l3 +l4 +`, + }, + { + Name: "FencedCodeBlock", + Source: "```" + ` +l1 +l2 +` + "```" + ` + +a + +` + "```" + ` +l3 +l4`, + T1: `l1 +l2 +`, + T2: `l3 +l4 +`, + }, + { + Name: "Blockquote", + Source: `> l1 +> l2 + +a + +> l3 +> l4`, + T1: `l1 +l2`, + T2: `l3 +l4`, + }, + { + Name: "List", + Source: `- l1 + l2 + +a + +- l3 + l4`, + T1: `l1 +l2`, + T2: `l3 +l4`, + C: true, + }, + { + Name: "HTMLBlock", + Source: `
+l1 +l2 +
+ +a + +
+l3 +l4`, + T1: `
+l1 +l2 +
+`, + T2: `
+l3 +l4`, + }, + } + + for _, cs := range cases { + t.Run(cs.Name, func(t *testing.T) { + s := []byte(cs.Source) + md := New() + n := md.Parser().Parse(text.NewReader(s)) + c1 := n.FirstChild() + c2 := c1.NextSibling().NextSibling() + if cs.C { + c1 = c1.FirstChild() + c2 = c2.FirstChild() + } + if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { + t.Errorf("%s unmatch: %s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) + } + if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { + t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) + } + }) + } + +} + +func TestASTInlineNodeText(t *testing.T) { + var cases = []struct { + Name string + Source string + T1 string + }{ + { + Name: "CodeSpan", + Source: "`c1`", + T1: `c1`, + }, + { + Name: "Emphasis", + Source: `*c1 **c2***`, + T1: `c1 c2`, + }, + { + Name: "Link", + Source: `[label](url)`, + T1: `label`, + }, + { + Name: "AutoLink", + Source: ``, + T1: `http://url`, + }, + { + Name: "RawHTML", + Source: `c1`, + T1: ``, + }, + } + + for _, cs := range cases { + t.Run(cs.Name, func(t *testing.T) { + s := []byte(cs.Source) + md := New() + n := md.Parser().Parse(text.NewReader(s)) + c1 := n.FirstChild().FirstChild() + if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { + t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) + } + }) + } + +} diff --git a/extension/ast_test.go b/extension/ast_test.go new file mode 100644 index 0000000..e0c24b1 --- /dev/null +++ b/extension/ast_test.go @@ -0,0 +1,117 @@ +package extension + +import ( + "bytes" + "testing" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/testutil" + "github.com/yuin/goldmark/text" +) + +func TestASTBlockNodeText(t *testing.T) { + var cases = []struct { + Name string + Source string + T1 string + T2 string + C bool + }{ + { + Name: "DefinitionList", + Source: `c1 +: c2 + c3 + +a + +c4 +: c5 + c6`, + T1: `c1c2 +c3`, + T2: `c4c5 +c6`, + }, + { + Name: "Table", + Source: `| h1 | h2 | +| -- | -- | +| c1 | c2 | + +a + + +| h3 | h4 | +| -- | -- | +| c3 | c4 |`, + + T1: `h1h2c1c2`, + T2: `h3h4c3c4`, + }, + } + + for _, cs := range cases { + t.Run(cs.Name, func(t *testing.T) { + s := []byte(cs.Source) + md := goldmark.New( + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + goldmark.WithExtensions( + DefinitionList, + Table, + ), + ) + n := md.Parser().Parse(text.NewReader(s)) + c1 := n.FirstChild() + c2 := c1.NextSibling().NextSibling() + if cs.C { + c1 = c1.FirstChild() + c2 = c2.FirstChild() + } + if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { + t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) + } + if !bytes.Equal(c2.Text(s), []byte(cs.T2)) { + t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2))) + } + }) + } + +} + +func TestASTInlineNodeText(t *testing.T) { + var cases = []struct { + Name string + Source string + T1 string + }{ + { + Name: "Strikethrough", + Source: `~c1 *c2*~`, + T1: `c1 c2`, + }, + } + + for _, cs := range cases { + t.Run(cs.Name, func(t *testing.T) { + s := []byte(cs.Source) + md := goldmark.New( + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + goldmark.WithExtensions( + Strikethrough, + ), + ) + n := md.Parser().Parse(text.NewReader(s)) + c1 := n.FirstChild().FirstChild() + if !bytes.Equal(c1.Text(s), []byte(cs.T1)) { + t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1))) + } + }) + } + +} diff --git a/parser/code_block.go b/parser/code_block.go index 732f18c..d99146c 100644 --- a/parser/code_block.go +++ b/parser/code_block.go @@ -35,6 +35,7 @@ func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) if segment.Padding != 0 { preserveLeadingTabInCodeBlock(&segment, reader, 0) } + segment.ForceNewline = true node.Lines().Append(segment) reader.Advance(segment.Len() - 1) return node, NoChildren @@ -59,6 +60,7 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context preserveLeadingTabInCodeBlock(&segment, reader, 0) } + segment.ForceNewline = true node.Lines().Append(segment) reader.Advance(segment.Len() - 1) return Continue | NoChildren diff --git a/parser/fcode_block.go b/parser/fcode_block.go index e51a35a..953b8dc 100644 --- a/parser/fcode_block.go +++ b/parser/fcode_block.go @@ -100,6 +100,7 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C if padding != 0 { preserveLeadingTabInCodeBlock(&seg, reader, fdata.indent) } + seg.ForceNewline = true // EOF as newline node.Lines().Append(seg) reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding) return Continue | NoChildren diff --git a/parser/parser.go b/parser/parser.go index b59666c..b05db13 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -878,12 +878,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node { blockReader := text.NewBlockReader(reader.Source(), nil) p.walkBlock(root, func(node ast.Node) { p.parseBlock(blockReader, node, pc) - lines := node.Lines() - if lines != nil && lines.Len() != 0 { - s := lines.At(lines.Len() - 1) - s.EOB = true - lines.Set(lines.Len()-1, s) - } }) for _, at := range p.astTransformers { at.Transform(root, reader, pc) diff --git a/text/segment.go b/text/segment.go index 83c875b..93fbf19 100644 --- a/text/segment.go +++ b/text/segment.go @@ -20,8 +20,19 @@ type Segment struct { // Padding is a padding length of the segment. Padding int - // EOB is true if the segment is end of the block. - EOB bool + // ForceNewline is true if the segment should be ended with a newline. + // Some elements(i.e. CodeBlock, FencedCodeBlock) does not trim trailing + // newlines. Spec defines that EOF is treated as a newline, so we need to + // add a newline to the end of the segment if it is not empty. + // + // i.e.: + // + // ```go + // const test = "test" + // + // This code does not close the code block and ends with EOF. In this case, + // we need to add a newline to the end of the last line like `const test = "test"\n`. + ForceNewline bool } // NewSegment return a new Segment. @@ -52,7 +63,7 @@ func (t *Segment) Value(buffer []byte) []byte { result = append(result, bytes.Repeat(space, t.Padding)...) result = append(result, buffer[t.Start:t.Stop]...) } - if t.EOB && len(result) > 0 && result[len(result)-1] != '\n' { + if t.ForceNewline && len(result) > 0 && result[len(result)-1] != '\n' { result = append(result, '\n') } return result @@ -217,3 +228,12 @@ func (s *Segments) Unshift(v Segment) { s.values = append(s.values[0:1], s.values[0:]...) s.values[0] = v } + +// Value returns a string value of the collection. +func (s *Segments) Value(buffer []byte) []byte { + var result []byte + for _, v := range s.values { + result = append(result, v.Value(buffer)...) + } + return result +}