Skip to content

Commit

Permalink
Remove CDATA preservation on read
Browse files Browse the repository at this point in the history
This feature, introduced in v1.1.3, was implemented in a way that
broke the ability to read XML documents encoded in non-UTF-8
character sets.
  • Loading branch information
beevik committed May 8, 2023
1 parent 211cdce commit d50c583
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 65 deletions.
5 changes: 1 addition & 4 deletions etree.go
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,6 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
var stack stack
stack.push(e)
for {
xr.ResetPeek(dec.InputOffset())
t, err := dec.RawToken()
switch {
case err == io.EOF:
Expand Down Expand Up @@ -806,9 +805,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
case xml.CharData:
data := string(t)
var flags charDataFlags
if xr.PeekContainsCdata() {
flags = cdataFlag
} else if isWhitespace(data) {
if isWhitespace(data) {
flags = whitespaceFlag
}
newCharData(data, flags, top)
Expand Down
23 changes: 2 additions & 21 deletions etree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -892,8 +892,8 @@ func TestIndentPreserveWhitespace(t *testing.T) {
{"<test> </test>", "<test> </test>"},
{"<test>\t</test>", "<test>\t</test>"},
{"<test>\t\n \t</test>", "<test>\t\n \t</test>"},
{"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>"},
{"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>"},
{"<test><![CDATA[ ]]></test>", "<test> </test>"},
{"<test> <![CDATA[ ]]> </test>", "<test/>"},
{"<outer> <inner> </inner> </outer>", "<outer>\n <inner> </inner>\n</outer>"},
}

Expand Down Expand Up @@ -1278,22 +1278,3 @@ func TestWhitespace(t *testing.T) {
cd.SetData("")
checkBoolEq(t, cd.IsWhitespace(), true)
}

// TestPreserveCDATA reads an XML string containing CDATA markup into a new
// document, serializes the document back to a string, and reports an error
// unless the output is byte-for-byte identical to the input.
func TestPreserveCDATA(t *testing.T) {
// NOTE(review): the first "<![CDATA[" is followed by "]] " rather than
// "]]>", so the only CDATA terminator appears to be the one just before
// "</name>". If so, the whole element body is a single CDATA section and
// "<b>name</b>" is literal text, not a child element. Confirm this is the
// intended fixture.
s := `<name><![CDATA[My]] <b>name</b> <![CDATA[is]]></name>`

// Parse the fixture; a read failure aborts the test immediately.
doc := NewDocument()
err := doc.ReadFromString(s)
if err != nil {
t.Fatalf("etree: failed to ReadFromString: %v", err)
}

// Serialize the parsed document; a write failure also aborts the test.
result, err := doc.WriteToString()
if err != nil {
t.Fatalf("etree: failed to WriteToString: %v", err)
}

// The round trip must reproduce the input exactly.
if result != s {
t.Errorf("etree: wanted %q, got %q", s, result)
}
}
45 changes: 5 additions & 40 deletions helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package etree

import (
"bufio"
"bytes"
"io"
"strings"
"unicode/utf8"
Expand Down Expand Up @@ -88,54 +87,20 @@ func (f *fifo) grow() {
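// bytes read from its encapsulated reader and detects when a CDATA
// prefix has been parsed.
// NOTE(review): the CDATA-prefix clause is stale once this change lands;
// the reader only forwards Read calls and counts bytes. Update this comment
// to drop the CDATA wording.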
type xmlReader struct {
r io.ByteReader
r io.Reader
bytes int64
peek []byte
last byte
}

var cdataPrefix = []byte("<![CDATA[")

func newXmlReader(r io.Reader) *xmlReader {
return &xmlReader{
r: bufio.NewReader(r),
bytes: 0,
peek: make([]byte, 0, len(cdataPrefix)),
last: 0,
}
return &xmlReader{r, 0}
}

func (xr *xmlReader) Read(p []byte) (n int, err error) {
// Since xmlReader implements the io.ByteReader interface, the XML decoder
// bypasses Read in favor of ReadByte.
return 0, nil
}

func (xr *xmlReader) ReadByte() (b byte, err error) {
b, err = xr.r.ReadByte()
if err == nil {
xr.last = b
xr.bytes += 1
if len(xr.peek) < len(cdataPrefix) {
xr.peek = append(xr.peek, b)
}
}
return b, err
}

func (xr *xmlReader) ResetPeek(decoderOffset int64) {
xr.peek = xr.peek[0:0]

// If the decoder offset doesn't match the number of bytes read so far,
// then the decoder performed an "unget" on the last byte read. Return
// this byte to the front of the peek buffer.
if decoderOffset != xr.bytes {
xr.peek = append(xr.peek, xr.last)
}
}

func (xr *xmlReader) PeekContainsCdata() bool {
return bytes.Equal(xr.peek, cdataPrefix)
n, err = xr.r.Read(p)
xr.bytes += int64(n)
return n, err
}

// xmlWriter implements a proxy writer that counts the number of
Expand Down

0 comments on commit d50c583

Please sign in to comment.