diff --git a/changelog/unreleased/more-efficient-etag-pcdata.md b/changelog/unreleased/more-efficient-etag-pcdata.md
new file mode 100644
index 0000000000..3580e0fc04
--- /dev/null
+++ b/changelog/unreleased/more-efficient-etag-pcdata.md
@@ -0,0 +1,3 @@
+Enhancement: We now only encode &, < and > in PROPFIND PCDATA
+
+https://github.com/cs3org/reva/pull/3240
diff --git a/internal/http/services/owncloud/ocdav/prop/prop.go b/internal/http/services/owncloud/ocdav/prop/prop.go
index 607679520c..303c9123ec 100644
--- a/internal/http/services/owncloud/ocdav/prop/prop.go
+++ b/internal/http/services/owncloud/ocdav/prop/prop.go
@@ -21,6 +21,7 @@ package prop
 import (
 	"bytes"
 	"encoding/xml"
+	"unicode/utf8"
 )
 
 // PropertyXML represents a single DAV resource property as defined in RFC 4918.
@@ -58,13 +59,78 @@ func EscapedNS(namespace string, local string, val string) PropertyXML {
 	}
 }
 
-// Escaped returns a new PropertyXML instance while xml-escaping the value
+var (
+	escAmp  = []byte("&amp;")
+	escLT   = []byte("&lt;")
+	escGT   = []byte("&gt;")
+	escFFFD = []byte(string(utf8.RuneError)) // Unicode replacement character
+)
+
+// Decide whether the given rune is in the XML Character Range, per
+// the Char production of https://www.xml.com/axml/testaxml.htm,
+// Section 2.2 Characters.
+func isInCharacterRange(r rune) (inrange bool) {
+	return r == 0x09 ||
+		r == 0x0A ||
+		r == 0x0D ||
+		r >= 0x20 && r <= 0xD7FF ||
+		r >= 0xE000 && r <= utf8.RuneError ||
+		r >= 0x10000 && r <= 0x10FFFF
+}
+
+// Escaped returns a new PropertyXML instance while replacing only
+// * `&` with `&amp;`
+// * `<` with `&lt;`
+// * `>` with `&gt;`
+// as defined in https://www.w3.org/TR/REC-xml/#syntax:
+//
+// > The ampersand character (&) and the left angle bracket (<) must not appear
+// > in their literal form, except when used as markup delimiters, or within a
+// > comment, a processing instruction, or a CDATA section. If they are needed
+// > elsewhere, they must be escaped using either numeric character references
+// > or the strings " &amp; " and " &lt; " respectively. The right angle
+// > bracket (>) may be represented using the string " &gt; ", and must, for
+// > compatibility, be escaped using either " &gt; " or a character reference
+// > when it appears in the string " ]]> " in content, when that string is not
+// > marking the end of a CDATA section.
+//
+// The code ignores errors as the legacy Escaped() does
 // TODO properly use the space
 func Escaped(key, val string) PropertyXML {
+	s := []byte(val)
+	w := bytes.NewBuffer(make([]byte, 0, len(s)))
+	var esc []byte
+	last := 0
+	for i := 0; i < len(s); {
+		r, width := utf8.DecodeRune(s[i:])
+		i += width
+		switch r {
+		case '&':
+			esc = escAmp
+		case '<':
+			esc = escLT
+		case '>':
+			esc = escGT
+		default:
+			if !isInCharacterRange(r) || (r == utf8.RuneError && width == 1) {
+				esc = escFFFD
+				break
+			}
+			continue
+		}
+		if _, err := w.Write(s[last : i-width]); err != nil {
+			break
+		}
+		if _, err := w.Write(esc); err != nil {
+			break
+		}
+		last = i
+	}
+	_, _ = w.Write(s[last:])
 	return PropertyXML{
 		XMLName:  xml.Name{Space: "", Local: key},
 		Lang:     "",
-		InnerXML: xmlEscaped(val),
+		InnerXML: w.Bytes(),
 	}
 }
 
@@ -115,9 +181,12 @@ func Next(d *xml.Decoder) (xml.Token, error) {
 }
 
 // ActiveLock holds active lock xml data
-//  http://www.webdav.org/specs/rfc4918.html#ELEMENT_activelock
+//
+//	http://www.webdav.org/specs/rfc4918.html#ELEMENT_activelock
+//
 // <!ELEMENT activelock (lockscope, locktype, depth, owner?, timeout?,
-//           locktoken?, lockroot)>
+//
+//	locktoken?, lockroot)>
 type ActiveLock struct {
 	XMLName   xml.Name  `xml:"activelock"`
 	Exclusive *struct{} `xml:"lockscope>exclusive,omitempty"`
diff --git a/internal/http/services/owncloud/ocdav/propfind/propfind_test.go b/internal/http/services/owncloud/ocdav/propfind/propfind_test.go
index 58513a90f3..9ac24a80bf 100644
--- a/internal/http/services/owncloud/ocdav/propfind/propfind_test.go
+++ b/internal/http/services/owncloud/ocdav/propfind/propfind_test.go
@@ -516,7 +516,7 @@ var _ = Describe("Propfind", func() {
 				Expect(sf.Href).To(Equal("http:/127.0.0.1:3000/foo/Shares/sharedFile"))
 				Expect(string(sf.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("2000"))
 				Expect(string(sf.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("Thu, 01 Jan 1970 00:00:01 GMT"))
-				Expect(string(sf.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("&#34;1&#34;"))
+				Expect(string(sf.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring(`"1"`))
 			})
 		})
 
@@ -564,7 +564,7 @@ var _ = Describe("Propfind", func() {
 				Expect(shares.Href).To(Equal("http:/127.0.0.1:3000/foo/Shares/"))
 				Expect(string(shares.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("6000"))
 				Expect(string(shares.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("Thu, 01 Jan 1970 00:00:03 GMT"))
-				Expect(string(shares.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring("&#34;3&#34;"))
+				Expect(string(shares.Propstat[0].Prop[0].InnerXML)).To(ContainSubstring(`"3"`))
 			})
 
 			It("stats the embedded space", func() {