diff --git a/codec/dagjson/marshal.go b/codec/dagjson/marshal.go index 3e292fdc..5335f3a1 100644 --- a/codec/dagjson/marshal.go +++ b/codec/dagjson/marshal.go @@ -3,6 +3,7 @@ package dagjson import ( "encoding/base64" "fmt" + "sort" "github.com/polydawn/refmt/shared" "github.com/polydawn/refmt/tok" @@ -15,7 +16,21 @@ import ( // except for the `case ipld.Kind_Link` block, // which is dag-json's special sauce for schemafree links. -func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { +type MarshalOptions struct { + // If true, will encode nodes with a Link kind using the DAG-JSON + // `{"/":"cid string"}` form. + EncodeLinks bool + + // If true, will encode nodes with a Bytes kind using the DAG-JSON + // `{"/":{"bytes":"base64 bytes..."}}` form. + EncodeBytes bool + + // If true, will sort map keys prior to encoding using plain bytewise + // comparison. + SortMapKeys bool +} + +func Marshal(n ipld.Node, sink shared.TokenSink, options MarshalOptions) error { var tk tok.Token switch n.Kind() { case ipld.Kind_Invalid: @@ -31,22 +46,54 @@ func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { if _, err := sink.Step(&tk); err != nil { return err } - // Emit map contents (and recurse). - for itr := n.MapIterator(); !itr.Done(); { - k, v, err := itr.Next() - if err != nil { - return err - } - tk.Type = tok.TString - tk.Str, err = k.AsString() - if err != nil { - return err + if options.SortMapKeys { + // Collect map entries, then sort by key + type entry struct { + key string + value ipld.Node + } + entries := []entry{} + for itr := n.MapIterator(); !itr.Done(); { + k, v, err := itr.Next() + if err != nil { + return err + } + keyStr, err := k.AsString() + if err != nil { + return err + } + entries = append(entries, entry{keyStr, v}) + } + sort.Slice(entries, func(i, j int) bool { return entries[i].key < entries[j].key }) + // Emit map contents (and recurse). 
+ for _, e := range entries { + tk.Type = tok.TString + tk.Str = e.key + if _, err := sink.Step(&tk); err != nil { + return err + } + if err := Marshal(e.value, sink, options); err != nil { + return err + } } - if _, err := sink.Step(&tk); err != nil { - return err - } - if err := Marshal(v, sink, allowLinks); err != nil { - return err + } else { + // Don't sort map, emit map contents (and recurse). + for itr := n.MapIterator(); !itr.Done(); { + k, v, err := itr.Next() + if err != nil { + return err + } + tk.Type = tok.TString + tk.Str, err = k.AsString() + if err != nil { + return err + } + if _, err := sink.Step(&tk); err != nil { + return err + } + if err := Marshal(v, sink, options); err != nil { + return err + } } } // Emit map close. @@ -67,7 +114,7 @@ func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { if err != nil { return err } - if err := Marshal(v, sink, allowLinks); err != nil { + if err := Marshal(v, sink, options); err != nil { return err } } @@ -116,7 +163,7 @@ func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { if err != nil { return err } - if allowLinks { + if options.EncodeBytes { // Precisely seven tokens to emit: tk.Type = tok.TMapOpen tk.Length = 1 @@ -158,7 +205,7 @@ func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { return err } case ipld.Kind_Link: - if !allowLinks { + if !options.EncodeLinks { return fmt.Errorf("cannot Marshal ipld links to JSON") } v, err := n.AsLink() diff --git a/codec/dagjson/multicodec.go b/codec/dagjson/multicodec.go index 2b8f413a..232d5fe9 100644 --- a/codec/dagjson/multicodec.go +++ b/codec/dagjson/multicodec.go @@ -21,7 +21,10 @@ func init() { } func Decode(na ipld.NodeAssembler, r io.Reader) error { - err := Unmarshal(na, json.NewDecoder(r), true) + err := Unmarshal(na, json.NewDecoder(r), UnmarshalOptions{ + ParseLinks: true, + ParseBytes: true, + }) if err != nil { return err } @@ -53,5 +56,10 @@ func Encode(n ipld.Node, w io.Writer) error { 
// Shell out directly to generic inspection path. // (There's not really any fastpaths of note for json.) // Write another function if you need to tune encoding options about whitespace. - return Marshal(n, json.NewEncoder(w, json.EncodeOptions{}), true) + return Marshal(n, json.NewEncoder(w, json.EncodeOptions{}), + MarshalOptions{ + EncodeLinks: true, + EncodeBytes: true, + SortMapKeys: true, + }) } diff --git a/codec/dagjson/roundtripBytes_test.go b/codec/dagjson/roundtripBytes_test.go index ccbef3e7..9c58e723 100644 --- a/codec/dagjson/roundtripBytes_test.go +++ b/codec/dagjson/roundtripBytes_test.go @@ -15,7 +15,11 @@ var byteNode = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent na.AssembleEntry("plain").AssignString("olde string") na.AssembleEntry("bytes").AssignBytes([]byte("deadbeef")) }) -var byteSerial = `{"plain":"olde string","bytes":{"/":{"bytes":"ZGVhZGJlZWY="}}}` +var byteNodeSorted = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent.MapAssembler) { + na.AssembleEntry("bytes").AssignBytes([]byte("deadbeef")) + na.AssembleEntry("plain").AssignString("olde string") +}) +var byteSerial = `{"bytes":{"/":{"bytes":"ZGVhZGJlZWY="}},"plain":"olde string"}` func TestRoundtripBytes(t *testing.T) { t.Run("encoding", func(t *testing.T) { @@ -29,7 +33,7 @@ func TestRoundtripBytes(t *testing.T) { nb := basicnode.Prototype__Map{}.NewBuilder() err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) - Wish(t, nb.Build(), ShouldEqual, byteNode) + Wish(t, nb.Build(), ShouldEqual, byteNodeSorted) }) } diff --git a/codec/dagjson/roundtripCidlink_test.go b/codec/dagjson/roundtripCidlink_test.go index 43c63bb0..56cb3707 100644 --- a/codec/dagjson/roundtripCidlink_test.go +++ b/codec/dagjson/roundtripCidlink_test.go @@ -36,7 +36,7 @@ func TestRoundtripCidlink(t *testing.T) { n2, err := lsys.Load(ipld.LinkContext{}, lnk, basicnode.Prototype.Any) Require(t, err, ShouldEqual, nil) - Wish(t, n2, ShouldEqual, n) + Wish(t, n2, ShouldEqual, 
nSorted) } // Make sure that a map that *almost* looks like a link is handled safely. diff --git a/codec/dagjson/roundtrip_test.go b/codec/dagjson/roundtrip_test.go index 18896804..4bfd4f6d 100644 --- a/codec/dagjson/roundtrip_test.go +++ b/codec/dagjson/roundtrip_test.go @@ -27,7 +27,23 @@ var n = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent.MapAss }) }) }) -var serial = `{"plain":"olde string","map":{"one":1,"two":2},"list":["three","four"],"nested":{"deeper":["things"]}}` +var nSorted = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent.MapAssembler) { + na.AssembleEntry("list").CreateList(2, func(na fluent.ListAssembler) { + na.AssembleValue().AssignString("three") + na.AssembleValue().AssignString("four") + }) + na.AssembleEntry("map").CreateMap(2, func(na fluent.MapAssembler) { + na.AssembleEntry("one").AssignInt(1) + na.AssembleEntry("two").AssignInt(2) + }) + na.AssembleEntry("nested").CreateMap(1, func(na fluent.MapAssembler) { + na.AssembleEntry("deeper").CreateList(1, func(na fluent.ListAssembler) { + na.AssembleValue().AssignString("things") + }) + }) + na.AssembleEntry("plain").AssignString("olde string") +}) +var serial = `{"list":["three","four"],"map":{"one":1,"two":2},"nested":{"deeper":["things"]},"plain":"olde string"}` func TestRoundtrip(t *testing.T) { t.Run("encoding", func(t *testing.T) { @@ -41,7 +57,7 @@ func TestRoundtrip(t *testing.T) { nb := basicnode.Prototype__Map{}.NewBuilder() err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) - Wish(t, nb.Build(), ShouldEqual, n) + Wish(t, nb.Build(), ShouldEqual, nSorted) }) } diff --git a/codec/dagjson/unmarshal.go b/codec/dagjson/unmarshal.go index b4d15710..913d245e 100644 --- a/codec/dagjson/unmarshal.go +++ b/codec/dagjson/unmarshal.go @@ -20,9 +20,19 @@ import ( // several steps of handling maps, because it necessitates peeking several // tokens before deciding what kind of value to create). 
-func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, parseLinks bool) error { +type UnmarshalOptions struct { + // If true, parse DAG-JSON `{"/":"cid string"}` as a Link kind node rather + // than a plain map + ParseLinks bool + + // If true, parse DAG-JSON `{"/":{"bytes":"base64 bytes..."}}` as a Bytes kind + // node rather than nested plain maps + ParseBytes bool +} + +func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, options UnmarshalOptions) error { var st unmarshalState - st.parseLinks = parseLinks + st.options = options done, err := tokSrc.Step(&st.tk[0]) if err != nil { return err @@ -34,9 +44,9 @@ func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, parseLinks bool } type unmarshalState struct { - tk [7]tok.Token // mostly, only 0'th is used... but [1:7] are used during lookahead for links. - shift int // how many times to slide something out of tk[1:7] instead of getting a new token. - parseLinks bool + tk [7]tok.Token // mostly, only 0'th is used... but [1:7] are used during lookahead for links. + shift int // how many times to slide something out of tk[1:7] instead of getting a new token. + options UnmarshalOptions } // step leaves a "new" token in tk[0], @@ -229,7 +239,7 @@ func (st *unmarshalState) unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSo case tok.TMapOpen: // dag-json has special needs: we pump a few tokens ahead to look for dag-json's "link" pattern. // We can't actually call BeginMap until we're sure it's not gonna turn out to be a link. - if st.parseLinks { + if st.options.ParseLinks { gotLink, err := st.linkLookahead(na, tokSrc) if err != nil { // return in error if any token peeks failed or if structure looked like a link but failed to parse as CID. 
return err @@ -237,7 +247,9 @@ func (st *unmarshalState) unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSo if gotLink { return nil } + } + if st.options.ParseBytes { gotBytes, err := st.bytesLookahead(na, tokSrc) if err != nil { return err diff --git a/codec/json/multicodec.go b/codec/json/multicodec.go index c3c807dd..9f2f8c7c 100644 --- a/codec/json/multicodec.go +++ b/codec/json/multicodec.go @@ -24,7 +24,10 @@ func init() { func Decode(na ipld.NodeAssembler, r io.Reader) error { // Shell out directly to generic builder path. // (There's not really any fastpaths of note for json.) - err := dagjson.Unmarshal(na, rfmtjson.NewDecoder(r), false) + err := dagjson.Unmarshal(na, rfmtjson.NewDecoder(r), dagjson.UnmarshalOptions{ + ParseLinks: false, + ParseBytes: false, + }) if err != nil { return err } @@ -59,5 +62,9 @@ func Encode(n ipld.Node, w io.Writer) error { return dagjson.Marshal(n, rfmtjson.NewEncoder(w, rfmtjson.EncodeOptions{ Line: []byte{'\n'}, Indent: []byte{'\t'}, - }), false) + }), dagjson.MarshalOptions{ + EncodeLinks: false, + EncodeBytes: false, + SortMapKeys: false, + }) } diff --git a/codec/jst/jst.go b/codec/jst/jst.go index 48747f00..438b5aa3 100644 --- a/codec/jst/jst.go +++ b/codec/jst/jst.go @@ -37,10 +37,9 @@ import ( "bytes" "io" - "github.com/polydawn/refmt/json" - ipld "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/codec/dagjson" + "github.com/ipld/go-ipld-prime/codec/json" basicnode "github.com/ipld/go-ipld-prime/node/basic" ) @@ -168,8 +167,8 @@ func (tab *table) Finalize() { var buf bytes.Buffer for _, cn := range cols { buf.Reset() - dagjson.Marshal(basicnode.NewString(string(cn)), json.NewEncoder(&buf, json.EncodeOptions{}), false) // FIXME this would be a lot less irritating if we had more plumbing access to the json encoding -- we want to encode exactly one string into a buffer, it literally can't error. - tab.keySize[cn] = buf.Len() // FIXME this is ignoring charsets, renderable glyphs, etc at present. 
+ json.Encode(basicnode.NewString(string(cn)), &buf) // FIXME this would be a lot less irritating if we had more plumbing access to the json encoding -- we want to encode exactly one string into a buffer, it literally can't error. + tab.keySize[cn] = buf.Len() // FIXME this is ignoring charsets, renderable glyphs, etc at present. } } @@ -297,9 +296,7 @@ func marshal(ctx *state, n ipld.Node, w io.Writer) error { // It doesn't colorize or anything else. To replace it with something clever that does, // we'll have to tear deeper into the plumbing level of json serializers; will, but later. func marshalPlain(ctx *state, n ipld.Node, w io.Writer) error { - err := dagjson.Marshal(n, json.NewEncoder(w, json.EncodeOptions{ - // never indent here: these values will always end up being emitted mid-line. - }), true) + err := dagjson.Encode(n, w) // never indent here: these values will always end up being emitted mid-line. if err != nil { return recordErrorPosition(ctx, err) } @@ -470,7 +467,7 @@ func emitKey(ctx *state, k ipld.Node, w io.Writer) error { if ctx.cfg.Color.Enabled { w.Write(ctx.cfg.Color.KeyHighlight) } - if err := dagjson.Marshal(k, json.NewEncoder(w, json.EncodeOptions{}), true); err != nil { + if err := dagjson.Encode(k, w); err != nil { return recordErrorPosition(ctx, err) } if ctx.cfg.Color.Enabled { diff --git a/fluent/qp/example_test.go b/fluent/qp/example_test.go index c0cac27a..f6cc2a7a 100644 --- a/fluent/qp/example_test.go +++ b/fluent/qp/example_test.go @@ -30,5 +30,5 @@ func Example() { dagjson.Encode(n, os.Stdout) // Output: - // {"some key":"some value","another key":"another value","nested map":{"deeper entries":"deeper values","more deeper entries":"more deeper values"},"nested list":[1,2]} + // {"another key":"another value","nested list":[1,2],"nested map":{"deeper entries":"deeper values","more deeper entries":"more deeper values"},"some key":"some value"} } diff --git a/node/bindnode/example_test.go b/node/bindnode/example_test.go index 
34b1ad35..9a1d948a 100644 --- a/node/bindnode/example_test.go +++ b/node/bindnode/example_test.go @@ -42,7 +42,7 @@ func ExampleWrap_withSchema() { dagjson.Encode(nodeRepr, os.Stdout) // Output: - // {"Name":"Michael","Friends":["Sarah","Alex"]} + // {"Friends":["Sarah","Alex"],"Name":"Michael"} } func ExamplePrototype_onlySchema() { @@ -78,5 +78,5 @@ func ExamplePrototype_onlySchema() { dagjson.Encode(nodeRepr, os.Stdout) // Output: - // {"Name":"Michael","Friends":["Sarah","Alex"]} + // {"Friends":["Sarah","Alex"],"Name":"Michael"} } diff --git a/node/tests/schemaStructsContainingMaybe.go b/node/tests/schemaStructsContainingMaybe.go index 16c2f5e2..e4010555 100644 --- a/node/tests/schemaStructsContainingMaybe.go +++ b/node/tests/schemaStructsContainingMaybe.go @@ -47,7 +47,7 @@ func SchemaTestStructsContainingMaybe(t *testing.T, engine Engine) { { name: "vvvvv-AllFieldsSet", typeJson: `{"f1":"a","f2":"b","f3":"c","f4":"d","f5":"e"}`, - reprJson: `{"r1":"a","r2":"b","r3":"c","r4":"d","f5":"e"}`, + reprJson: `{"f5":"e","r1":"a","r2":"b","r3":"c","r4":"d"}`, typePoints: []testcasePoint{ {"", ipld.Kind_Map}, {"f1", "a"}, @@ -68,7 +68,7 @@ func SchemaTestStructsContainingMaybe(t *testing.T, engine Engine) { { name: "vvnnv-Nulls", typeJson: `{"f1":"a","f2":"b","f3":null,"f4":null,"f5":"e"}`, - reprJson: `{"r1":"a","r2":"b","r3":null,"r4":null,"f5":"e"}`, + reprJson: `{"f5":"e","r1":"a","r2":"b","r3":null,"r4":null}`, typePoints: []testcasePoint{ {"", ipld.Kind_Map}, {"f1", "a"}, @@ -89,7 +89,7 @@ func SchemaTestStructsContainingMaybe(t *testing.T, engine Engine) { { name: "vzvzv-AbsentOptionals", typeJson: `{"f1":"a","f3":"c","f5":"e"}`, - reprJson: `{"r1":"a","r3":"c","f5":"e"}`, + reprJson: `{"f5":"e","r1":"a","r3":"c"}`, typePoints: []testcasePoint{ {"", ipld.Kind_Map}, {"f1", "a"}, diff --git a/node/tests/testcase.go b/node/tests/testcase.go index 8072bd46..63648f87 100644 --- a/node/tests/testcase.go +++ b/node/tests/testcase.go @@ -210,7 +210,11 @@ func 
testMarshal(t *testing.T, n ipld.Node, data string) { // We'll marshal with "pretty" linebreaks and indents (and re-format the fixture to the same) for better diffing. prettyprint := json.EncodeOptions{Line: []byte{'\n'}, Indent: []byte{'\t'}} var buf bytes.Buffer - err := dagjson.Marshal(n, json.NewEncoder(&buf, prettyprint), true) + err := dagjson.Marshal(n, json.NewEncoder(&buf, prettyprint), dagjson.MarshalOptions{ + EncodeLinks: true, + EncodeBytes: true, + SortMapKeys: true, + }) if err != nil { t.Errorf("marshal failed: %s", err) } diff --git a/traversal/focus_test.go b/traversal/focus_test.go index 0b3f0591..301efb50 100644 --- a/traversal/focus_test.go +++ b/traversal/focus_test.go @@ -318,7 +318,7 @@ func TestFocusedTransformWithLinks(t *testing.T) { Wish(t, progress.Path.String(), ShouldEqual, "linkedMap/nested/nonlink") Wish(t, must.String(prev), ShouldEqual, "zoo") Wish(t, progress.LastBlock.Path.String(), ShouldEqual, "linkedMap") - Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqeeye2opztzy") + Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqeeyezhlahvq") nb := prev.Prototype().NewBuilder() nb.AssignString("new string!") return nb.Build(), nil