From 9394d65a69874e418b5969fbdf3555da93f1aca9 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Sat, 9 Sep 2023 17:37:43 +1000 Subject: [PATCH 1/3] chore: add failing tests for json map comma handling cases --- unmarshal_test.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/unmarshal_test.go b/unmarshal_test.go index a2fc51d..9d95f27 100644 --- a/unmarshal_test.go +++ b/unmarshal_test.go @@ -16,12 +16,48 @@ func TestUnmarshal(t *testing.T) { bs := []byte(`"str"`) err := Unmarshal(json.DecodeOptions{}, bs, &slot) So(err, ShouldBeNil) + So(slot, ShouldEqual, "str") }) Convey("map", func() { var slot map[string]string bs := []byte(`{"x":"1"}`) err := Unmarshal(json.DecodeOptions{}, bs, &slot) So(err, ShouldBeNil) + So(slot, ShouldResemble, map[string]string{"x": "1"}) + }) + Convey("map comma handling", func() { + var slot map[string]string + bs := []byte(`{"x":"1","y":"2"}`) + err := Unmarshal(json.DecodeOptions{}, bs, &slot) + So(err, ShouldBeNil) + So(slot, ShouldResemble, map[string]string{"x": "1", "y": "2"}) + }) + Convey("map comma handling errors", func() { + for _, tc := range []struct { + name string + j string + err string + }{ + // fails with: "expected colon after map key; got 0x2c" + {"trailing commas", `{"x":"1","y":"2",,,}`, "expected key after comma, got comma"}, + // fails with: "expected colon after map key; got 0x2c" + {"just commas", `{,,,}`, "expected key after comma, got comma"}, + // fails with: "expected colon after map key; got 0x2c" + {"leading commas", `{,,,"x":"1","y":"2",,,}`, "expected key after map open, got comma"}, + // doesn't error + {"no commas", `{"x":"1""y":"2"}`, "expected comma after value, got quote"}, + // doesn't error + {"no commas, just spaces", `{ "x":"1" "y":"2" }`, "expected comma after value, got quote"}, + // doesn't error + {"no commas, just tabs", `{ "x":"1" "y":"2" }`, "expected comma after value, got quote"}, + } { + Convey(tc.name, func() { + var slot map[string]string + err := Unmarshal(json.DecodeOptions{}, []byte(tc.j), &slot) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldEqual, tc.err) + }) + } }) }) } From e600edc52dad72e8186a61d9d046e8f3599be49f Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 20 Sep 2023 19:25:05 +1000 Subject: [PATCH 2/3] feat: manage stack state for 2nd+ elements of arr & map, improved err msgs --- json/jsonDecoder.go | 81 ++++++++++++++++++++++++++++----------- json/jsonFixtures_test.go | 2 +- unmarshal_test.go | 39 +++++++++++++------ 3 files changed, 87 insertions(+), 35 deletions(-) diff --git a/json/jsonDecoder.go b/json/jsonDecoder.go index bc025e6..31708ba 100644 --- a/json/jsonDecoder.go +++ b/json/jsonDecoder.go @@ -8,33 +8,36 @@ import ( . "github.com/polydawn/refmt/tok" ) +type stackFrame struct { + step decoderStep + some bool // Set to true after first value in any context; use to decide if a comma must precede the next value. +} + type Decoder struct { r shared.SlickReader - stack []decoderStep // When empty, and step returns done, all done. - step decoderStep // Shortcut to end of stack. - some bool // Set to true after first value in any context; use to decide if a comma must precede the next value. + stack []stackFrame // When empty, and step returns done, all done. + frame stackFrame // Shortcut to end of stack. } func NewDecoder(r io.Reader) (d *Decoder) { d = &Decoder{ r: shared.NewReader(r), - stack: make([]decoderStep, 0, 10), + stack: make([]stackFrame, 0, 10), } - d.step = d.step_acceptValue + d.frame = stackFrame{d.step_acceptValue, false} return } func (d *Decoder) Reset() { d.stack = d.stack[0:0] - d.step = d.step_acceptValue - d.some = false + d.frame = stackFrame{d.step_acceptValue, false} } type decoderStep func(tokenSlot *Token) (done bool, err error) func (d *Decoder) Step(tokenSlot *Token) (done bool, err error) { - done, err = d.step(tokenSlot) + done, err = d.frame.step(tokenSlot) // If the step errored: out, entirely. if err != nil { return true, err @@ -49,16 +52,14 @@ func (d *Decoder) Step(tokenSlot *Token) (done bool, err error) { return true, nil // that's all folks } // Pop the stack. Reset "some" to true. - d.step = d.stack[nSteps] + d.frame = d.stack[nSteps] d.stack = d.stack[0:nSteps] - d.some = true return false, nil } func (d *Decoder) pushPhase(newPhase decoderStep) { - d.stack = append(d.stack, d.step) - d.step = newPhase - d.some = false + d.stack = append(d.stack, d.frame) + d.frame = stackFrame{newPhase, false} } func readn1skippingWhitespace(r shared.SlickReader) (majorByte byte, err error) { @@ -88,7 +89,7 @@ func (d *Decoder) step_acceptArrValueOrBreak(tokenSlot *Token) (done bool, err e if err != nil { return true, err } - if d.some { + if d.frame.some { switch majorByte { case ']': tokenSlot.Type = TArrClose @@ -99,6 +100,8 @@ func (d *Decoder) step_acceptArrValueOrBreak(tokenSlot *Token) (done bool, err e return true, err } // and now fall through to the next switch + default: + return true, fmt.Errorf("expected comma or array close after array value; got %s", byteToString(majorByte)) } } switch majorByte { @@ -106,8 +109,8 @@ func (d *Decoder) step_acceptArrValueOrBreak(tokenSlot *Token) (done bool, err e tokenSlot.Type = TArrClose return true, nil default: + d.frame = stackFrame{d.frame.step, true} _, err := d.stepHelper_acceptValue(majorByte, tokenSlot) - d.some = true return false, err } } @@ -118,7 +121,7 @@ func (d *Decoder) step_acceptMapKeyOrBreak(tokenSlot *Token) (done bool, err err if err != nil { return true, err } - if d.some { + if d.frame.some { switch majorByte { case '}': tokenSlot.Type = TMapClose @@ -129,6 +132,8 @@ func (d *Decoder) step_acceptMapKeyOrBreak(tokenSlot *Token) (done bool, err err return true, err } // and now fall through to the next switch + default: + return true, fmt.Errorf("expected comma or map close after map value; got %s", byteToString(majorByte)) } } switch majorByte { @@ -136,19 +141,22 @@ func (d *Decoder) step_acceptMapKeyOrBreak(tokenSlot *Token) (done bool, err err tokenSlot.Type = TMapClose return true, nil default: + d.frame.some = true // Consume a string for key. - _, err := d.stepHelper_acceptValue(majorByte, tokenSlot) // FIXME surely not *any* value? not composites, at least? + _, err := d.stepHelper_acceptKey(majorByte, tokenSlot) // FIXME surely not *any* value? not composites, at least? + if err != nil { + return true, err + } // Now scan up to consume the colon as well, which is required next. majorByte, err = readn1skippingWhitespace(d.r) if err != nil { return true, err } if majorByte != ':' { - return true, fmt.Errorf("expected colon after map key; got 0x%x", majorByte) + return true, fmt.Errorf("expected colon after map key; got %s", byteToString(majorByte)) } // Next up: expect a value. - d.step = d.step_acceptMapValue - d.some = true + d.frame = stackFrame{d.step_acceptMapValue, false} return false, err } } @@ -159,12 +167,20 @@ func (d *Decoder) step_acceptMapValue(tokenSlot *Token) (done bool, err error) { if err != nil { return true, err } - d.step = d.step_acceptMapKeyOrBreak + d.frame = stackFrame{d.step_acceptMapKeyOrBreak, true} _, err = d.stepHelper_acceptValue(majorByte, tokenSlot) return false, err } func (d *Decoder) stepHelper_acceptValue(majorByte byte, tokenSlot *Token) (done bool, err error) { + return d.stepHelper_acceptKV("value", majorByte, tokenSlot) +} + +func (d *Decoder) stepHelper_acceptKey(majorByte byte, tokenSlot *Token) (done bool, err error) { + return d.stepHelper_acceptKV("key", majorByte, tokenSlot) +} + +func (d *Decoder) stepHelper_acceptKV(t string, majorByte byte, tokenSlot *Token) (done bool, err error) { switch majorByte { case '{': tokenSlot.Type = TMapOpen @@ -202,6 +218,27 @@ func (d *Decoder) stepHelper_acceptValue(majorByte byte, tokenSlot *Token) (done tokenSlot.Type, tokenSlot.Int, tokenSlot.Float64, err = d.decodeNumber(majorByte) return true, err default: - return true, fmt.Errorf("Invalid byte while expecting start of value: 0x%x", majorByte) + return true, fmt.Errorf("invalid char while expecting start of %s: %s", t, byteToString(majorByte)) + } +} + +func byteToString(b byte) string { + switch b { + case ',': + return "comma" + case ':': + return "colon" + case '{': + return "map open" + case '}': + return "map close" + case '[': + return "array open" + case ']': + return "array close" + case '"': + return "quote" + default: + return fmt.Sprintf("0x%x", b) } } diff --git a/json/jsonFixtures_test.go b/json/jsonFixtures_test.go index f3877bc..dfbbe6d 100644 --- a/json/jsonFixtures_test.go +++ b/json/jsonFixtures_test.go @@ -64,7 +64,7 @@ func checkDecoding(t *testing.T, expectSequence fixtures.Sequence, serial string // so we'll strip that here rather than forcing all our fixtures to say it. expectSequence = expectSequence.SansLengthInfo() - t.Helper() + // t.Helper() inputBuf := bytes.NewBufferString(serial) tokenSrc := NewDecoder(inputBuf) diff --git a/unmarshal_test.go b/unmarshal_test.go index 9d95f27..89d0ec8 100644 --- a/unmarshal_test.go +++ b/unmarshal_test.go @@ -38,18 +38,12 @@ func TestUnmarshal(t *testing.T) { j string err string }{ - // fails with: "expected colon after map key; got 0x2c" - {"trailing commas", `{"x":"1","y":"2",,,}`, "expected key after comma, got comma"}, - // fails with: "expected colon after map key; got 0x2c" - {"just commas", `{,,,}`, "expected key after comma, got comma"}, - // fails with: "expected colon after map key; got 0x2c" - {"leading commas", `{,,,"x":"1","y":"2",,,}`, "expected key after map open, got comma"}, - // doesn't error - {"no commas", `{"x":"1""y":"2"}`, "expected comma after value, got quote"}, - // doesn't error - {"no commas, just spaces", `{ "x":"1" "y":"2" }`, "expected comma after value, got quote"}, - // doesn't error - {"no commas, just tabs", `{ "x":"1" "y":"2" }`, "expected comma after value, got quote"}, + {"trailing commas", `{"x":"1","y":"2",,,}`, "invalid char while expecting start of key: comma"}, + {"just commas", `{,,,}`, "invalid char while expecting start of key: comma"}, + {"leading commas", `{,,,"x":"1","y":"2",,,}`, "invalid char while expecting start of key: comma"}, + {"no commas", `{"x":"1""y":"2"}`, "expected comma or map close after map value; got quote"}, + {"no commas, just spaces", `{ "x":"1" "y":"2" }`, "expected comma or map close after map value; got quote"}, + {"no commas, just tabs", `{ "x":"1" "y":"2" }`, "expected comma or map close after map value; got quote"}, } { Convey(tc.name, func() { var slot map[string]string @@ -59,6 +53,27 @@ func TestUnmarshal(t *testing.T) { }) } }) + Convey("array comma handling errors", func() { + for _, tc := range []struct { + name string + j string + err string + }{ + {"trailing commas", `["1","2",,,]`, "invalid char while expecting start of value: comma"}, + {"just commas", `[,,,]`, "invalid char while expecting start of value: comma"}, + {"leading commas", `[,,,"1","2",,,]`, "invalid char while expecting start of value: comma"}, + {"no commas", `["1""2"]`, "expected comma or array close after array value; got quote"}, + {"no commas, just spaces", `[ "1" "2" ]`, "expected comma or array close after array value; got quote"}, + {"no commas, just tabs", `[ "1" "2" ]`, "expected comma or array close after array value; got quote"}, + } { + Convey(tc.name, func() { + var slot []string + err := Unmarshal(json.DecodeOptions{}, []byte(tc.j), &slot) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldEqual, tc.err) + }) + } + }) }) } From d4821d99e86872ffb78951b06dc7917d1fd1d6b0 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 26 Sep 2023 20:55:43 +1000 Subject: [PATCH 3/3] fix: minor cleanups --- json/jsonDecoder.go | 30 +++++++++++++----------------- json/jsonFixtures_test.go | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/json/jsonDecoder.go b/json/jsonDecoder.go index 31708ba..d513f35 100644 --- a/json/jsonDecoder.go +++ b/json/jsonDecoder.go @@ -222,23 +222,19 @@ func (d *Decoder) stepHelper_acceptKV(t string, majorByte byte, tokenSlot *Token } } +var byteToStringMap = map[byte]string{ + ',': "comma", + ':': "colon", + '{': "map open", + '}': "map close", + '[': "array open", + ']': "array close", + '"': "quote", +} + func byteToString(b byte) string { - switch b { - case ',': - return "comma" - case ':': - return "colon" - case '{': - return "map open" - case '}': - return "map close" - case '[': - return "array open" - case ']': - return "array close" - case '"': - return "quote" - default: - return fmt.Sprintf("0x%x", b) + if s, ok := byteToStringMap[b]; ok { + return s } + return fmt.Sprintf("0x%x", b) } diff --git a/json/jsonFixtures_test.go b/json/jsonFixtures_test.go index dfbbe6d..f3877bc 100644 --- a/json/jsonFixtures_test.go +++ b/json/jsonFixtures_test.go @@ -64,7 +64,7 @@ func checkDecoding(t *testing.T, expectSequence fixtures.Sequence, serial string // so we'll strip that here rather than forcing all our fixtures to say it. expectSequence = expectSequence.SansLengthInfo() - // t.Helper() + t.Helper() inputBuf := bytes.NewBufferString(serial) tokenSrc := NewDecoder(inputBuf)