diff --git a/dataconv/helper.go b/dataconv/helper.go index 2d399f9..627b9f5 100644 --- a/dataconv/helper.go +++ b/dataconv/helper.go @@ -4,7 +4,9 @@ import ( "bytes" "context" "encoding/json" + "fmt" "math" + "math/big" "reflect" "strings" "time" @@ -76,10 +78,19 @@ func MarshalStarlarkJSON(data starlark.Value, indent int) (string, error) { // In comparison with DecodeStarlarkJSON, it gives you more control over type conversion but may be less efficient due to intermediate steps. func UnmarshalStarlarkJSON(data []byte) (starlark.Value, error) { var m interface{} - err := json.Unmarshal(data, &m) - if err != nil { + // decode with UseNumber so integers keep their exact value: a plain + // json.Unmarshal turns every number into a float64, which silently + // saturates/rounds integers beyond 2^53 before TypeConvert ever runs. + dec := json.NewDecoder(bytes.NewReader(data)) + dec.UseNumber() + if err := dec.Decode(&m); err != nil { return starlark.None, err } + // json.Unmarshal rejected trailing content; the decoder does not, so reject + // a second value (trailing whitespace still passes). NOTE(review): More() is false before ']' or '}', so input like `{"a":1}}` is still accepted. + if dec.More() { + return starlark.None, fmt.Errorf("unexpected trailing data after JSON value") + } // fix all values to their appropriate types f := TypeConvert(m) @@ -179,6 +190,25 @@ func TypeConvert(data interface{}) interface{} { // If not a time or number, return the original string return v + case json.Number: + // UnmarshalStarlarkJSON now decodes with UseNumber, so numbers arrive + // here exact. Map by literal form, the same rule Marshal/json.decode + // use: an integer literal becomes an int (arbitrary precision, no + // float64 saturation), anything with a decimal point or exponent + // becomes a float. 
+ if !strings.ContainsAny(v.String(), ".eE") { + if i, err := v.Int64(); err == nil { + return i + } + if bi, ok := new(big.Int).SetString(v.String(), 10); ok { + return bi + } + } + if f, err := v.Float64(); err == nil { + return f + } + return v + case float64: // Check for exact int match if math.Floor(v) == v { diff --git a/dataconv/helper_test.go b/dataconv/helper_test.go index 0e522d9..136db4d 100644 --- a/dataconv/helper_test.go +++ b/dataconv/helper_test.go @@ -3,6 +3,7 @@ package dataconv import ( "context" "fmt" + "math/big" "reflect" "testing" "time" @@ -338,6 +339,11 @@ func TestUnmarshalStarlarkJSON(t *testing.T) { input: []byte(`{"foo":42}`), want: d42, }, + { + name: "trailing data rejected", + input: []byte(`{"foo":42} {"bar":1}`), + wantErr: true, + }, { name: "list", input: []byte(`[43,"foo"]`), @@ -374,6 +380,60 @@ func TestUnmarshalStarlarkJSON(t *testing.T) { } } +// TestUnmarshalStarlarkJSONNumberFidelity pins the number handling of +// UnmarshalStarlarkJSON: integers survive exactly at any magnitude (decoding +// uses UseNumber instead of collapsing through float64, which silently +// saturated large ints), and int vs float is decided by literal form. Values +// are compared by starlark.Equal because big-int-bearing starlark.Int does +// not reflect.DeepEqual reliably. 
+func TestUnmarshalStarlarkJSONNumberFidelity(t *testing.T) { + bigID, _ := new(big.Int).SetString("12345678901234567890", 10) + equalsWant := func(t *testing.T, got, want starlark.Value, ctx string) { + t.Helper() + eq, err := starlark.Equal(got, want) + if err != nil { + t.Fatalf("%s: Equal: %v", ctx, err) + } + if !eq { + t.Fatalf("%s = %s (%s), want %s", ctx, got, got.Type(), want) + } + } + + for _, c := range []struct { + name, input string + want starlark.Value + }{ + {"small int", `6`, starlark.MakeInt(6)}, + {"big int exact", `12345678901234567890`, starlark.MakeBigInt(bigID)}, + {"fractional is float", `6.5`, starlark.Float(6.5)}, + {"exponent is float", `1e3`, starlark.Float(1000)}, + } { + t.Run(c.name, func(t *testing.T) { + got, err := UnmarshalStarlarkJSON([]byte(c.input)) + if err != nil { + t.Fatalf("UnmarshalStarlarkJSON(%q): %v", c.input, err) + } + equalsWant(t, got, c.want, c.name) + }) + } + + // the same exactness must hold for a number nested in a dict — this is + // the lib/http request-body path. + got, err := UnmarshalStarlarkJSON([]byte(`{"id":12345678901234567890}`)) + if err != nil { + t.Fatal(err) + } + d, ok := got.(*starlark.Dict) + if !ok { + t.Fatalf("want dict, got %s", got.Type()) + } + v, found, _ := d.Get(starlark.String("id")) + if !found { + t.Fatal("missing id key") + } + equalsWant(t, v, starlark.MakeBigInt(bigID), "dict id") +} + // TestEncodeStarlarkJSON tests the EncodeStarlarkJSON function func TestEncodeStarlarkJSON(t *testing.T) { now := time.Now() diff --git a/dataconv/interface.go b/dataconv/interface.go index a47c343..d7a3c6a 100644 --- a/dataconv/interface.go +++ b/dataconv/interface.go @@ -21,7 +21,9 @@ // - Starlark value -> JSON text and back, via the Go shapes above: // MarshalStarlarkJSON / UnmarshalStarlarkJSON. The decode direction // applies TypeConvert heuristics (RFC3339-looking strings become time -// values, whole floats become ints). 
+// values) and maps numbers by literal form — an integer literal becomes +// an int (exact, arbitrary precision), a number with a decimal point or +// exponent becomes a float. // - Starlark value -> JSON text and back, staying inside Starlark types: // EncodeStarlarkJSON / DecodeStarlarkJSON (the interpreter's own json // encoder: big ints work, bytes/time are errors, no heuristics). diff --git a/dataconv/marshal.go b/dataconv/marshal.go index e49e51a..8ade531 100644 --- a/dataconv/marshal.go +++ b/dataconv/marshal.go @@ -3,9 +3,11 @@ package dataconv // Based on https://github.com/qri-io/starlib/tree/master/util with some modifications and additions import ( + "encoding/json" "errors" "fmt" "math/big" + "strings" "time" "github.com/1set/starlight/convert" @@ -24,6 +26,20 @@ func Marshal(data interface{}) (v starlark.Value, err error) { v = starlark.Bool(x) case string: v = starlark.String(x) + case json.Number: + // a JSON number stays a number: Int for an integer literal (exact, + // arbitrary precision) and Float otherwise — matching json.decode and + // serial. A caller that decodes raw JSON into map[string]interface{} + // with dec.UseNumber() reaches this; it is the int-vs-float-preserving + // path that a plain json.Unmarshal (which collapses every number to + // float64, losing the int/float distinction) cannot offer. + v, err = marshalJSONNumber(x) + case *big.Int: + // the inverse of Unmarshal, which returns *big.Int for integers + // beyond uint64, so a marshal/unmarshal round-trip stays exact. + v = starlark.MakeBigInt(x) + case big.Int: + v = starlark.MakeBigInt(&x) case int: v = starlark.MakeInt(x) case int8: @@ -115,6 +131,28 @@ func Marshal(data interface{}) (v starlark.Value, err error) { return } +// marshalJSONNumber maps a json.Number to a Starlark Int (an integer literal, +// at arbitrary precision) or Float (a literal with a decimal point or +// exponent). 
It is the same int-vs-float rule json.decode and serial use, so a +// number written without a decimal point or exponent round-trips as an int and large +// integers keep their exact value instead of degrading through float64. +func marshalJSONNumber(n json.Number) (starlark.Value, error) { + s := n.String() + if !strings.ContainsAny(s, ".eE") { + if i, err := n.Int64(); err == nil { + return starlark.MakeInt64(i), nil + } + if bi, ok := new(big.Int).SetString(s, 10); ok { + return starlark.MakeBigInt(bi), nil + } + } + f, err := n.Float64() + if err != nil { + return nil, fmt.Errorf("invalid number %q: %w", s, err) + } + return starlark.Float(f), nil +} + // Unmarshal converts a starlark.Value into its Golang counterpart, like FromValue() of package starlight does. // // The contract: diff --git a/dataconv/marshal_test.go b/dataconv/marshal_test.go index ccf2aff..474e2f7 100644 --- a/dataconv/marshal_test.go +++ b/dataconv/marshal_test.go @@ -17,6 +17,14 @@ import ( "go.starlark.net/syntax" ) +func mustBigInt(s string) *big.Int { + bi, ok := new(big.Int).SetString(s, 10) + if !ok { + panic("bad big int literal: " + s) + } + return bi +} + func TestMarshal(t *testing.T) { expectedStringDict := starlark.NewDict(1) if err := expectedStringDict.SetKey(starlark.String("foo"), starlark.MakeInt(42)); err != nil { @@ -83,6 +91,19 @@ func TestMarshal(t *testing.T) { {uint64(1 << 42), starlark.MakeUint64(1 << 42), ""}, {float32(42), starlark.Float(42), ""}, {42., starlark.Float(42), ""}, + // json.Number maps by literal form: an integer literal -> Int (exact, + // arbitrary precision), a fractional/exponent literal -> Float. It is + // NOT mapped to a string (that would break value == 6 in scripts). 
+ {json.Number("42"), starlark.MakeInt(42), ""}, + {json.Number("-7"), starlark.MakeInt(-7), ""}, + {json.Number("6.5"), starlark.Float(6.5), ""}, + {json.Number("1e3"), starlark.Float(1000), ""}, + {json.Number("12345678901234567890"), starlark.MakeBigInt(mustBigInt("12345678901234567890")), ""}, + {json.Number("not-a-number"), nil, `invalid number "not-a-number"`}, + // *big.Int is the inverse of Unmarshal (which returns *big.Int beyond + // uint64), so a marshal/unmarshal round-trip stays exact. + {mustBigInt("12345678901234567890"), starlark.MakeBigInt(mustBigInt("12345678901234567890")), ""}, + {*mustBigInt("99"), starlark.MakeInt(99), ""}, {time.Unix(1588540633, 0), startime.Time(time.Unix(1588540633, 0)), ""}, {now, startime.Time(now), ""}, {[]byte("Aloha"), starlark.Bytes("Aloha"), ""},