diff --git a/assembly/__tests__/as-json.spec.ts b/assembly/__tests__/as-json.spec.ts index 0b16c691..f2bc840f 100644 --- a/assembly/__tests__/as-json.spec.ts +++ b/assembly/__tests__/as-json.spec.ts @@ -73,9 +73,18 @@ describe("Ser/de Numbers", () => { canSerde(10e2, "1000.0"); canSerde(123456e-5, "1.23456"); - canSerde(0.0, "0.0"); - canSerde(7.23, "7.23"); + canSerde(-7.23, "-7.23"); + + canSerde(1e-6, "0.000001"); + canSerde(1e-7, "1e-7"); + canDeser("1E-7", 1e-7); + + canSerde(1e20, "100000000000000000000.0"); + canSerde(1e21, "1e+21"); + canDeser("1E+21", 1e21); + canDeser("1e21", 1e21); + canDeser("1E21", 1e21); }); it("should ser/de booleans", () => { @@ -97,6 +106,11 @@ describe("Ser/de Array", () => { it("should ser/de float arrays", () => { canSerde([7.23, 10e2, 10e2, 123456e-5, 123456e-5, 0.0, 7.23]); + + canSerde([1e21,1e22,1e-7,1e-8,1e-9], "[1e+21,1e+22,1e-7,1e-8,1e-9]"); + canDeser("[1E+21,1E+22,1E-7,1E-8,1E-9]", [1e21,1e22,1e-7,1e-8,1e-9]); + canDeser("[1e21,1e22,1e-7,1e-8,1e-9]", [1e21,1e22,1e-7,1e-8,1e-9]); + canDeser("[1E21,1E22,1E-7,1E-8,1E-9]", [1e21,1e22,1e-7,1e-8,1e-9]); }); it("should ser/de boolean arrays", () => { @@ -167,6 +181,38 @@ describe("Ser/de Objects", () => { isVerified: true, }, '{"firstName":"Emmet","lastName":"West","lastActive":[8,27,2022],"age":23,"pos":{"x":3.4,"y":1.2,"z":8.3},"isVerified":true}'); }); + + it("should ser/de object with floats", () => { + canSerde({ f: 7.23 }, '{"f":7.23}'); + canSerde({ f: 0.000001 }, '{"f":0.000001}'); + + canSerde({ f: 1e-7 }, '{"f":1e-7}'); + canDeser('{"f":1E-7}', { f: 1e-7 }); + + canSerde({ f: 1e20 }, '{"f":100000000000000000000.0}'); + canSerde({ f: 1e21 }, '{"f":1e+21}'); + canDeser('{"f":1E+21}', { f: 1e21 }); + canDeser('{"f":1e21}', { f: 1e21 }); + }); + + it("should ser/de object with float arrays", () => { + canSerde( + { fa: [1e21,1e22,1e-7,1e-8,1e-9] }, + '{"fa":[1e+21,1e+22,1e-7,1e-8,1e-9]}'); + + canDeser( + '{"fa":[1E+21,1E+22,1E-7,1E-8,1E-9]}', + { fa: [1e21,1e22,1e-7,1e-8,1e-9] }); + + canDeser( + '{"fa":[1e21,1e22,1e-7,1e-8,1e-9]}', + { fa: [1e21,1e22,1e-7,1e-8,1e-9] }); + + canDeser( + '{"fa":[1E21,1E22,1E-7,1E-8,1E-9]}', + { fa: [1e21,1e22,1e-7,1e-8,1e-9] }); + + }); }); describe("Ser externals", () => { @@ -343,3 +389,253 @@ describe("Ser/de Maps", () => { }); }); + +describe("Ser/de escape sequences in strings", () => { + it("should encode short escape sequences", () => { + canSer("\\", '"\\\\"'); + canSer('"', '"\\""'); + canSer("\n", '"\\n"'); + canSer("\r", '"\\r"'); + canSer("\t", '"\\t"'); + canSer("\b", '"\\b"'); + canSer("\f", '"\\f"'); + }); + + it("should decode short escape sequences", () => { + canDeser('"\\\\"', "\\"); + canDeser('"\\""', '"'); + canDeser('"\\n"', "\n"); + canDeser('"\\r"', "\r"); + canDeser('"\\t"', "\t"); + canDeser('"\\b"', "\b"); + canDeser('"\\f"', "\f"); + }); + + it("should decode escaped forward slash but not encode", () => { + canSer("/", '"/"'); + canDeser('"/"', "/"); + canDeser('"\\/"', "/"); // allowed + }); + + // 0x00 - 0x1f, excluding characters that have short escape sequences + it("should encode long escape sequences", () => { + const singles = ["\n", "\r", "\t", "\b", "\f"]; + for (let i = 0; i < 0x1F; i++) { + const c = String.fromCharCode(i); + if (singles.includes(c)) continue; + const actual = JSON.stringify(c); + const expected = `"\\u${i.toString(16).padStart(4, "0")}"`; + expect(actual).toBe(expected, `Failed to encode '\\x${i.toString(16).padStart(2, "0")}'`); + } + }); + + // \u0000 - \u001f + it("should decode long escape sequences (lower cased)", () => { + for (let i = 0; i <= 0x1f; i++) { + const s = `"\\u${i.toString(16).padStart(4, "0").toLowerCase()}"`; + const actual = JSON.parse(s); + const expected = String.fromCharCode(i); + expect(actual).toBe(expected, `Failed to decode ${s}`); + } + }); + + // \u0000 - \u001F + it("should decode long escape sequences (upper cased)", () => { + for (let i = 0; i <= 0x1f; i++) { + const s = `"\\u${i.toString(16).padStart(4, "0").toUpperCase()}"`; + const actual = JSON.parse(s); + const expected = String.fromCharCode(i); + expect(actual).toBe(expected, `Failed to decode ${s}`); + } + }); + + // See https://datatracker.ietf.org/doc/html/rfc8259#section-7 + it("should decode UTF-16 surrogate pairs", () => { + const s = '"\\uD834\\uDD1E"'; + const actual = JSON.parse(s); + const expected = "𝄞"; + expect(actual).toBe(expected); + }); + + // Just because we can decode UTF-16 surrogate pairs, doesn't mean we should encode them. + it("should not encode UTF-16 surrogate pairs", () => { + const s = "𝄞"; + const actual = JSON.stringify(s); + const expected = '"𝄞"'; + expect(actual).toBe(expected); + }); + + it("should encode multiple escape sequences", () => { + canSer('"""', '"\\"\\"\\""'); + canSer('\\\\\\', '"\\\\\\\\\\\\"'); + }); + + it("cannot parse invalid escape sequences", () => { + expect(() => { + JSON.parse('"\\z"'); + }).toThrow(); + }); + +}); + +describe("Ser/de special strings in object values", () => { + it("should serialize quotes in string in object", () => { + const o: ObjWithString = { s: '"""' }; + const s = '{"s":"\\"\\"\\""}'; + canSer(o, s); + }); + it("should deserialize quotes in string in object", () => { + const o: ObjWithString = { s: '"""' }; + const s = '{"s":"\\"\\"\\""}'; + canDeser(s, o); + }); + it("should serialize backslashes in string in object", () => { + const o: ObjWithString = { s: "\\\\\\" }; + const s = '{"s":"\\\\\\\\\\\\"}'; + canSer(o, s); + }); + it("should deserialize backslashes in string in object", () => { + const o: ObjWithString = { s: "\\\\\\" }; + const s = '{"s":"\\\\\\\\\\\\"}'; + canDeser(s, o); + }); + + it("should deserialize slashes in string in object", () => { + const o: ObjWithString = { s: "//" }; + const s = '{"s":"/\\/"}'; + canDeser(s, o); + }); + it("should deserialize slashes in string in array", () => { + const a = ["/", "/"]; + const s = '["/","\/"]'; + canDeser(s, a); + }); + + it("should ser/de short escape sequences in strings in objects", () => { + const o: ObjWithString = { s: "\n\r\t\b\f" }; + const s = '{"s":"\\n\\r\\t\\b\\f"}'; + canSerde(o, s); + }); + + it("should ser/de short escape sequences in string arrays", () => { + const a = ["\n", "\r", "\t", "\b", "\f"]; + const s = '["\\n","\\r","\\t","\\b","\\f"]'; + canSerde(a, s); + }); + + it("should ser/de short escape sequences in string arrays in objects", () => { + const o: ObjectWithStringArray = { sa: ["\n", "\r", "\t", "\b", "\f"] }; + const s = '{"sa":["\\n","\\r","\\t","\\b","\\f"]}'; + canSerde(o, s); + }); + + it("should ser/de long escape sequences in strings in objects", () => { + const singles = ["\n", "\r", "\t", "\b", "\f"]; + let x = ""; + let y = ""; + for (let i = 0; i < 0x1F; i++) { + const c = String.fromCharCode(i); + if (singles.includes(c)) continue; + x += c; + y += `\\u${i.toString(16).padStart(4, "0")}`; + } + const o: ObjWithString = { s: x }; + const s = `{"s":"${y}"}`; + canSerde(o, s); + }); + + it("should ser/de long escape sequences in strings in arrays", () => { + const singles = ["\n", "\r", "\t", "\b", "\f"]; + let x: string[] = []; + let y: string[] = []; + for (let i = 0; i < 0x1F; i++) { + const c = String.fromCharCode(i); + if (singles.includes(c)) continue; + x.push(c); + y.push(`\\u${i.toString(16).padStart(4, "0")}`); + } + const a = x; + const s = `["${y.join('","')}"]`; + canSerde(a, s); + }); + + it("should ser/de long escape sequences in string arrays in objects", () => { + const singles = ["\n", "\r", "\t", "\b", "\f"]; + let x: string[] = []; + let y: string[] = []; + for (let i = 0; i < 0x1F; i++) { + const c = String.fromCharCode(i); + if (singles.includes(c)) continue; + x.push(c); + y.push(`\\u${i.toString(16).padStart(4, "0")}`); + } + const o: ObjectWithStringArray = { sa: x }; + const s = `{"sa":["${y.join('","')}"]}`; + canSerde(o, s); + }); + +}); + +describe("Ser/de special strings in object keys", () => { + + it("should ser/de escape sequences in key of object with int value", () => { + const o: ObjWithStrangeKey = { data: 123 }; + const s = '{"a\\\\\\t\\"\\u0002b`c":123}'; + canSerde(o, s); + }); + + it("should ser/de escape sequences in key of object with float value", () => { + const o: ObjWithStrangeKey = { data: 123.4 }; + const s = '{"a\\\\\\t\\"\\u0002b`c":123.4}'; + canSerde(o, s); + }); + + it("should ser/de escape sequences in key of object with string value", () => { + const o: ObjWithStrangeKey = { data: "abc" }; + const s = '{"a\\\\\\t\\"\\u0002b`c":"abc"}'; + canSerde(o, s); + }); + + // Something buggy in as-pect needs a dummy value reflected here + // or the subsequent test fails. It's not used in any test. + Reflect.toReflectedValue(0); + + it("should ser/de escape sequences in map key", () => { + const m = new Map(); + m.set('a\\\t"\x02b', 'abc'); + const s = '{"a\\\\\\t\\"\\u0002b":"abc"}'; + canSerde(m, s); + }); + it("should ser/de escape sequences in map value", () => { + const m = new Map(); + m.set('abc', 'a\\\t"\x02b'); + const s = '{"abc":"a\\\\\\t\\"\\u0002b"}'; + canSerde(m, s); + }); +}); + +@json +class ObjWithString { + s!: string; +} + +@json +class ObjectWithStringArray { + sa!: string[]; +} + +@json +class ObjectWithFloat { + f!: f64; +} + +@json +class ObjectWithFloatArray { + fa!: f64[]; +} + +@json +class ObjWithStrangeKey { + @alias('a\\\t"\x02b`c') + data!: T; +} diff --git a/assembly/src/chars.ts b/assembly/src/chars.ts index 1bdf32d9..1d58c91f 100644 --- a/assembly/src/chars.ts +++ b/assembly/src/chars.ts @@ -35,6 +35,8 @@ @inline export const sCode = 115; // @ts-ignore = Decorator is valid here @inline export const nCode = 110; +// @ts-ignore = Decorator is valid here +@inline export const bCode = 98; // Strings // @ts-ignore: Decorator is valid here @inline export const trueWord = "true"; @@ -58,6 +60,15 @@ @inline export const rightBracketWord = "]"; // @ts-ignore: Decorator is valid here @inline export const quoteWord = "\""; + // Escape Codes // @ts-ignore: Decorator is valid here -@inline export const newLineCode = 10; +@inline export const backspaceCode = 8; // \b +// @ts-ignore: Decorator is valid here +@inline export const tabCode = 9; // \t +// @ts-ignore: Decorator is valid here +@inline export const newLineCode = 10; // \n +// @ts-ignore: Decorator is valid here +@inline export const formFeedCode = 12; // \f +// @ts-ignore: Decorator is valid here +@inline export const carriageReturnCode = 13; // \r diff --git a/assembly/src/json.ts b/assembly/src/json.ts index 11fb1ade..1a4e3cc8 100644 --- a/assembly/src/json.ts +++ b/assembly/src/json.ts @@ -2,6 +2,7 @@ import { StringSink } from "as-string-sink/assembly"; import { isSpace } from "util/string"; import { aCode, + bCode, eCode, fCode, lCode, @@ -14,20 +15,24 @@ import { backSlashCode, colonCode, commaCode, + forwardSlashCode, leftBraceCode, leftBracketCode, - newLineCode, quoteCode, rightBraceCode, rightBracketCode, - colonWord, + backspaceCode, + carriageReturnCode, + tabCode, + formFeedCode, + newLineCode, + commaWord, quoteWord, leftBraceWord, leftBracketWord, - rightBraceWord, rightBracketWord, emptyArrayWord, @@ -35,7 +40,7 @@ import { falseWord, nullWord, } from "./chars"; -import { snip_fast, unsafeCharCodeAt } from "./util"; +import { snip_fast, unsafeCharCodeAt, containsCodePoint } from "./util"; import { Virtual } from "as-virtual/assembly"; /** @@ -68,7 +73,7 @@ export namespace JSON { // @ts-ignore: Hidden function return data.__JSON_Serialize(); } else if (data instanceof Date) { - return "\"" + data.toISOString() + "\""; + return `"${data.toISOString()}"`; } else if (isArrayLike()) { // @ts-ignore if (data.length == 0) { @@ -100,11 +105,11 @@ export namespace JSON { for (let i = 0; i < data.length - 1; i++) { // @ts-ignore result.write(JSON.stringify(unchecked(data[i]))); - result.write(commaWord); + result.writeCodePoint(commaCode); } // @ts-ignore result.write(JSON.stringify(unchecked(data[data.length - 1]))); - result.write(rightBracketWord); + result.writeCodePoint(rightBracketCode); return result.toString(); } } else if (data instanceof Map) { @@ -112,14 +117,14 @@ export namespace JSON { let keys = data.keys(); let values = data.values(); for (let i = 0; i < data.size; i++) { - result.write(serializeString(keys[i].toString())); - result.write(colonWord); - result.write(JSON.stringify(values[i])); + result.write(serializeString(unchecked(keys[i]).toString())); + result.writeCodePoint(colonCode); + result.write(JSON.stringify(unchecked(values[i]))); if (i < data.size - 1) { - result.write(commaWord); + result.writeCodePoint(commaCode); } } - result.write(rightBraceWord); + result.writeCodePoint(rightBraceCode); return result.toString(); } else { throw new Error( @@ -194,11 +199,11 @@ export namespace JSON { for (let i = 0; i < data.length - 1; i++) { // @ts-ignore result.write(JSON.stringify(unchecked(data[i]))); - result.write(commaWord); + result.writeCodePoint(commaCode); } // @ts-ignore result.write(JSON.stringify(unchecked(data[data.length - 1]))); - result.write(rightBracketWord); + result.writeCodePoint(rightBracketCode); out = result.toString(); return; } @@ -254,51 +259,8 @@ export namespace JSON { @global @inline function __parseObjectValue(data: string, initializeDefaultValues: boolean): T { let type: T; if (isString()) { - let result = ""; - let last = 0; - for (let i = 0; i < data.length; i++) { - // \\" - if (unsafeCharCodeAt(data, i) === backSlashCode) { - const char = unsafeCharCodeAt(data, ++i); - result += data.slice(last, i - 1); - if (char === 34) { - result += '"'; - last = ++i; - } else if (char === 110) { - result += "\n"; - last = ++i; - // 92 98 114 116 102 117 - } else if (char >= 92 && char <= 117) { - if (char === 92) { - result += "\\"; - last = ++i; - } else if (char === 98) { - result += "\b"; - last = ++i; - } else if (char === 102) { - result += "\f"; - last = ++i; - } else if (char === 114) { - result += "\r"; - last = ++i; - } else if (char === 116) { - result += "\t"; - last = ++i; - } else if ( - char === 117 && - load(changetype(data) + ((i + 1) << 1)) === - 27584753879220272 - ) { - result += "\u000b"; - i += 4; - last = ++i; - } - } - } - } - result += data.slice(last); // @ts-ignore - return result; + return data; } else if (isBoolean()) { // @ts-ignore return parseBoolean(data); @@ -327,118 +289,133 @@ export namespace JSON { // @ts-ignore: Decorator @inline function serializeString(data: string): string { - let result = new StringSink('"'); + if (data.length === 0) { + return quoteWord + quoteWord; + } + + let result = new StringSink(quoteWord); let last: i32 = 0; for (let i = 0; i < data.length; i++) { const char = unsafeCharCodeAt(data, i); - if (char === 34 || char === 92) { + if (char === quoteCode || char === backSlashCode) { result.write(data, last, i); result.writeCodePoint(backSlashCode); last = i; - } else if (char <= 13 && char >= 8) { + } else if (char < 16) { result.write(data, last, i); last = i + 1; switch (char) { - case 8: { + case backspaceCode: { result.write("\\b"); break; } - case 9: { + case tabCode: { result.write("\\t"); break; } - case 10: { + case newLineCode: { result.write("\\n"); break; } - case 11: { - result.write("\\x0B"); // \\u000b - break; - } - case 12: { + case formFeedCode: { result.write("\\f"); break; } - case 13: { + case carriageReturnCode: { result.write("\\r"); break; } + default: { + // all chars 0-31 must be encoded as a four digit unicode escape sequence + // \u0000 to \u000f handled here + result.write("\\u000"); + result.write(char.toString(16)); + break; + } } + } else if (char < 32) { + result.write(data, last, i); + last = i + 1; + // all chars 0-31 must be encoded as a four digit unicode escape sequence + // \u0010 to \u001f handled here + result.write("\\u00"); + result.write(char.toString(16)); } } - if (result.length === 1) { - return quoteWord + data + quoteWord; - } result.write(data, last); - result.write(quoteWord); + result.writeCodePoint(quoteCode); return result.toString(); } // @ts-ignore: Decorator -@inline function parseString(data: string): string { - let result = new StringSink(); - let last = 1; - for (let i = 1; i < data.length - 1; i++) { - // \\" - if (unsafeCharCodeAt(data, i) === backSlashCode) { - const char = unsafeCharCodeAt(data, ++i); - result.write(data, last, i - 1); - if (char === 34) { +@inline function parseString(data: string, start: i32 = 0, end: i32 = 0): string { + end = end || data.length - 1; + let result = StringSink.withCapacity(end - start - 1); + let last = start + 1; + for (let i = last; i < end; i++) { + if (unsafeCharCodeAt(data, i) !== backSlashCode) { + continue; + } + const char = unsafeCharCodeAt(data, ++i); + result.write(data, last, i - 1); + switch (char) { + case quoteCode: { result.writeCodePoint(quoteCode); last = i + 1; - } else if (char >= 92 && char <= 117) { - switch (char) { - case 92: { - result.writeCodePoint(backSlashCode); - last = i + 1; - break; - } - case 98: { - result.write("\b"); - last = i + 1; - break; - } - case 102: { - result.write("\f"); - last = i + 1; - break; - } - case 110: { - result.writeCodePoint(newLineCode); - last = i + 1; - break; - } - case 114: { - result.write("\r"); - last = i + 1; - break; - } - case 116: { - result.write("\t"); - last = i + 1; - break; - } - default: { - if ( - char === 117 && - load(changetype(data) + ((i + 1) << 1)) === - 27584753879220272 - ) { - result.write("\u000b"); - i += 4; - last = i + 1; - } - break; - } - } + break; + } + case backSlashCode: { + result.writeCodePoint(backSlashCode); + last = i + 1; + break; + } + case forwardSlashCode: { + result.writeCodePoint(forwardSlashCode); + last = i + 1; + break; + } + case bCode: { + result.writeCodePoint(backspaceCode); + last = i + 1; + break; + } + case fCode: { + result.writeCodePoint(formFeedCode); + last = i + 1; + break; + } + case nCode: { + result.writeCodePoint(newLineCode); + last = i + 1; + break; + } + case rCode: { + result.writeCodePoint(carriageReturnCode); + last = i + 1; + break; + } + case tCode: { + result.writeCodePoint(tabCode); + last = i + 1; + break; + } + case uCode: { + const code = u16.parse(data.slice(i + 1, i + 5), 16); + result.writeCodePoint(code); + i += 4; + last = i + 1; + break; + } + default: { + throw new Error(`JSON: Cannot parse "${data}" as string. Invalid escape sequence: \\${data.charAt(i)}`); } } } - if ((data.length - 1) > last) { - result.write(data, last, data.length - 1); + if (end > last) { + result.write(data, last, end); } - return result.toString(); + return result.toString() } // @ts-ignore: Decorator @@ -491,7 +468,7 @@ export namespace JSON { if (depth === 0) { ++arrayValueIndex; // @ts-ignore - schema.__JSON_Set_Key>(key, data, outerLoopIndex, arrayValueIndex, initializeDefaultValues); + schema.__JSON_Set_Key(key, data, outerLoopIndex, arrayValueIndex, initializeDefaultValues); outerLoopIndex = arrayValueIndex; isKey = false; break; @@ -512,7 +489,7 @@ export namespace JSON { if (depth === 0) { ++objectValueIndex; // @ts-ignore - schema.__JSON_Set_Key>(key, data, outerLoopIndex, objectValueIndex, initializeDefaultValues); + schema.__JSON_Set_Key(key, data, outerLoopIndex, objectValueIndex, initializeDefaultValues); outerLoopIndex = objectValueIndex; isKey = false; break; @@ -530,15 +507,19 @@ export namespace JSON { if (char === backSlashCode && !escaping) { escaping = true; } else { - if ( - char === quoteCode && !escaping - ) { + if (char === quoteCode && !escaping) { if (isKey === false) { - key.reinst(data, outerLoopIndex, stringValueIndex); + // perf: we can avoid creating a new string here if the key doesn't contain any escape sequences + if (containsCodePoint(data, backSlashCode, outerLoopIndex, stringValueIndex)) { + key.reinst(parseString(data, outerLoopIndex - 1, stringValueIndex)); + } else { + key.reinst(data, outerLoopIndex, stringValueIndex); + } isKey = true; } else { + const value = parseString(data, outerLoopIndex - 1, stringValueIndex); // @ts-ignore - schema.__JSON_Set_Key>(key, data, outerLoopIndex, stringValueIndex, initializeDefaultValues); + schema.__JSON_Set_Key(key, value, 0, value.length, initializeDefaultValues); isKey = false; } outerLoopIndex = ++stringValueIndex; @@ -554,7 +535,7 @@ export namespace JSON { unsafeCharCodeAt(data, ++outerLoopIndex) === lCode ) { // @ts-ignore - schema.__JSON_Set_Key>(key, nullWord, 0, 4, initializeDefaultValues); + schema.__JSON_Set_Key(key, nullWord, 0, 4, initializeDefaultValues); isKey = false; } else if ( char === tCode && @@ -563,7 +544,7 @@ export namespace JSON { unsafeCharCodeAt(data, ++outerLoopIndex) === eCode ) { // @ts-ignore - schema.__JSON_Set_Key>(key, trueWord, 0, 4, initializeDefaultValues); + schema.__JSON_Set_Key(key, trueWord, 0, 4, initializeDefaultValues); isKey = false; } else if ( char === fCode && @@ -573,7 +554,7 @@ export namespace JSON { unsafeCharCodeAt(data, ++outerLoopIndex) === eCode ) { // @ts-ignore - schema.__JSON_Set_Key>(key, falseWord, 0, 5, initializeDefaultValues); + schema.__JSON_Set_Key(key, falseWord, 0, 5, initializeDefaultValues); isKey = false; } else if ((char >= 48 && char <= 57) || char === 45) { let numberValueIndex = ++outerLoopIndex; @@ -581,7 +562,7 @@ export namespace JSON { const char = unsafeCharCodeAt(data, numberValueIndex); if (char === commaCode || char === rightBraceCode || isSpace(char)) { // @ts-ignore - schema.__JSON_Set_Key>(key, data, outerLoopIndex - 1, numberValueIndex, initializeDefaultValues); + schema.__JSON_Set_Key(key, data, outerLoopIndex - 1, numberValueIndex, initializeDefaultValues); outerLoopIndex = numberValueIndex; isKey = false; break; @@ -664,11 +645,17 @@ export namespace JSON { char === quoteCode && !escaping ) { if (isKey === false) { - key.reinst(data, outerLoopIndex, stringValueIndex); + // perf: we can avoid creating a new string here if the key doesn't contain any escape sequences + if (containsCodePoint(data, backSlashCode, outerLoopIndex, stringValueIndex)) { + key.reinst(parseString(data, outerLoopIndex - 1, stringValueIndex)); + } else { + key.reinst(data, outerLoopIndex, stringValueIndex); + } isKey = true; } else { if (isString>()) { - map.set(parseMapKey>(key), data.slice(outerLoopIndex, stringValueIndex)); + const value = parseString(data, outerLoopIndex - 1, stringValueIndex); + map.set(parseMapKey>(key), value); } isKey = false; } @@ -788,7 +775,7 @@ export namespace JSON { lastPos = i; } else { instr = false; - result.push(parseString(data.slice(lastPos, i + 1))); + result.push(parseString(data, lastPos, i)); } } escaping = false; @@ -803,16 +790,6 @@ export namespace JSON { let lastPos = 1; for (let i = 1; i < data.length - 1; i++) { const char = unsafeCharCodeAt(data, i); - /*// if char == "t" && i+3 == "e" - if (char === tCode && data.charCodeAt(i + 3) === eCode) { - //i += 3; - result.push(parseBoolean>(data.slice(lastPos, i+2))); - //i++; - } else if (char === fCode && data.charCodeAt(i + 4) === eCode) { - //i += 4; - result.push(parseBoolean>(data.slice(lastPos, i+3))); - //i++; - }*/ if (char === tCode || char === fCode) { lastPos = i; } else if (char === eCode) { @@ -830,7 +807,7 @@ export namespace JSON { let i = 1; for (; i < data.length - 1; i++) { const char = unsafeCharCodeAt(data, i); - if ((lastPos === 0 && char >= 48 && char <= 57) || char === 45) { + if (lastPos === 0 && ((char >= 48 && char <= 57) || char === 45)) { lastPos = i; } else if ((isSpace(char) || char == commaCode) && lastPos > 0) { result.push(parseNumber>(data.slice(lastPos, i))); diff --git a/assembly/src/util.ts b/assembly/src/util.ts index 44c12c44..ca2714c1 100644 --- a/assembly/src/util.ts +++ b/assembly/src/util.ts @@ -12,11 +12,11 @@ import { backSlashCode, quoteCode } from "./chars"; const result = new StringSink(); let instr = false; for (let i = 0; i < data.length; i++) { - const char = data.charCodeAt(i); + const char = unsafeCharCodeAt(data, i); if (instr === false && char === quoteCode) instr = true; else if ( instr === true && char === quoteCode - && data.charCodeAt(i - 1) !== backSlashCode + && unsafeCharCodeAt(data, i - 1) !== backSlashCode ) instr = false; if (instr === false) { @@ -347,4 +347,12 @@ import { backSlashCode, quoteCode } from "./chars"; return load(changetype(p1_data) + p1_start) == load(changetype(p2_data) + p2_start) } return memory.compare(changetype(p1_data) + p1_start, changetype(p2_data) + p2_start, p1_len) === 0; -} \ No newline at end of file +} + +// @ts-ignore +@inline export function containsCodePoint(str: string, code: u32, start: i32, end: i32): bool { + for (let i = start; i <= end; i++) { + if (unsafeCharCodeAt(str, i) == code) return true; + } + return false; +} diff --git a/transform/lib/index.js b/transform/lib/index.js index 15517456..4e43b209 100644 --- a/transform/lib/index.js +++ b/transform/lib/index.js @@ -1,3 +1,4 @@ +import { Parser, Source, Tokenizer, } from "assemblyscript/dist/assemblyscript.js"; import { toString, isStdlib } from "visitor-as/dist/utils.js"; import { BaseVisitor, SimpleParser } from "visitor-as/dist/index.js"; import { Transform } from "assemblyscript/dist/transform.js"; @@ -17,7 +18,7 @@ class AsJSONTransform extends BaseVisitor { constructor() { super(...arguments); this.schemasList = []; - this.sources = []; + this.sources = new Set(); } visitMethodDeclaration() { } visitClassDeclaration(node) { @@ -77,8 +78,10 @@ class AsJSONTransform extends BaseVisitor { let type = toString(member.type); const name = member.name.text; let aliasName = name; + // @ts-ignore if (member.decorators && ((_d = member.decorators[0]) === null || _d === void 0 ? void 0 : _d.name.text) === "alias") { if (member.decorators[0] && member.decorators[0].args[0]) { + // @ts-ignore aliasName = member.decorators[0].args[0].value; } } @@ -96,13 +99,12 @@ class AsJSONTransform extends BaseVisitor { "u64", "i64", ].includes(type.toLowerCase())) { - this.currentClass.encodeStmts.push(`"${aliasName}":\${this.${name}.toString()},`); + this.currentClass.encodeStmts.push(`${encodeKey(aliasName)}:\${this.${name}},`); // @ts-ignore - this.currentClass.setDataStmts.push(`if (key.equals("${aliasName}")) { - this.${name} = __atoi_fast<${type}>(data, val_start << 1, val_end << 1); - return; - } - `); + this.currentClass.setDataStmts.push(`if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __atoi_fast<${type}>(data, val_start << 1, val_end << 1); + return; + }`); if (member.initializer) { this.currentClass.initializeStmts.push(`this.${name} = ${toString(member.initializer)}`); } @@ -112,25 +114,23 @@ class AsJSONTransform extends BaseVisitor { "f32", "f64", ].includes(type.toLowerCase())) { - this.currentClass.encodeStmts.push(`"${aliasName}":\${this.${name}.toString()},`); + this.currentClass.encodeStmts.push(`${encodeKey(aliasName)}:\${this.${name}},`); // @ts-ignore - this.currentClass.setDataStmts.push(`if (key.equals("${aliasName}")) { - this.${name} = __parseObjectValue<${type}>(data.slice(val_start, val_end), initializeDefaultValues); - return; - } - `); + this.currentClass.setDataStmts.push(`if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __parseObjectValue<${type}>(data.slice(val_start, val_end), initializeDefaultValues); + return; + }`); if (member.initializer) { this.currentClass.initializeStmts.push(`this.${name} = ${toString(member.initializer)}`); } } else { - this.currentClass.encodeStmts.push(`"${aliasName}":\${JSON.stringify<${type}>(this.${name})},`); + this.currentClass.encodeStmts.push(`${encodeKey(aliasName)}:\${JSON.stringify<${type}>(this.${name})},`); // @ts-ignore - this.currentClass.setDataStmts.push(`if (key.equals("${aliasName}")) { - this.${name} = __parseObjectValue<${type}>(val_start ? data.slice(val_start, val_end) : data, initializeDefaultValues); - return; - } - `); + this.currentClass.setDataStmts.push(`if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __parseObjectValue<${type}>(val_start ? data.slice(val_start, val_end) : data, initializeDefaultValues); + return; + }`); if (member.initializer) { this.currentClass.initializeStmts.push(`this.${name} = ${toString(member.initializer)}`); } @@ -145,23 +145,17 @@ class AsJSONTransform extends BaseVisitor { serializeFunc = ` @inline __JSON_Serialize(): string { return \`{${this.currentClass.encodeStmts.join("")}}\`; - } - `; + }`; } else { serializeFunc = ` @inline __JSON_Serialize(): string { return "{}"; - } - `; + }`; } - // Odd behavior here... When pairing this transform with asyncify, having @inline on __JSON_Set_Key with a generic will cause it to freeze. - // Binaryen cannot predict and add/mangle code when it is genericed. const setKeyFunc = ` - __JSON_Set_Key<__JSON_Key_Type>(key: __JSON_Key_Type, data: string, val_start: i32, val_end: i32, initializeDefaultValues: boolean): void { - ${ - // @ts-ignore - this.currentClass.setDataStmts.join("")} + @inline __JSON_Set_Key(key: __Virtual, data: string, val_start: i32, val_end: i32, initializeDefaultValues: boolean): void { + ${this.currentClass.setDataStmts.join("\n ")} } `; let initializeFunc = ""; @@ -184,12 +178,37 @@ class AsJSONTransform extends BaseVisitor { const initializeMethod = SimpleParser.parseClassMember(initializeFunc, node); node.members.push(initializeMethod); this.schemasList.push(this.currentClass); - //console.log(toString(node)); + this.sources.add(node.name.range.source); + // Uncomment to see the generated code for debugging. + // console.log(serializeFunc); + // console.log(setKeyFunc); + // console.log(initializeFunc); } visitSource(node) { super.visitSource(node); + // Only add the import statement to sources that have JSON decorated classes. + if (!this.sources.has(node)) { + return; + } + // Note, the following one liner would be easier, but it fails with an assertion error + // because as-virtual's SimpleParser doesn't set the parser.currentSource correctly. + // + // const stmt = SimpleParser.parseTopLevelStatement('import { Virtual as __Virtual } from "as-virtual/assembly";'); + // ... So we have to do it the long way: + const s = 'import { Virtual as __Virtual } from "as-virtual/assembly";'; + const t = new Tokenizer(new Source(0 /* SourceKind.User */, "index.ts", s)); + const p = new Parser(); + p.currentSource = t.source; + const stmt = p.parseTopLevelStatement(t); + // Add the import statement to the top of the source. + node.statements.unshift(stmt); } } +function encodeKey(aliasName) { + return JSON.stringify(aliasName) + .replace(/\\/g, "\\\\") + .replace(/\`/g, '\\`'); +} export default class Transformer extends Transform { // Trigger the transform after parse. afterParse(parser) { diff --git a/transform/src/index.ts b/transform/src/index.ts index 24141196..c84ba2ee 100644 --- a/transform/src/index.ts +++ b/transform/src/index.ts @@ -1,9 +1,12 @@ import { ClassDeclaration, FieldDeclaration, - Source, Parser, -} from "assemblyscript/dist/assemblyscript"; + Source, + SourceKind, + Tokenizer, +} from "assemblyscript/dist/assemblyscript.js"; + import { toString, isStdlib } from "visitor-as/dist/utils.js"; import { BaseVisitor, SimpleParser } from "visitor-as/dist/index.js"; import { Transform } from "assemblyscript/dist/transform.js"; @@ -23,7 +26,7 @@ class SchemaData { class AsJSONTransform extends BaseVisitor { public schemasList: SchemaData[] = []; public currentClass!: SchemaData; - public sources: Source[] = []; + public sources = new Set(); visitMethodDeclaration(): void { } visitClassDeclaration(node: ClassDeclaration): void { @@ -89,8 +92,11 @@ class AsJSONTransform extends BaseVisitor { const name = member.name.text; let aliasName = name; + + // @ts-ignore if (member.decorators && member.decorators[0]?.name.text === "alias") { if (member.decorators[0] && member.decorators[0].args![0]) { + // @ts-ignore aliasName = member.decorators[0].args![0].value; } } @@ -111,15 +117,14 @@ class AsJSONTransform extends BaseVisitor { ].includes(type.toLowerCase()) ) { this.currentClass.encodeStmts.push( - `"${aliasName}":\${this.${name}.toString()},` + `${encodeKey(aliasName)}:\${this.${name}},` ); // @ts-ignore this.currentClass.setDataStmts.push( - `if (key.equals("${aliasName}")) { - this.${name} = __atoi_fast<${type}>(data, val_start << 1, val_end << 1); - return; - } - ` + `if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __atoi_fast<${type}>(data, val_start << 1, val_end << 1); + return; + }` ); if (member.initializer) { this.currentClass.initializeStmts.push( @@ -134,15 +139,14 @@ class AsJSONTransform extends BaseVisitor { ].includes(type.toLowerCase()) ) { this.currentClass.encodeStmts.push( - `"${aliasName}":\${this.${name}.toString()},` + `${encodeKey(aliasName)}:\${this.${name}},` ); // @ts-ignore this.currentClass.setDataStmts.push( - `if (key.equals("${aliasName}")) { - this.${name} = __parseObjectValue<${type}>(data.slice(val_start, val_end), initializeDefaultValues); - return; - } - ` + `if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __parseObjectValue<${type}>(data.slice(val_start, val_end), initializeDefaultValues); + return; + }` ); if (member.initializer) { this.currentClass.initializeStmts.push( @@ -151,15 +155,14 @@ class AsJSONTransform extends BaseVisitor { } } else { this.currentClass.encodeStmts.push( - `"${aliasName}":\${JSON.stringify<${type}>(this.${name})},` + `${encodeKey(aliasName)}:\${JSON.stringify<${type}>(this.${name})},` ); // @ts-ignore this.currentClass.setDataStmts.push( - `if (key.equals("${aliasName}")) { - this.${name} = __parseObjectValue<${type}>(val_start ? data.slice(val_start, val_end) : data, initializeDefaultValues); - return; - } - ` + `if (key.equals(${JSON.stringify(aliasName)})) { + this.${name} = __parseObjectValue<${type}>(val_start ? data.slice(val_start, val_end) : data, initializeDefaultValues); + return; + }` ); if (member.initializer) { this.currentClass.initializeStmts.push( @@ -182,24 +185,17 @@ class AsJSONTransform extends BaseVisitor { serializeFunc = ` @inline __JSON_Serialize(): string { return \`{${this.currentClass.encodeStmts.join("")}}\`; - } - `; + }`; } else { serializeFunc = ` @inline __JSON_Serialize(): string { return "{}"; - } - `; + }`; } - // Odd behavior here... When pairing this transform with asyncify, having @inline on __JSON_Set_Key with a generic will cause it to freeze. - // Binaryen cannot predict and add/mangle code when it is genericed. const setKeyFunc = ` - __JSON_Set_Key<__JSON_Key_Type>(key: __JSON_Key_Type, data: string, val_start: i32, val_end: i32, initializeDefaultValues: boolean): void { - ${ - // @ts-ignore - this.currentClass.setDataStmts.join("") - } + @inline __JSON_Set_Key(key: __Virtual, data: string, val_start: i32, val_end: i32, initializeDefaultValues: boolean): void { + ${this.currentClass.setDataStmts.join("\n ")} } `; @@ -226,13 +222,45 @@ class AsJSONTransform extends BaseVisitor { node.members.push(initializeMethod); this.schemasList.push(this.currentClass); - //console.log(toString(node)); + this.sources.add(node.name.range.source); + + // Uncomment to see the generated code for debugging. + // console.log(serializeFunc); + // console.log(setKeyFunc); + // console.log(initializeFunc); } + visitSource(node: Source): void { super.visitSource(node); + + // Only add the import statement to sources that have JSON decorated classes. + if (!this.sources.has(node)) { + return; + } + + // Note, the following one liner would be easier, but it fails with an assertion error + // because as-virtual's SimpleParser doesn't set the parser.currentSource correctly. + // + // const stmt = SimpleParser.parseTopLevelStatement('import { Virtual as __Virtual } from "as-virtual/assembly";'); + + // ... So we have to do it the long way: + const s = 'import { Virtual as __Virtual } from "as-virtual/assembly";' + const t = new Tokenizer(new Source(SourceKind.User, "index.ts", s)); + const p = new Parser(); + p.currentSource = t.source; + const stmt = p.parseTopLevelStatement(t)!; + + // Add the import statement to the top of the source. + node.statements.unshift(stmt); } } +function encodeKey(aliasName: string): string { + return JSON.stringify(aliasName) + .replace(/\\/g, "\\\\") + .replace(/\`/g, '\\`'); +} + export default class Transformer extends Transform { // Trigger the transform after parse. afterParse(parser: Parser): void {