diff --git a/vmap/decoder_scan.go b/vmap/decoder_scan.go new file mode 100644 index 0000000..4fe3512 --- /dev/null +++ b/vmap/decoder_scan.go @@ -0,0 +1,561 @@ +package vmap + +import ( + "bytes" + "errors" + "strconv" + "unsafe" +) + +// byteStr converts b to a string without copying. The returned string +// shares memory with b; b must not be modified while the string is in use. +func byteStr(b []byte) string { + if len(b) == 0 { + return "" + } + return unsafe.String(unsafe.SliceData(b), len(b)) +} + +// decodeXMLStr converts XML text bytes to a Go string, decoding entities. +// Zero-copy when no entities are present. +func decodeXMLStr(b []byte) string { + if len(b) == 0 { + return "" + } + if bytes.IndexByte(b, '&') < 0 { + return byteStr(b) + } + cp := make([]byte, len(b)) + copy(cp, b) + return string(xmlStringToString(cp)) +} + +// scan is a minimal byte scanner for VMAP/VAST XML. +type scan struct { + data []byte + pos int +} + +// next finds the next XML tag. Returns the tag name as a slice of the +// input, whether it is an end tag, and whether it is self-closing. +// After return, pos is right after the tag name (before attrs and '>'). +// For end tags, pos is advanced past '>'. +func (s *scan) next() (name []byte, isEnd, selfClose bool) { + for { + i := bytes.IndexByte(s.data[s.pos:], '<') + if i < 0 { + s.pos = len(s.data) + return nil, false, false + } + s.pos += i + 1 + if s.pos >= len(s.data) { + return nil, false, false + } + + c := s.data[s.pos] + if c == '?' || c == '!' { + j := bytes.IndexByte(s.data[s.pos:], '>') + if j < 0 { + s.pos = len(s.data) + return nil, false, false + } + s.pos += j + 1 + continue + } + + if c == '/' { + isEnd = true + s.pos++ + } + + start := s.pos + for s.pos < len(s.data) { + c = s.data[s.pos] + if c == ' ' || c == '>' || c == '/' || c == '\t' || c == '\n' || c == '\r' { + break + } + s.pos++ + } + name = s.data[start:s.pos] + // Strip namespace prefix (e.g., "vmap:VMAP" -> "VMAP") + if colon := bytes.IndexByte(name, ':'); colon >= 0 { + name = name[colon+1:] + } + + if isEnd { + j := bytes.IndexByte(s.data[s.pos:], '>') + if j >= 0 { + s.pos += j + 1 + } + return name, true, false + } + + j := bytes.IndexByte(s.data[s.pos:], '>') + if j >= 0 && j > 0 && s.data[s.pos+j-1] == '/' { + selfClose = true + } + return name, false, selfClose + } +} + +// attr finds the value of the named attribute in the current tag, +// matching on the local name (after any namespace prefix). +// Must be called after next() and before endAttrs(). +func (s *scan) attr(name string) []byte { + gt := bytes.IndexByte(s.data[s.pos:], '>') + if gt < 0 { + return nil + } + end := s.pos + gt + region := s.data[s.pos:end] + + // Try ' name="' (no namespace prefix) + var buf [64]byte + buf[0] = ' ' + n := 1 + copy(buf[1:], name) + buf[n] = '=' + buf[n+1] = '"' + + i := bytes.Index(region, buf[:n+2]) + if i >= 0 { + valStart := i + n + 2 + valEnd := bytes.IndexByte(region[valStart:], '"') + if valEnd >= 0 { + return s.data[s.pos+valStart : s.pos+valStart+valEnd] + } + } + + // Try ':name="' (namespace-prefixed, e.g. xmlns:vmap="...") + buf[0] = ':' + i = bytes.Index(region, buf[:n+2]) + if i >= 0 { + valStart := i + n + 2 + valEnd := bytes.IndexByte(region[valStart:], '"') + if valEnd >= 0 { + return s.data[s.pos+valStart : s.pos+valStart+valEnd] + } + } + + return nil +} + +// endAttrs advances past the '>' of the current start tag. +func (s *scan) endAttrs() { + j := bytes.IndexByte(s.data[s.pos:], '>') + if j >= 0 { + s.pos += j + 1 + } +} + +// text extracts text or CDATA content from the current position until the +// next '<'. Returns the raw bytes and whether it was CDATA. +func (s *scan) text() (content []byte, wasCDATA bool) { + if s.pos >= len(s.data) { + return nil, false + } + + // Skip whitespace before checking for CDATA + p := s.pos + for p < len(s.data) && (s.data[p] == ' ' || s.data[p] == '\t' || s.data[p] == '\n' || s.data[p] == '\r') { + p++ + } + + const cdataOpen = "" + if p+len(cdataOpen) <= len(s.data) && string(s.data[p:p+len(cdataOpen)]) == cdataOpen { + start := p + len(cdataOpen) + end := bytes.Index(s.data[start:], []byte(cdataClose)) + if end < 0 { + return nil, false + } + s.pos = start + end + len(cdataClose) + return s.data[start : start+end], true + } + + i := bytes.IndexByte(s.data[s.pos:], '<') + if i < 0 { + return nil, false + } + content = bytes.TrimSpace(s.data[s.pos : s.pos+i]) + s.pos += i + if len(content) == 0 { + return nil, false + } + return content, false +} + +// textStr extracts text content and returns it as a decoded string. +func (s *scan) textStr() string { + content, wasCDATA := s.text() + if content == nil { + return "" + } + if wasCDATA { + return byteStr(content) + } + return decodeXMLStr(content) +} + +// --- Top-level decoders --- + +// DecodeVmapScan decodes a VMAP document using direct byte scanning. +// String fields in the returned struct may reference the input slice; +// the input must not be modified while the result is in use. +func DecodeVmapScan(input []byte) (VMAP, error) { + var vmap VMAP + s := scan{data: input} + found := false + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + continue + } + + switch string(name) { + case "VMAP": + found = true + if v := s.attr("version"); v != nil { + vmap.Version = byteStr(v) + } + if v := s.attr("vmap"); v != nil { + vmap.Vmap = byteStr(v) + vmap.XMLName.Space = byteStr(v) + } + vmap.XMLName.Local = "VMAP" + s.endAttrs() + case "AdBreak": + vmap.AdBreaks = append(vmap.AdBreaks, scanAdBreak(&s)) + } + } + + if !found { + return vmap, errors.New("no VMAP token found in document") + } + return vmap, nil +} + +// DecodeVastScan decodes a VAST document using direct byte scanning. +func DecodeVastScan(input []byte) (VAST, error) { + var vast VAST + s := scan{data: input} + found := false + + for { + name, isEnd, selfClose := s.next() + if name == nil { + break + } + if isEnd { + continue + } + if string(name) == "VAST" { + found = true + if selfClose { + break + } + vast = scanVast(&s) + } + } + + if !found { + return vast, errors.New("no VAST token found in document") + } + return vast, nil +} + +// --- Per-element scanners --- + +func scanAdBreak(s *scan) AdBreak { + var ab AdBreak + ab.AdSource = &AdSource{VASTData: &VASTData{}} + + if v := s.attr("breakId"); v != nil { + ab.Id = byteStr(v) + } + if v := s.attr("breakType"); v != nil { + ab.BreakType = byteStr(v) + } + if v := s.attr("timeOffset"); v != nil { + _ = ab.TimeOffset.UnmarshalText(v) + } + s.endAttrs() + + for { + name, isEnd, selfClose := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "AdBreak" { + break + } + continue + } + switch string(name) { + case "VAST": + if selfClose { + ab.AdSource.VASTData.VAST = &VAST{} + continue + } + vast := scanVast(s) + ab.AdSource.VASTData.VAST = &vast + case "Tracking": + if ab.TrackingEvents == nil { + ab.TrackingEvents = []TrackingEvent{} + } + var t TrackingEvent + if v := s.attr("event"); v != nil { + t.Event = byteStr(v) + } + s.endAttrs() + t.Text = s.textStr() + ab.TrackingEvents = append(ab.TrackingEvents, t) + } + } + return ab +} + +func scanVast(s *scan) VAST { + var vast VAST + if v := s.attr("version"); v != nil { + vast.Version = byteStr(v) + } + s.endAttrs() + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "VAST" { + break + } + continue + } + if string(name) == "Ad" { + vast.Ad = append(vast.Ad, scanAd(s)) + } + } + return vast +} + +func scanAd(s *scan) Ad { + var ad Ad + if v := s.attr("id"); v != nil { + ad.Id = byteStr(v) + } + if v := s.attr("sequence"); v != nil { + ad.Sequence, _ = strconv.Atoi(byteStr(v)) + } + s.endAttrs() + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "Ad" { + break + } + continue + } + if string(name) == "InLine" { + inline := scanInLine(s) + ad.InLine = &inline + } + } + return ad +} + +func scanInLine(s *scan) InLine { + var inline InLine + s.endAttrs() + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "InLine" { + break + } + continue + } + switch string(name) { + case "Creative": + inline.Creatives = append(inline.Creatives, scanCreative(s)) + case "Impression": + var imp Impression + if v := s.attr("id"); v != nil { + imp.Id = byteStr(v) + } + s.endAttrs() + imp.Text = s.textStr() + inline.Impression = append(inline.Impression, imp) + case "AdSystem": + s.endAttrs() + inline.AdSystem = s.textStr() + case "AdTitle": + s.endAttrs() + inline.AdTitle = s.textStr() + case "Extension": + inline.Extensions = append(inline.Extensions, scanExtension(s)) + case "Error": + s.endAttrs() + inline.Error = &Error{Value: s.textStr()} + } + } + return inline +} + +func scanCreative(s *scan) Creative { + var c Creative + if v := s.attr("id"); v != nil { + c.Id = byteStr(v) + } + if v := s.attr("adId"); v != nil { + c.AdId = byteStr(v) + } + s.endAttrs() + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "Creative" { + break + } + continue + } + switch string(name) { + case "UniversalAdId": + var uaid UniversalAdId + if v := s.attr("idRegistry"); v != nil { + uaid.IdRegistry = byteStr(v) + } + s.endAttrs() + uaid.Id = s.textStr() + c.UniversalAdId = &uaid + case "Tracking": + if c.Linear == nil { + c.Linear = &Linear{} + } + var t TrackingEvent + if v := s.attr("event"); v != nil { + t.Event = byteStr(v) + } + s.endAttrs() + t.Text = s.textStr() + c.Linear.TrackingEvents = append(c.Linear.TrackingEvents, t) + case "ClickThrough": + if c.Linear == nil { + c.Linear = &Linear{} + } + c.Linear.ClickThrough = &ClickThrough{} + if v := s.attr("id"); v != nil { + c.Linear.ClickThrough.Id = byteStr(v) + } + s.endAttrs() + c.Linear.ClickThrough.Text = s.textStr() + case "ClickTracking": + if c.Linear == nil { + c.Linear = &Linear{} + } + var ct ClickTracking + if v := s.attr("id"); v != nil { + ct.Id = byteStr(v) + } + s.endAttrs() + ct.Text = s.textStr() + c.Linear.ClickTracking = append(c.Linear.ClickTracking, ct) + case "Duration": + if c.Linear == nil { + c.Linear = &Linear{} + } + s.endAttrs() + content, wasCDATA := s.text() + if content != nil { + if wasCDATA || bytes.IndexByte(content, '&') < 0 { + _ = c.Linear.Duration.UnmarshalText(content) + } else { + cp := make([]byte, len(content)) + copy(cp, content) + _ = c.Linear.Duration.UnmarshalText(xmlStringToString(cp)) + } + } + case "MediaFile": + if c.Linear == nil { + c.Linear = &Linear{} + } + var m MediaFile + if v := s.attr("bitrate"); v != nil { + m.Bitrate, _ = strconv.Atoi(byteStr(v)) + } + if v := s.attr("height"); v != nil { + m.Height, _ = strconv.Atoi(byteStr(v)) + } + if v := s.attr("width"); v != nil { + m.Width, _ = strconv.Atoi(byteStr(v)) + } + if v := s.attr("delivery"); v != nil { + m.Delivery = byteStr(v) + } + if v := s.attr("type"); v != nil { + m.MediaType = byteStr(v) + } + if v := s.attr("codec"); v != nil { + m.Codec = byteStr(v) + } + s.endAttrs() + m.Text = s.textStr() + c.Linear.MediaFiles = append(c.Linear.MediaFiles, m) + } + } + return c +} + +func scanExtension(s *scan) Extension { + var ext Extension + if v := s.attr("type"); v != nil { + ext.ExtensionType = byteStr(v) + } + s.endAttrs() + + for { + name, isEnd, _ := s.next() + if name == nil { + break + } + if isEnd { + if string(name) == "Extension" { + break + } + continue + } + if string(name) == "CreativeParameter" { + var par CreativeParameter + if v := s.attr("creativeId"); v != nil { + par.CreativeId = byteStr(v) + } + if v := s.attr("name"); v != nil { + par.Name = byteStr(v) + } + if v := s.attr("type"); v != nil { + par.CreativeParameterType = byteStr(v) + } + s.endAttrs() + par.Value = s.textStr() + ext.CreativeParameters = append(ext.CreativeParameters, par) + } + } + return ext +} diff --git a/vmap/structure.go b/vmap/structure.go index 71c7aa8..0cf6504 100644 --- a/vmap/structure.go +++ b/vmap/structure.go @@ -1,7 +1,6 @@ package vmap import ( - "bytes" "encoding/xml" "fmt" "strconv" @@ -133,36 +132,31 @@ type CreativeParameter struct { type Duration struct{ time.Duration } -var formatStrings = [...]string{"h", "m", "s", "ms"} - func (d *Duration) UnmarshalText(data []byte) error { - var sb bytes.Buffer + var parts [4]int currentPart := 0 for i := 0; i < len(data); i++ { b := data[i] - switch b { - case ':', '.': - if currentPart == 3 { + switch { + case b >= '0' && b <= '9': + parts[currentPart] = parts[currentPart]*10 + int(b-'0') + case b == ':' || b == '.': + currentPart++ + if currentPart > 3 { return fmt.Errorf("invalid duration format: %s", string(data)) } - sb.WriteString(formatStrings[currentPart]) - currentPart++ - case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0': - sb.WriteByte(b) } } - sb.WriteString(formatStrings[currentPart]) if currentPart < 2 { return fmt.Errorf("invalid duration format: %s", string(data)) } - dur, err := time.ParseDuration(sb.String()) - if err != nil { - return fmt.Errorf("error parsing duration: %w", err) - } - *d = Duration{dur} + d.Duration = time.Duration(parts[0])*time.Hour + + time.Duration(parts[1])*time.Minute + + time.Duration(parts[2])*time.Second + + time.Duration(parts[3])*time.Millisecond return nil } diff --git a/vmap/structure_test.go b/vmap/structure_test.go index 88f8167..7fbf26a 100644 --- a/vmap/structure_test.go +++ b/vmap/structure_test.go @@ -311,6 +311,125 @@ func BenchmarkFasterDecode(b *testing.B) { } } +func BenchmarkScanDecode(b *testing.B) { + doc, err := os.ReadFile("sample-vmap/testVmap.xml") + if err != nil { + panic(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = DecodeVmapScan(doc) + } +} + +func TestDecodeVmapScan(t *testing.T) { + is := is.New(t) + doc, err := os.ReadFile("sample-vmap/testVmap.xml") + is.NoErr(err) + + vmap1, err := DecodeVmap(doc) + is.NoErr(err) + vmap2, err := DecodeVmapScan(doc) + is.NoErr(err) + + is.Equal(vmap1.Version, vmap2.Version) + is.Equal(vmap1.Vmap, vmap2.Vmap) + is.Equal(vmap1.XMLName.Local, vmap2.XMLName.Local) + is.Equal(vmap1.XMLName.Space, vmap2.XMLName.Space) + + is.Equal(len(vmap1.AdBreaks), len(vmap2.AdBreaks)) + for i := range vmap1.AdBreaks { + a := vmap1.AdBreaks[i] + b := vmap2.AdBreaks[i] + is.Equal(a.Id, b.Id) + is.Equal(a.BreakType, b.BreakType) + is.Equal(a.TimeOffset, b.TimeOffset) + is.Equal(len(a.TrackingEvents), len(b.TrackingEvents)) + for j := range a.TrackingEvents { + is.Equal(strings.TrimSpace(a.TrackingEvents[j].Text), strings.TrimSpace(b.TrackingEvents[j].Text)) + is.Equal(a.TrackingEvents[j].Event, b.TrackingEvents[j].Event) + } + + v1 := a.AdSource.VASTData.VAST + v2 := b.AdSource.VASTData.VAST + is.True(v1 != nil) + is.True(v2 != nil) + is.Equal(v1.Version, v2.Version) + is.Equal(len(v1.Ad), len(v2.Ad)) + for j := range v1.Ad { + is.Equal(v1.Ad[j].Id, v2.Ad[j].Id) + is.Equal(v1.Ad[j].Sequence, v2.Ad[j].Sequence) + if v1.Ad[j].InLine != nil { + is.True(v2.Ad[j].InLine != nil) + is.Equal(strings.TrimSpace(v1.Ad[j].InLine.AdSystem), strings.TrimSpace(v2.Ad[j].InLine.AdSystem)) + is.Equal(strings.TrimSpace(v1.Ad[j].InLine.AdTitle), strings.TrimSpace(v2.Ad[j].InLine.AdTitle)) + is.Equal(v1.Ad[j].InLine.Error, v2.Ad[j].InLine.Error) + is.Equal(len(v1.Ad[j].InLine.Creatives), len(v2.Ad[j].InLine.Creatives)) + } + } + } +} + +func TestDecodeVastScan(t *testing.T) { + is := is.New(t) + doc, err := os.ReadFile("sample-vmap/testVast.xml") + is.NoErr(err) + + vast1, err := DecodeVast(doc) + is.NoErr(err) + vast2, err := DecodeVastScan(doc) + is.NoErr(err) + + is.Equal(vast1.Version, vast2.Version) + is.Equal(len(vast1.Ad), len(vast2.Ad)) + for i := range vast1.Ad { + a := vast1.Ad[i] + b := vast2.Ad[i] + is.Equal(a.Id, b.Id) + is.Equal(a.Sequence, b.Sequence) + if a.InLine != nil { + is.True(b.InLine != nil) + is.Equal(strings.TrimSpace(a.InLine.AdSystem), strings.TrimSpace(b.InLine.AdSystem)) + is.Equal(strings.TrimSpace(a.InLine.AdTitle), strings.TrimSpace(b.InLine.AdTitle)) + is.Equal(a.InLine.Error, b.InLine.Error) + is.Equal(len(a.InLine.Impression), len(b.InLine.Impression)) + is.Equal(len(a.InLine.Creatives), len(b.InLine.Creatives)) + for j := range a.InLine.Creatives { + c1 := a.InLine.Creatives[j] + c2 := b.InLine.Creatives[j] + is.Equal(c1.Id, c2.Id) + is.Equal(c1.AdId, c2.AdId) + is.Equal(c1.Linear.Duration, c2.Linear.Duration) + is.Equal(len(c1.Linear.TrackingEvents), len(c2.Linear.TrackingEvents)) + is.Equal(len(c1.Linear.MediaFiles), len(c2.Linear.MediaFiles)) + for k := range c1.Linear.MediaFiles { + is.Equal(c1.Linear.MediaFiles[k].Width, c2.Linear.MediaFiles[k].Width) + is.Equal(c1.Linear.MediaFiles[k].Height, c2.Linear.MediaFiles[k].Height) + is.Equal(c1.Linear.MediaFiles[k].Bitrate, c2.Linear.MediaFiles[k].Bitrate) + is.Equal(c1.Linear.MediaFiles[k].MediaType, c2.Linear.MediaFiles[k].MediaType) + is.Equal(c1.Linear.MediaFiles[k].Codec, c2.Linear.MediaFiles[k].Codec) + } + } + is.Equal(len(a.InLine.Extensions), len(b.InLine.Extensions)) + } + } +} + +func TestSpecialCharactersScan(t *testing.T) { + is := is.New(t) + doc, err := os.ReadFile("sample-vmap/testVastSpecialChars.xml") + is.NoErr(err) + + vastDecoded, err := DecodeVast(doc) + is.NoErr(err) + vastScanned, err := DecodeVastScan(doc) + is.NoErr(err) + + is.Equal(vastDecoded.Ad[0].InLine.AdTitle, vastScanned.Ad[0].InLine.AdTitle) + is.Equal(vastScanned.Ad[0].InLine.AdTitle, "Hej&รถ\n<>\"") +} + func TestSpecialCharacters(t *testing.T) { is := is.New(t) doc, err := os.ReadFile("sample-vmap/testVastSpecialChars.xml")