diff --git a/lib/m2ts/metadata-stream.js b/lib/m2ts/metadata-stream.js index bc3d4f0f..b8433054 100644 --- a/lib/m2ts/metadata-stream.js +++ b/lib/m2ts/metadata-stream.js @@ -19,12 +19,12 @@ var // return the string representation of the specified byte range, // interpreted as UTf-8. parseUtf8 = function(bytes, start, end) { - return window.decodeURIComponent(percentEncode(bytes, start, end)); + return decodeURIComponent(percentEncode(bytes, start, end)); }, // return the string representation of the specified byte range, // interpreted as ISO-8859-1. parseIso88591 = function(bytes, start, end) { - return window.unescape(percentEncode(bytes, start, end)); + return unescape(percentEncode(bytes, start, end)); }, parseSyncSafeInteger = function (data) { return (data[0] << 21) | diff --git a/lib/mp4/transmuxer.js b/lib/mp4/transmuxer.js index 7f5c0841..bcd3f3d1 100644 --- a/lib/mp4/transmuxer.js +++ b/lib/mp4/transmuxer.js @@ -20,7 +20,48 @@ var H264Stream = require('../codecs/h264').H264Stream; var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream; // Helper functions -var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime; +var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime, compareArrays, sumByteLengths; + +/** + * Compare two arrays (event typed) for same-ness + */ +compareArrays = function(a, b) { + var + i, + currentObj, + sum = 0; + + if (a.length !== b.length) { + return false; + } + + // sum the byteLength's all each nal unit in the frame + for (i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + return false; + } + } + + return true; +}; + +/** + * Sum the `byteLength` of a specific property in an array of objects + */ +sumByteLengths = function(array, property) { + var + i, + currentObj, + sum = 0; + + // sum the byteLength's all each nal unit in the frame + for (i = 0; i < array.length; i++) { + currentObj = array[i]; + sum += currentObj[property].byteLength; + } + + return sum; +}; /** * Constructs a single-track, ISO BMFF media segment from AAC data @@ -30,7 +71,6 @@ var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime; AudioSegmentStream = function(track) { var aacFrames = [], - aacFramesLength = 0, sequenceNumber = 0, earliestAllowedDts = 0; @@ -49,60 +89,35 @@ AudioSegmentStream = function(track) { // buffer audio data until end() is called aacFrames.push(data); - aacFramesLength += data.data.byteLength; }; - this.setEarliestDts = function (earliestDts) { + this.setEarliestDts = function(earliestDts) { earliestAllowedDts = earliestDts - track.timelineStartInfo.baseMediaDecodeTime; }; this.flush = function() { - var boxes, currentFrame, data, sample, i, mdat, moof; + var + frames, + moof, + mdat, + boxes; + // return early if no audio data has been observed - if (aacFramesLength === 0) { + if (aacFrames.length === 0) { this.trigger('done'); return; } - // If the audio segment extends before the earliest allowed dts - // value, remove AAC frames until starts at or after the earliest - // allowed dts. - if (track.minSegmentDts < earliestAllowedDts) { - // We will need to recalculate the earliest segment Dts - track.minSegmentDts = Infinity; - - aacFrames = aacFrames.filter(function(currentFrame) { - // If this is an allowed frame, keep it and record it's Dts - if (currentFrame.dts >= earliestAllowedDts) { - track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts); - track.minSegmentPts = track.minSegmentDts; - return true; - } - // Otherwise, discard it - aacFramesLength -= currentFrame.data.byteLength; - return false; - }); - } + frames = this.trimAacFramesByEarliestDts_(aacFrames); - // concatenate the audio data to constuct the mdat - data = new Uint8Array(aacFramesLength); - track.samples = []; - i = 0; - while (aacFrames.length) { - currentFrame = aacFrames[0]; - sample = { - size: currentFrame.data.byteLength, - duration: 1024 // FIXME calculate for realz - }; - track.samples.push(sample); + // we have to build the index from byte locations to + // samples (that is, aac frames) in the audio data + track.samples = this.generateSampleTable_(frames); - data.set(currentFrame.data, i); - i += currentFrame.data.byteLength; + // concatenate the audio data to constuct the mdat + mdat = mp4.mdat(this.concatenateFrameData_(frames)); - aacFrames.shift(); - } - aacFramesLength = 0; - mdat = mp4.mdat(data); + aacFrames = []; calculateTrackBaseMediaDecodeTime(track); moof = mp4.moof(sequenceNumber, [track]); @@ -119,7 +134,66 @@ AudioSegmentStream = function(track) { this.trigger('data', {track: track, boxes: boxes}); this.trigger('done'); }; + + // If the audio segment extends before the earliest allowed dts + // value, remove AAC frames until starts at or after the earliest + // allowed DTS + this.trimAacFramesByEarliestDts_ = function(aacFrames) { + if (track.minSegmentDts >= earliestAllowedDts) { + return aacFrames; + } + + // We will need to recalculate the earliest segment Dts + track.minSegmentDts = Infinity; + + return aacFrames.filter(function(currentFrame) { + // If this is an allowed frame, keep it and record it's Dts + if (currentFrame.dts >= earliestAllowedDts) { + track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts); + track.minSegmentPts = track.minSegmentDts; + return true; + } + // Otherwise, discard it + return false; + }); + }; + + // generate the track's raw mdat data from an array of frames + this.generateSampleTable_ = function(frames) { + var + i, + currentFrame, + samples = []; + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + + samples.push({ + size: currentFrame.data.byteLength, + duration: 1024 // For AAC audio, all samples contain 1024 samples + }); + } + return samples; + }; + + // generate the track's sample table from an array of frames + this.concatenateFrameData_ = function(frames) { + var + i, + currentFrame, + dataOffset = 0, + data = new Uint8Array(sumByteLengths(frames, 'data')); + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + + data.set(currentFrame.data, dataOffset); + dataOffset += currentFrame.data.byteLength; + } + return data; + }; }; + AudioSegmentStream.prototype = new Stream(); /** @@ -132,7 +206,7 @@ VideoSegmentStream = function(track) { var sequenceNumber = 0, nalUnits = [], - nalUnitsLength = 0, + gopCache = [], config, pps; @@ -140,38 +214,102 @@ VideoSegmentStream = function(track) { delete track.minPTS; - this.push = function(data) { - collectDtsInfo(track, data); + this.getGopForFusion_ = function (nalUnit) { + var + nearestDistance = Infinity, + dtsDistance, + nearestGopObj, + currentGopNals, + currentGopObj, + i; + + // Search for the GOP nearest to the beginning of this nal unit + for (i = 0; i < gopCache.length; i++) { + currentGopObj = gopCache[i]; + currentGopNals = currentGopObj.nalUnits; + + // Reject Gops without any nal units + if (currentGopNals.length === 0) { + continue; + } + + // Reject Gops with different SPS or PPS + if (!(track.pps && compareArrays(track.pps[0], currentGopObj.pps[0])) || + !(track.sps && compareArrays(track.sps[0], currentGopObj.sps[0]))) { + continue; + } + + dtsDistance = nalUnit.dts - currentGopNals[currentGopNals.length - 1].dts; + + // Only consider GOPS that start before the nal unit and end within + // a half-second of the nal unit + if (nalUnit.dts >= currentGopNals[0].dts && + dtsDistance <= 45000) { + + // Always use the closest GOP we found if there is more than + // one candidate + if (!nearestGopObj || + nearestDistance > dtsDistance) { + nearestGopObj = currentGopObj; + nearestDistance = dtsDistance; + } + } + } + + // If we found a GOP, use it to start the nalUnits array and update timeline info + if (nearestGopObj) { + return nearestGopObj.nalUnits.slice(); + } + return []; + }; + + this.push = function(nalUnit) { + collectDtsInfo(track, nalUnit); // record the track config - if (data.nalUnitType === 'seq_parameter_set_rbsp' && + if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) { - config = data.config; + config = nalUnit.config; track.width = config.width; track.height = config.height; - track.sps = [data.data]; + track.sps = [nalUnit.data]; track.profileIdc = config.profileIdc; track.levelIdc = config.levelIdc; track.profileCompatibility = config.profileCompatibility; } - if (data.nalUnitType === 'pic_parameter_set_rbsp' && + if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' && !pps) { - pps = data.data; - track.pps = [data.data]; + pps = nalUnit.data; + track.pps = [nalUnit.data]; } - // buffer video until end() is called - nalUnits.push(data); - nalUnitsLength += data.data.byteLength; + // buffer video until flush() is called + nalUnits.push(nalUnit); }; this.flush = function() { - var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample, duration; - - // Throw away nalUnits at the start of the byte stream until we find - // the first AUD + var + frames, + gopForFusion, + gops, + moof, + mdat, + boxes; + + // Search for a candidate GOP for FUSION (prepending the gop to this segment) + gopForFusion = this.getGopForFusion_(nalUnits[0]); + + // Prepend it if we found it + nalUnits = gopForFusion.concat(nalUnits); + // Prepend it if we found it + nalUnits.forEach(collectDtsInfo.bind(null, track)); + + // Throw away nalUnits at the start of the byte stream until + // we find the first AUD + // This should only happen if GOP-FUSION failed to find a good candidate + // for prepending while (nalUnits.length) { if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') { break; @@ -179,90 +317,42 @@ VideoSegmentStream = function(track) { nalUnits.shift(); } - // return early if no video data has been observed - if (nalUnitsLength === 0) { + // Return early if no video data has been observed + if (nalUnits.length === 0) { this.trigger('done'); return; } - // concatenate the video data and construct the mdat - // first, we have to build the index from byte locations to - // samples (that is, frames) in the video data - data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length)); - view = new DataView(data.buffer); - track.samples = []; + // Organize the raw nal units into arrays that represent + // higher-level constructs + frames = this.groupNalsIntoFrames_(nalUnits); - // see ISO/IEC 14496-12:2012, section 8.6.4.3 - sample = { - size: 0, - flags: { - isLeading: 0, - dependsOn: 1, - isDependedOn: 0, - hasRedundancy: 0, - degradationPriority: 0 - } - }; + // Filter out any frames that exist before the first i-frame + // This should only happen if GOP-FUSION failed to find a good candidate + // for prepending + frames = this.filterLeadingNonIFrames_(frames); - // build the samples from the NAL units - i = 0; - while (nalUnits.length) { - currentNal = nalUnits[0]; - // flush the sample we've been building when a new sample is started - if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') { - if (startUnit) { - sample.duration = currentNal.dts - startUnit.dts; - track.samples.push(sample); - } - sample = { - size: 0, - flags: { - isLeading: 0, - dependsOn: 1, - isDependedOn: 0, - hasRedundancy: 0, - degradationPriority: 0 - }, - dataOffset: i, - compositionTimeOffset: currentNal.pts - currentNal.dts - }; - startUnit = currentNal; - } - if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') { - // the current sample is a key frame - sample.flags.dependsOn = 2; - } - sample.size += 4; // space for the NAL length - sample.size += currentNal.data.byteLength; + gops = this.groupFramesIntoGops_(frames); - view.setUint32(i, currentNal.data.byteLength); - i += 4; - data.set(currentNal.data, i); - i += currentNal.data.byteLength; + // First, we have to build the index from byte locations to + // samples (that is, frames) in the video data + track.samples = this.generateSampleTable_(frames); - nalUnits.shift(); - } - // record the last sample - if (track.samples.length) { - sample.duration = track.samples[track.samples.length - 1].duration; - } - track.samples.push(sample); - - // filter out pre-IDR data - duration = 0; - while (track.samples.length) { - sample = track.samples[0]; - if (sample.flags.dependsOn === 2) { - data = data.subarray(sample.dataOffset); - sample.duration += duration; - break; - } - duration += sample.duration; - track.samples.shift(); - } + // Concatenate the video data and construct the mdat + mdat = mp4.mdat(this.concatenateNalData_(frames)); - nalUnitsLength = 0; - mdat = mp4.mdat(data); + // Save all the nals in the last GOP into the gop cache + gopCache.unshift({ + nalUnits: gops.pop().reduce(function(a, b) { return a.concat(b); }, []), + pps: track.pps, + sps: track.sps + }); + + // Keep a maximum of 6 GOPs in the cache (about a minute worth) + gopCache.length = Math.min(6, gopCache.length); + + // Clear nalUnits + nalUnits = []; calculateTrackBaseMediaDecodeTime(track); @@ -291,7 +381,197 @@ VideoSegmentStream = function(track) { // Continue with the flush process now this.trigger('done'); }; + + // create the default sample + // see ISO/IEC 14496-12:2012, section 8.6.4.3 + this.createDefaultSample_ = function() { + return { + size: 0, + flags: { + isLeading: 0, + dependsOn: 1, + isDependedOn: 0, + hasRedundancy: 0, + degradationPriority: 0 + } + }; + }; + + // search an array of nal units to see if it qualifies as an i-frame (actually, IDR) + this.frameIsIFrame_ = function(frame) { + var + i, + currentNal; + + for (i = 0; i < frame.length; i++) { + currentNal = frame[i]; + + if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') { + return true; + } + } + return false; + }; + + this.filterLeadingNonIFrames_ = function(frames) { + var + i, + currentFrame, + initialPts = frames[0][0].pts, + initialDts = frames[0][0].dts; + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + + // If we found an iframe, reset it's first nal unit's dts and pts value + // to mirror the first nal unit from the set we are dropping + // This has the effect of "extending" first i-frame in time so that it shows + // for the portion of time that the missing nal units would have been for + if (this.frameIsIFrame_(currentFrame)) { + currentFrame[0].pts = initialPts; + currentFrame[0].dts = initialDts; + break; + } + } + return frames.slice(i); + }; + + // convert an array of nal units into an array of frames with each frame being + // composed of the nal units that make up that frame + this.groupNalsIntoFrames_ = function(nalUnits) { + var + i, + currentNal, + currentFrame = [], + frames = []; + + for (i = 0; i < nalUnits.length; i++) { + currentNal = nalUnits[i]; + + if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') { + if (currentFrame && currentFrame.length) { + frames.push(currentFrame); + } + + currentFrame = [currentNal]; + } else { + currentFrame.push(currentNal); + } + } + // push the final frame + frames.push(currentFrame); + return frames; + }; + + // convert an array of frames into an array of Gop with each Gop being + // composed of the frames that make up that Gop + this.groupFramesIntoGops_ = function(frames) { + var + i, + currentFrame, + currentGop = [], + gops = []; + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + + if (this.frameIsIFrame_(currentFrame)) { + if (currentGop && currentGop.length) { + gops.push(currentGop); + } + + currentGop = [currentFrame]; + } else { + currentGop.push(currentFrame); + } + } + // push the final Gop + gops.push(currentGop); + return gops; + }; + + // generate the track's sample table from an array of frames + this.generateSampleTable_ = function(frames, baseDataOffset) { + var + i, + sample, + currentFrame, + nextFrame, + firstNal, + lastNal, + frameDataSize, + currentSample, + dataOffset = baseDataOffset || 0, + samples = []; + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + nextFrame = frames[i + 1]; + + firstNal = currentFrame[0]; + lastNal = currentFrame[currentFrame.length - 1]; + frameDataSize = sumByteLengths(currentFrame, 'data'); + + sample = this.createDefaultSample_(); + sample.dataOffset = dataOffset; + sample.compositionTimeOffset = firstNal.pts - firstNal.dts; + + if (nextFrame) { + sample.duration = nextFrame[0].dts - firstNal.dts; + } else { + sample.duration = lastNal.dts - firstNal.dts; + + if (sample.duration === 0 && + samples.length > 0) { + // for the last frame, copy the duration of the previous + // frame + sample.duration = samples[samples.length - 1].duration; + } + } + + sample.size = 4 * currentFrame.length; // Space for nal unit size + sample.size += frameDataSize; + + if (this.frameIsIFrame_(currentFrame)) { + sample.flags.dependsOn = 2; + } + + dataOffset += sample.size; + + samples.push(sample); + } + return samples; + }; + + // generate the track's raw mdat data from an array of frames + this.concatenateNalData_ = function (frames) { + var + i, j, + currentFrame, + currentNal, + dataOffset = 0, + nalsByteLength = frames.reduce(function(v, frame) {return v + sumByteLengths(frame, 'data'); }, 0), + numberOfNals = frames.reduce(function(v, frame) { return v + frame.length; }, 0), + totalByteLength = nalsByteLength + 4 * numberOfNals, + data = new Uint8Array(totalByteLength), + view = new DataView(data.buffer); + + for (i = 0; i < frames.length; i++) { + currentFrame = frames[i]; + + for (j = 0; j < currentFrame.length; j++) { + currentNal = currentFrame[j]; + + view.setUint32(dataOffset, currentNal.data.byteLength); + dataOffset += 4; + data.set(currentNal.data, dataOffset); + dataOffset += currentNal.data.byteLength; + } + } + return data; + }; }; + VideoSegmentStream.prototype = new Stream(); /** diff --git a/test/transmuxer.test.js b/test/transmuxer.test.js index 1277fe93..bb5a9fb4 100644 --- a/test/transmuxer.test.js +++ b/test/transmuxer.test.js @@ -1207,6 +1207,10 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() { nalUnitType: 'access_unit_delimiter_rbsp', data: new Uint8Array([0x09, 0x01]) }); + videoSegmentStream.push({ + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + data: new Uint8Array([0x05, 0x01]) + }); videoSegmentStream.push({ data: new Uint8Array([ 0x08, @@ -1224,11 +1228,13 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() { QUnit.ok(segment, 'generated a data event'); boxes = mp4.tools.inspect(segment); QUnit.equal(boxes[1].byteLength, - (2 + 4) + (4 + 4) + (4 + 6), + (2 + 4) + (2 + 4) + (4 + 4) + (4 + 6), 'wrote the correct number of bytes'); QUnit.deepEqual(new Uint8Array(segment.subarray(boxes[0].size + 8)), new Uint8Array([ 0, 0, 0, 2, 0x09, 0x01, + 0, 0, 0, 2, + 0x05, 0x01, 0, 0, 0, 4, 0x08, 0x01, 0x02, 0x03, 0, 0, 0, 6, @@ -1237,6 +1243,40 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() { }); QUnit.test('infers sample durations from DTS values', function() { + var segment, boxes, samples; + videoSegmentStream.on('data', function(data) { + segment = data.boxes; + }); + videoSegmentStream.push({ + data: new Uint8Array([0x09, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x09, 0x01]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x09, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 2 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x09, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 4 + }); + videoSegmentStream.flush(); + boxes = mp4.tools.inspect(segment); + samples = boxes[0].boxes[1].boxes[2].samples; + QUnit.equal(samples.length, 3, 'generated three samples'); + QUnit.equal(samples[0].duration, 1, 'set the first sample duration'); + QUnit.equal(samples[1].duration, 2, 'set the second sample duration'); + QUnit.equal(samples[2].duration, 2, 'inferred the final sample duration'); +}); + +QUnit.test('filters pre-IDR samples and calculate duration correctly', function() { var segment, boxes, samples; videoSegmentStream.on('data', function(data) { segment = data.boxes; @@ -1248,12 +1288,17 @@ QUnit.test('infers sample durations from DTS values', function() { }); videoSegmentStream.push({ data: new Uint8Array([0x09, 0x01]), - nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + nalUnitType: 'slice_layer_without_partitioning_rbsp', dts: 1 }); videoSegmentStream.push({ data: new Uint8Array([0x09, 0x01]), nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x09, 0x01]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', dts: 2 }); videoSegmentStream.push({ @@ -1265,49 +1310,346 @@ QUnit.test('infers sample durations from DTS values', function() { boxes = mp4.tools.inspect(segment); samples = boxes[0].boxes[1].boxes[2].samples; - QUnit.equal(samples.length, 3, 'generated three samples'); - QUnit.equal(samples[0].duration, 1, 'set the first sample duration'); - QUnit.equal(samples[1].duration, 2, 'set the second sample duration'); - QUnit.equal(samples[2].duration, 2, 'inferred the final sample duration'); + QUnit.equal(samples.length, 2, 'generated two samples, filters out pre-IDR'); + QUnit.equal(samples[0].duration, 3, 'set the first sample duration'); + QUnit.equal(samples[1].duration, 3, 'set the second sample duration'); }); -QUnit.test('filters pre-IDR samples and caluculate duration correctly', function() { +QUnit.test('holds onto the last GOP and prepends the subsequent push operation with that GOP', function() { var segment, boxes, samples; + + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'seq_parameter_set_rbsp', + config: {}, + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'pic_parameter_set_rbsp', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x66, 0x66]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 2, + pts: 2 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x03]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x99, 0x99]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x04]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 4, + pts: 4 + }); + videoSegmentStream.flush(); + videoSegmentStream.on('data', function(data) { segment = data.boxes; }); + videoSegmentStream.push({ - data: new Uint8Array([0x09, 0x01]), + data: new Uint8Array([0x02, 0x01]), nalUnitType: 'access_unit_delimiter_rbsp', - dts: 1 + dts: 5, + pts: 5 }); videoSegmentStream.push({ - data: new Uint8Array([0x09, 0x01]), - nalUnitType: 'slice_layer_without_partitioning_rbsp', - dts: 1 + data: new Uint8Array([0x02, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 6, + pts: 6 }); videoSegmentStream.push({ - data: new Uint8Array([0x09, 0x01]), + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'seq_parameter_set_rbsp', + config: {}, + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'pic_parameter_set_rbsp', + dts: 1, + pts: 1 + }); videoSegmentStream.push({ + data: new Uint8Array([0x11, 0x11]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 6, + pts: 6 + }); + videoSegmentStream.flush(); + + boxes = mp4.tools.inspect(segment); + samples = boxes[0].boxes[1].boxes[2].samples; + QUnit.equal(samples.length, 4, 'generated four samples, two from previous segment'); + QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair'); + QUnit.equal(samples[1].size, 6, 'second sample is an AUD'); + QUnit.equal(samples[2].size, 6, 'third sample is an AUD'); + QUnit.equal(samples[3].size, 24, 'fourth sample is an AUD + PPS + SPS + IDR'); +}); + +QUnit.test('doesn\'t prepend the last GOP if the next segment has earlier PTS', function() { + var segment, boxes, samples; + + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x01]), nalUnitType: 'access_unit_delimiter_rbsp', - dts: 2 + dts: 10, + pts: 10 }); videoSegmentStream.push({ - data: new Uint8Array([0x09, 0x01]), + data: new Uint8Array([0x66, 0x66]), nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', - dts: 1 + dts: 10, + pts: 10 }); videoSegmentStream.push({ - data: new Uint8Array([0x09, 0x01]), + data: new Uint8Array([0x01, 0x02]), nalUnitType: 'access_unit_delimiter_rbsp', - dts: 4 + dts: 11, + pts: 11 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x03]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 12, + pts: 12 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x99, 0x99]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 12, + pts: 12 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x04]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 13, + pts: 13 + }); + videoSegmentStream.flush(); + + videoSegmentStream.on('data', function(data) { + segment = data.boxes; + }); + + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 5, + pts: 5 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 6, + pts: 6 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x11, 0x11]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 6, + pts: 6 }); videoSegmentStream.flush(); boxes = mp4.tools.inspect(segment); samples = boxes[0].boxes[1].boxes[2].samples; - QUnit.equal(samples.length, 2, 'generated two samples, filters out pre-IDR'); - QUnit.equal(samples[0].duration, 3, 'set the first sample duration'); - QUnit.equal(samples[1].duration, 2, 'set the second sample duration'); + QUnit.equal(samples.length, 1, 'generated one sample'); + QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair'); +}); + +QUnit.test('doesn\'t prepend the last GOP if the next segment has different PPS or SPS', function() { + var segment, boxes, samples; + + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'seq_parameter_set_rbsp', + config: {}, + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x00]), + nalUnitType: 'pic_parameter_set_rbsp', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x66, 0x66]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 2, + pts: 2 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x03]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x99, 0x99]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x04]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 4, + pts: 4 + }); + videoSegmentStream.flush(); + + videoSegmentStream.on('data', function(data) { + segment = data.boxes; + }); + + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 5, + pts: 5 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 6, + pts: 6 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x01]), + nalUnitType: 'seq_parameter_set_rbsp', + config: {}, + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x00, 0x01]), + nalUnitType: 'pic_parameter_set_rbsp', + dts: 1, + pts: 1 + }); videoSegmentStream.push({ + data: new Uint8Array([0x11, 0x11]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 6, + pts: 6 + }); + videoSegmentStream.flush(); + + boxes = mp4.tools.inspect(segment); + samples = boxes[0].boxes[1].boxes[2].samples; + QUnit.equal(samples.length, 1, 'generated one sample'); + QUnit.equal(samples[0].size, 24, 'first sample is an AUD + PPS + SPS + IDR'); +}); + +QUnit.test('doesn\'t prepend the last GOP if the next segment is more than 1 seconds in the future', function() { + var segment, boxes, samples; + + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x66, 0x66]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 1, + pts: 1 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 2, + pts: 2 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x03]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x99, 0x99]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 3, + pts: 3 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x01, 0x04]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 4, + pts: 4 + }); + videoSegmentStream.flush(); + + videoSegmentStream.on('data', function(data) { + segment = data.boxes; + }); + + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x01]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1000000, + pts: 1000000 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x02, 0x02]), + nalUnitType: 'access_unit_delimiter_rbsp', + dts: 1000001, + pts: 1000001 + }); + videoSegmentStream.push({ + data: new Uint8Array([0x11, 0x11]), + nalUnitType: 'slice_layer_without_partitioning_rbsp_idr', + dts: 1000001, + pts: 1000001 + }); + videoSegmentStream.flush(); + + boxes = mp4.tools.inspect(segment); + samples = boxes[0].boxes[1].boxes[2].samples; + QUnit.equal(samples.length, 1, 'generated one sample'); + QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair'); }); QUnit.test('track values from seq_parameter_set_rbsp should be cleared by a flush', function() { @@ -1315,6 +1657,10 @@ QUnit.test('track values from seq_parameter_set_rbsp should be cleared by a flus videoSegmentStream.on('data', function(data) { track = data.track; }); + videoSegmentStream.push({ + data: new Uint8Array([0xFF]), + nalUnitType: 'access_unit_delimiter_rbsp', + }); videoSegmentStream.push({ data: new Uint8Array([0xFF]), nalUnitType: 'seq_parameter_set_rbsp', @@ -1376,6 +1722,10 @@ QUnit.test('track pps from pic_parameter_set_rbsp should be cleared by a flush', videoSegmentStream.on('data', function(data) { track = data.track; }); + videoSegmentStream.push({ + data: new Uint8Array([0xFF]), + nalUnitType: 'access_unit_delimiter_rbsp', + }); videoSegmentStream.push({ data: new Uint8Array([0x01]), nalUnitType: 'pic_parameter_set_rbsp', @@ -1400,7 +1750,7 @@ QUnit.test('track pps from pic_parameter_set_rbsp should be cleared by a flush', QUnit.equal(track.pps[0][0], 0x03, 'first pps is 0x03 after a flush'); }); -QUnit.test('calculates compositionTimeOffset values from the PTS and DTS', function() { +QUnit.test('calculates compositionTimeOffset values from the PTS/DTS', function() { var segment, boxes, samples; videoSegmentStream.on('data', function(data) { segment = data.boxes; @@ -1832,6 +2182,9 @@ QUnit.test('no options creates combined output', function() { transmuxer.push(packetize(audioPes([ 0x19, 0x47 ], true))); + transmuxer.push(packetize(videoPes([ + 0x09, 0x01 // access_unit_delimiter_rbsp + ], true))); transmuxer.push(packetize(videoPes([ 0x08, 0x01 // pic_parameter_set_rbsp ], true))); @@ -1883,6 +2236,9 @@ QUnit.test('can specify that we want to generate separate audio and video segmen transmuxer.push(packetize(audioPes([ 0x19, 0x47 ], true))); + transmuxer.push(packetize(videoPes([ + 0x09, 0x01 // access_unit_delimiter_rbsp + ], true))); transmuxer.push(packetize(videoPes([ 0x08, 0x01 // pic_parameter_set_rbsp ], true))); @@ -1932,6 +2288,9 @@ QUnit.test('generates a video init segment', function() { hasVideo: true }))); + transmuxer.push(packetize(videoPes([ + 0x09, 0x01 // access_unit_delimiter_rbsp + ], true))); transmuxer.push(packetize(videoPes([ 0x08, 0x01 // pic_parameter_set_rbsp ], true))); @@ -1989,7 +2348,7 @@ QUnit.test('buffers video samples until flushed', function() { // buffer a NAL transmuxer.push(packetize(videoPes([0x09, 0x01], true))); - transmuxer.push(packetize(videoPes([0x00, 0x02]))); + transmuxer.push(packetize(videoPes([0x05, 0x02]))); // add an access_unit_delimiter_rbsp transmuxer.push(packetize(videoPes([0x09, 0x03]))); @@ -2010,7 +2369,7 @@ QUnit.test('buffers video samples until flushed', function() { 0, 0, 0, 2, 0x09, 0x01, 0, 0, 0, 2, - 0x00, 0x02, + 0x05, 0x02, 0, 0, 0, 2, 0x09, 0x03, 0, 0, 0, 2, @@ -2397,10 +2756,10 @@ QUnit.test('generates video tags', function() { }))); transmuxer.push(packetize(videoPes([ - 0x09, 0x01 // access_unit_delimiter + 0x09, 0x01 // access_unit_delimiter_rbsp ], true))); transmuxer.push(packetize(videoPes([ - 0x09, 0x01 // access_unit_delimiter + 0x09, 0x01 // access_unit_delimiter_rbsp ], true))); transmuxer.flush(); @@ -2431,7 +2790,7 @@ QUnit.test('drops nalUnits at the start of a segment not preceeded by an access_ 0xef, 0x7c, 0x04 ], false))); transmuxer.push(packetize(videoPes([ - 0x09, 0x01 // access_unit_delimiter + 0x09, 0x01 // access_unit_delimiter_rbsp ], true))); transmuxer.flush();