diff --git a/lib/m2ts/metadata-stream.js b/lib/m2ts/metadata-stream.js
index bc3d4f0f..b8433054 100644
--- a/lib/m2ts/metadata-stream.js
+++ b/lib/m2ts/metadata-stream.js
@@ -19,12 +19,12 @@ var
   // return the string representation of the specified byte range,
   // interpreted as UTf-8.
   parseUtf8 = function(bytes, start, end) {
-    return window.decodeURIComponent(percentEncode(bytes, start, end));
+    return decodeURIComponent(percentEncode(bytes, start, end));
   },
   // return the string representation of the specified byte range,
   // interpreted as ISO-8859-1.
   parseIso88591 = function(bytes, start, end) {
-    return window.unescape(percentEncode(bytes, start, end));
+    return unescape(percentEncode(bytes, start, end));
   },
   parseSyncSafeInteger = function (data) {
     return (data[0] << 21) |
diff --git a/lib/mp4/transmuxer.js b/lib/mp4/transmuxer.js
index 7f5c0841..bcd3f3d1 100644
--- a/lib/mp4/transmuxer.js
+++ b/lib/mp4/transmuxer.js
@@ -20,7 +20,48 @@ var H264Stream = require('../codecs/h264').H264Stream;
 var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream;
 
 // Helper functions
-var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime;
+var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime, compareArrays, sumByteLengths;
+
+/**
+ * Compare two arrays (even typed) for same-ness, element by element
+ * @param {Array|TypedArray} a first array-like to compare
+ * @param {Array|TypedArray} b second array-like to compare
+ * @return {boolean} true when lengths match and every index is strictly equal
+ */
+compareArrays = function(a, b) {
+  var i;
+
+  if (a.length !== b.length) {
+    return false;
+  }
+
+  // element-wise check; stop at the first index that differs
+  for (i = 0; i < a.length; i++) {
+    if (a[i] !== b[i]) {
+      return false;
+    }
+  }
+
+  return true;
+};
+
+/**
+ * Total the `byteLength` of the value stored under `property` on every
+ * object in `array`; an empty array yields 0
+ */
+sumByteLengths = function(array, property) {
+  var
+    index = 0,
+    total = 0;
+
+  // walk the objects in order, accumulating each property's byteLength
+  while (index < array.length) {
+    total += array[index][property].byteLength;
+    index++;
+  }
+
+  return total;
+};
 
 /**
  * Constructs a single-track, ISO BMFF media segment from AAC data
@@ -30,7 +71,6 @@ var collectDtsInfo, clearDtsInfo, calculateTrackBaseMediaDecodeTime;
 AudioSegmentStream = function(track) {
   var
     aacFrames = [],
-    aacFramesLength = 0,
     sequenceNumber = 0,
     earliestAllowedDts = 0;
 
@@ -49,60 +89,35 @@ AudioSegmentStream = function(track) {
 
     // buffer audio data until end() is called
     aacFrames.push(data);
-    aacFramesLength += data.data.byteLength;
   };
 
-  this.setEarliestDts = function (earliestDts) {
+  this.setEarliestDts = function(earliestDts) {
     earliestAllowedDts = earliestDts - track.timelineStartInfo.baseMediaDecodeTime;
   };
 
   this.flush = function() {
-    var boxes, currentFrame, data, sample, i, mdat, moof;
+    var
+      frames,
+      moof,
+      mdat,
+      boxes;
+
     // return early if no audio data has been observed
-    if (aacFramesLength === 0) {
+    if (aacFrames.length === 0) {
       this.trigger('done');
       return;
     }
 
-    // If the audio segment extends before the earliest allowed dts
-    // value, remove AAC frames until starts at or after the earliest
-    // allowed dts.
-    if (track.minSegmentDts < earliestAllowedDts) {
-      // We will need to recalculate the earliest segment Dts
-      track.minSegmentDts = Infinity;
-
-      aacFrames = aacFrames.filter(function(currentFrame) {
-        // If this is an allowed frame, keep it and record it's Dts
-        if (currentFrame.dts >= earliestAllowedDts) {
-          track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts);
-          track.minSegmentPts = track.minSegmentDts;
-          return true;
-        }
-        // Otherwise, discard it
-        aacFramesLength -= currentFrame.data.byteLength;
-        return false;
-      });
-    }
+    frames = this.trimAacFramesByEarliestDts_(aacFrames);
 
-    // concatenate the audio data to constuct the mdat
-    data = new Uint8Array(aacFramesLength);
-    track.samples = [];
-    i = 0;
-    while (aacFrames.length) {
-      currentFrame = aacFrames[0];
-      sample = {
-        size: currentFrame.data.byteLength,
-        duration: 1024 // FIXME calculate for realz
-      };
-      track.samples.push(sample);
+    // we have to build the index from byte locations to
+    // samples (that is, aac frames) in the audio data
+    track.samples = this.generateSampleTable_(frames);
 
-      data.set(currentFrame.data, i);
-      i += currentFrame.data.byteLength;
+    // concatenate the audio data to construct the mdat
+    mdat = mp4.mdat(this.concatenateFrameData_(frames));
 
-      aacFrames.shift();
-    }
-    aacFramesLength = 0;
-    mdat = mp4.mdat(data);
+    aacFrames = [];
 
     calculateTrackBaseMediaDecodeTime(track);
     moof = mp4.moof(sequenceNumber, [track]);
@@ -119,7 +134,66 @@ AudioSegmentStream = function(track) {
     this.trigger('data', {track: track, boxes: boxes});
     this.trigger('done');
   };
+
+  // If the audio segment extends before the earliest allowed DTS
+  // value, drop AAC frames until it starts at or after the earliest
+  // allowed DTS
+  this.trimAacFramesByEarliestDts_ = function(aacFrames) {
+    if (track.minSegmentDts >= earliestAllowedDts) {
+      return aacFrames;
+    }
+
+    // We will need to recalculate the earliest segment Dts
+    track.minSegmentDts = Infinity;
+
+    return aacFrames.filter(function(currentFrame) {
+      // If this is an allowed frame, keep it and record it's Dts
+      if (currentFrame.dts >= earliestAllowedDts) {
+        track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts);
+        track.minSegmentPts = track.minSegmentDts;
+        return true;
+      }
+      // Otherwise, discard it
+      return false;
+    });
+  };
+
+  // generate the track's sample table from an array of frames
+  this.generateSampleTable_ = function(frames) {
+    var
+      i,
+      currentFrame,
+      samples = [];
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+
+      samples.push({
+        size: currentFrame.data.byteLength,
+        duration: 1024 // For AAC audio, every frame contains 1024 samples
+      });
+    }
+    return samples;
+  };
+
+  // generate the track's raw mdat data from an array of frames
+  this.concatenateFrameData_ = function(frames) {
+    var
+      i,
+      currentFrame,
+      dataOffset = 0,
+      data = new Uint8Array(sumByteLengths(frames, 'data'));
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+
+      data.set(currentFrame.data, dataOffset);
+      dataOffset += currentFrame.data.byteLength;
+    }
+    return data;
+  };
 };
+
 AudioSegmentStream.prototype = new Stream();
 
 /**
@@ -132,7 +206,7 @@ VideoSegmentStream = function(track) {
   var
     sequenceNumber = 0,
     nalUnits = [],
-    nalUnitsLength = 0,
+    gopCache = [],
     config,
     pps;
 
@@ -140,38 +214,102 @@ VideoSegmentStream = function(track) {
 
   delete track.minPTS;
 
-  this.push = function(data) {
-    collectDtsInfo(track, data);
+  // Pick the cached GOP (same SPS/PPS) that starts at or before nalUnit and
+  // whose last nal unit lies at most 45000 dts ticks before it; returns a
+  // copy of that GOP's nal units, or [] when nalUnit is missing or none fit
+  this.getGopForFusion_ = function (nalUnit) {
+    var
+      nearestDistance = Infinity,
+      dtsDistance,
+      nearestGopObj,
+      currentGopNals,
+      currentGopObj,
+      i;
+
+    // flush() passes nalUnits[0], which is undefined when nothing was pushed
+    if (!nalUnit) {
+      return [];
+    }
+
+    for (i = 0; i < gopCache.length; i++) {
+      currentGopObj = gopCache[i];
+      currentGopNals = currentGopObj.nalUnits;
+
+      // Reject Gops without any nal units
+      if (currentGopNals.length === 0) {
+        continue;
+      }
+
+      // Reject Gops with different (or never recorded) SPS or PPS
+      if (!(track.pps && currentGopObj.pps &&
+            compareArrays(track.pps[0], currentGopObj.pps[0])) ||
+          !(track.sps && currentGopObj.sps &&
+            compareArrays(track.sps[0], currentGopObj.sps[0]))) {
+        continue;
+      }
+
+      dtsDistance = nalUnit.dts - currentGopNals[currentGopNals.length - 1].dts;
+
+      // Candidates start at or before the nal unit and end no more than 45000
+      // ticks (half a second) before it; keep the one that ends closest to it
+      if (nalUnit.dts >= currentGopNals[0].dts && dtsDistance <= 45000 &&
+          (!nearestGopObj || nearestDistance > dtsDistance)) {
+        nearestGopObj = currentGopObj;
+        nearestDistance = dtsDistance;
+      }
+    }
+
+    // Return a shallow copy so the cached array itself is not handed out
+    return nearestGopObj ? nearestGopObj.nalUnits.slice() : [];
+  };
+
+  this.push = function(nalUnit) {
+    collectDtsInfo(track, nalUnit);
 
     // record the track config
-    if (data.nalUnitType === 'seq_parameter_set_rbsp' &&
+    if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' &&
         !config) {
-      config = data.config;
+      config = nalUnit.config;
 
       track.width = config.width;
       track.height = config.height;
-      track.sps = [data.data];
+      track.sps = [nalUnit.data];
       track.profileIdc = config.profileIdc;
       track.levelIdc = config.levelIdc;
       track.profileCompatibility = config.profileCompatibility;
     }
 
-    if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
+    if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' &&
         !pps) {
-      pps = data.data;
-      track.pps = [data.data];
+      pps = nalUnit.data;
+      track.pps = [nalUnit.data];
     }
 
-    // buffer video until end() is called
-    nalUnits.push(data);
-    nalUnitsLength += data.data.byteLength;
+    // buffer video until flush() is called
+    nalUnits.push(nalUnit);
   };
 
   this.flush = function() {
-    var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample, duration;
-
-    // Throw away nalUnits at the start of the byte stream until we find
-    // the first AUD
+    var
+      frames,
+      gopForFusion,
+      gops,
+      moof,
+      mdat,
+      boxes;
+
+    // Search for a candidate GOP for FUSION (prepending the gop to this segment)
+    gopForFusion = this.getGopForFusion_(nalUnits[0]);
+
+    // Prepend it if we found it
+    nalUnits = gopForFusion.concat(nalUnits);
+    // Re-run collectDtsInfo over every nal unit, including any prepended GOP
+    nalUnits.forEach(collectDtsInfo.bind(null, track));
+
+    // Throw away nalUnits at the start of the byte stream until
+    // we find the first AUD
+    // This should only happen if GOP-FUSION failed to find a good candidate
+    // for prepending
     while (nalUnits.length) {
       if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
         break;
@@ -179,90 +317,42 @@ VideoSegmentStream = function(track) {
       nalUnits.shift();
     }
 
-    // return early if no video data has been observed
-    if (nalUnitsLength === 0) {
+    // Return early if no video data has been observed
+    if (nalUnits.length === 0) {
       this.trigger('done');
       return;
     }
 
-    // concatenate the video data and construct the mdat
-    // first, we have to build the index from byte locations to
-    // samples (that is, frames) in the video data
-    data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length));
-    view = new DataView(data.buffer);
-    track.samples = [];
+    // Organize the raw nal units into arrays that represent
+    // higher-level constructs
+    frames = this.groupNalsIntoFrames_(nalUnits);
 
-    // see ISO/IEC 14496-12:2012, section 8.6.4.3
-    sample = {
-      size: 0,
-      flags: {
-        isLeading: 0,
-        dependsOn: 1,
-        isDependedOn: 0,
-        hasRedundancy: 0,
-        degradationPriority: 0
-      }
-    };
+    // Filter out any frames that exist before the first i-frame
+    // This should only happen if GOP-FUSION failed to find a good candidate
+    // for prepending
+    frames = this.filterLeadingNonIFrames_(frames);
 
-    // build the samples from the NAL units
-    i = 0;
-    while (nalUnits.length) {
-      currentNal = nalUnits[0];
-      // flush the sample we've been building when a new sample is started
-      if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
-        if (startUnit) {
-          sample.duration = currentNal.dts - startUnit.dts;
-          track.samples.push(sample);
-        }
-        sample = {
-          size: 0,
-          flags: {
-            isLeading: 0,
-            dependsOn: 1,
-            isDependedOn: 0,
-            hasRedundancy: 0,
-            degradationPriority: 0
-          },
-          dataOffset: i,
-          compositionTimeOffset: currentNal.pts - currentNal.dts
-        };
-        startUnit = currentNal;
-      }
-      if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') {
-        // the current sample is a key frame
-        sample.flags.dependsOn = 2;
-      }
-      sample.size += 4; // space for the NAL length
-      sample.size += currentNal.data.byteLength;
+    gops = this.groupFramesIntoGops_(frames);
 
-      view.setUint32(i, currentNal.data.byteLength);
-      i += 4;
-      data.set(currentNal.data, i);
-      i += currentNal.data.byteLength;
+    // First, we have to build the index from byte locations to
+    // samples (that is, frames) in the video data
+    track.samples = this.generateSampleTable_(frames);
 
-      nalUnits.shift();
-    }
-    // record the last sample
-    if (track.samples.length) {
-      sample.duration = track.samples[track.samples.length - 1].duration;
-    }
-    track.samples.push(sample);
-
-    // filter out pre-IDR data
-    duration = 0;
-    while (track.samples.length) {
-      sample = track.samples[0];
-      if (sample.flags.dependsOn === 2) {
-        data = data.subarray(sample.dataOffset);
-        sample.duration += duration;
-        break;
-      }
-      duration += sample.duration;
-      track.samples.shift();
-    }
+    // Concatenate the video data and construct the mdat
+    mdat = mp4.mdat(this.concatenateNalData_(frames));
 
-    nalUnitsLength = 0;
-    mdat = mp4.mdat(data);
+    // Save all the nals in the last GOP into the gop cache
+    gopCache.unshift({
+      nalUnits: gops.pop().reduce(function(a, b) { return a.concat(b); }, []),
+      pps: track.pps,
+      sps: track.sps
+    });
+
+    // Keep a maximum of 6 GOPs in the cache (about a minute worth)
+    gopCache.length = Math.min(6, gopCache.length);
+
+    // Clear nalUnits
+    nalUnits = [];
 
     calculateTrackBaseMediaDecodeTime(track);
 
@@ -291,7 +381,197 @@ VideoSegmentStream = function(track) {
     // Continue with the flush process now
     this.trigger('done');
   };
+
+  // create the default sample
+  // see ISO/IEC 14496-12:2012, section 8.6.4.3
+  this.createDefaultSample_ = function() {
+    return {
+      size: 0,
+      flags: {
+        isLeading: 0,
+        dependsOn: 1,
+        isDependedOn: 0,
+        hasRedundancy: 0,
+        degradationPriority: 0
+      }
+    };
+  };
+
+  // a frame (array of nal units) counts as an i-frame -- strictly speaking an
+  // IDR frame -- as soon as any one of its nal units is an IDR slice
+  this.frameIsIFrame_ = function(frame) {
+    var
+      idrType = 'slice_layer_without_partitioning_rbsp_idr',
+      nalIndex = 0;
+
+    while (nalIndex < frame.length) {
+      if (frame[nalIndex].nalUnitType === idrType) {
+        return true;
+      }
+      nalIndex++;
+    }
+    return false;
+  };
+
+  this.filterLeadingNonIFrames_ = function(frames) {
+    var
+      i,
+      currentFrame,
+      initialPts = frames[0][0].pts,
+      initialDts = frames[0][0].dts;
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+
+      // If we found an i-frame, reset its first nal unit's dts and pts values
+      // to mirror the first nal unit from the set we are dropping.
+      // This has the effect of "extending" the first i-frame in time so that it
+      // is shown for the span the dropped nal units would have covered
+      if (this.frameIsIFrame_(currentFrame)) {
+        currentFrame[0].pts = initialPts;
+        currentFrame[0].dts = initialDts;
+        break;
+      }
+    }
+    return frames.slice(i);
+  };
+
+  // convert an array of nal units into an array of frames with each frame being
+  // composed of the nal units that make up that frame
+  this.groupNalsIntoFrames_ = function(nalUnits) {
+    var
+      i,
+      currentNal,
+      currentFrame = [],
+      frames = [];
+
+    for (i = 0; i < nalUnits.length; i++) {
+      currentNal = nalUnits[i];
+
+      if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
+        if (currentFrame && currentFrame.length) {
+          frames.push(currentFrame);
+        }
+
+        currentFrame = [currentNal];
+      } else {
+        currentFrame.push(currentNal);
+      }
+    }
+    // push the final frame
+    frames.push(currentFrame);
+    return frames;
+  };
+
+  // convert an array of frames into an array of Gop with each Gop being
+  // composed of the frames that make up that Gop
+  this.groupFramesIntoGops_ = function(frames) {
+    var
+      i,
+      currentFrame,
+      currentGop = [],
+      gops = [];
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+
+      if (this.frameIsIFrame_(currentFrame)) {
+        if (currentGop && currentGop.length) {
+          gops.push(currentGop);
+        }
+
+        currentGop = [currentFrame];
+      } else {
+        currentGop.push(currentFrame);
+      }
+    }
+    // push the final Gop
+    gops.push(currentGop);
+    return gops;
+  };
+
+  // generate the track's sample table from an array of frames; each frame
+  // becomes one sample whose dataOffset starts at baseDataOffset (default 0)
+  this.generateSampleTable_ = function(frames, baseDataOffset) {
+    var
+      i,
+      sample,
+      currentFrame,
+      nextFrame,
+      firstNal,
+      lastNal,
+      frameDataSize,
+      dataOffset = baseDataOffset || 0,
+      samples = [];
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+      nextFrame = frames[i + 1];
+
+      firstNal = currentFrame[0];
+      lastNal = currentFrame[currentFrame.length - 1];
+      frameDataSize = sumByteLengths(currentFrame, 'data');
+
+      sample = this.createDefaultSample_();
+      sample.dataOffset = dataOffset;
+      sample.compositionTimeOffset = firstNal.pts - firstNal.dts;
+
+      if (nextFrame) {
+        sample.duration = nextFrame[0].dts - firstNal.dts;
+      } else {
+        sample.duration = lastNal.dts - firstNal.dts;
+
+        if (sample.duration === 0 &&
+            samples.length > 0) {
+          // for the last frame, copy the duration of the previous
+          // frame
+          sample.duration = samples[samples.length - 1].duration;
+        }
+      }
+
+      sample.size = 4 * currentFrame.length; // Space for nal unit size
+      sample.size += frameDataSize;
+
+      if (this.frameIsIFrame_(currentFrame)) {
+        sample.flags.dependsOn = 2;
+      }
+
+      dataOffset += sample.size;
+
+      samples.push(sample);
+    }
+    return samples;
+  };
+
+  // generate the track's raw mdat data from an array of frames
+  this.concatenateNalData_ = function (frames) {
+    var
+      i, j,
+      currentFrame,
+      currentNal,
+      dataOffset = 0,
+      nalsByteLength = frames.reduce(function(v, frame) {return v + sumByteLengths(frame, 'data'); }, 0),
+      numberOfNals = frames.reduce(function(v, frame) { return v + frame.length; }, 0),
+      totalByteLength = nalsByteLength + 4 * numberOfNals,
+      data = new Uint8Array(totalByteLength),
+      view = new DataView(data.buffer);
+
+    for (i = 0; i < frames.length; i++) {
+      currentFrame = frames[i];
+
+      for (j = 0; j < currentFrame.length; j++) {
+        currentNal = currentFrame[j];
+
+        view.setUint32(dataOffset, currentNal.data.byteLength);
+        dataOffset += 4;
+        data.set(currentNal.data, dataOffset);
+        dataOffset += currentNal.data.byteLength;
+      }
+    }
+    return data;
+  };
 };
+
 VideoSegmentStream.prototype = new Stream();
 
 /**
diff --git a/test/transmuxer.test.js b/test/transmuxer.test.js
index 1277fe93..bb5a9fb4 100644
--- a/test/transmuxer.test.js
+++ b/test/transmuxer.test.js
@@ -1207,6 +1207,10 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() {
     nalUnitType: 'access_unit_delimiter_rbsp',
     data: new Uint8Array([0x09, 0x01])
   });
+  videoSegmentStream.push({
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    data: new Uint8Array([0x05, 0x01])
+  });
   videoSegmentStream.push({
     data: new Uint8Array([
       0x08,
@@ -1224,11 +1228,13 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() {
   QUnit.ok(segment, 'generated a data event');
   boxes = mp4.tools.inspect(segment);
   QUnit.equal(boxes[1].byteLength,
-        (2 + 4) + (4 + 4) + (4 + 6),
+        (2 + 4) + (2 + 4) + (4 + 4) + (4 + 6),
         'wrote the correct number of bytes');
   QUnit.deepEqual(new Uint8Array(segment.subarray(boxes[0].size + 8)), new Uint8Array([
     0, 0, 0, 2,
     0x09, 0x01,
+    0, 0, 0, 2,
+    0x05, 0x01,
     0, 0, 0, 4,
     0x08, 0x01, 0x02, 0x03,
     0, 0, 0, 6,
@@ -1237,6 +1243,40 @@ QUnit.test('concatenates NAL units into AVC elementary streams', function() {
 });
 
 QUnit.test('infers sample durations from DTS values', function() {
+   var segment, boxes, samples;
+   videoSegmentStream.on('data', function(data) {
+     segment = data.boxes;
+   });
+   videoSegmentStream.push({
+     data: new Uint8Array([0x09, 0x01]),
+     nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1
+   });
+   videoSegmentStream.push({
+     data: new Uint8Array([0x09, 0x01]),
+     nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 1
+   });
+   videoSegmentStream.push({
+     data: new Uint8Array([0x09, 0x01]),
+     nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 2
+   });
+   videoSegmentStream.push({
+     data: new Uint8Array([0x09, 0x01]),
+     nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 4
+   });
+   videoSegmentStream.flush();
+  boxes = mp4.tools.inspect(segment);
+  samples = boxes[0].boxes[1].boxes[2].samples;
+  QUnit.equal(samples.length, 3, 'generated three samples');
+  QUnit.equal(samples[0].duration, 1, 'set the first sample duration');
+  QUnit.equal(samples[1].duration, 2, 'set the second sample duration');
+  QUnit.equal(samples[2].duration, 2, 'inferred the final sample duration');
+});
+
+QUnit.test('filters pre-IDR samples and calculate duration correctly', function() {
   var segment, boxes, samples;
   videoSegmentStream.on('data', function(data) {
     segment = data.boxes;
@@ -1248,12 +1288,17 @@ QUnit.test('infers sample durations from DTS values', function() {
   });
   videoSegmentStream.push({
     data: new Uint8Array([0x09, 0x01]),
-    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    nalUnitType: 'slice_layer_without_partitioning_rbsp',
     dts: 1
   });
   videoSegmentStream.push({
     data: new Uint8Array([0x09, 0x01]),
     nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x09, 0x01]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
     dts: 2
   });
   videoSegmentStream.push({
@@ -1265,49 +1310,346 @@ QUnit.test('infers sample durations from DTS values', function() {
 
   boxes = mp4.tools.inspect(segment);
   samples = boxes[0].boxes[1].boxes[2].samples;
-  QUnit.equal(samples.length, 3, 'generated three samples');
-  QUnit.equal(samples[0].duration, 1, 'set the first sample duration');
-  QUnit.equal(samples[1].duration, 2, 'set the second sample duration');
-  QUnit.equal(samples[2].duration, 2, 'inferred the final sample duration');
+  QUnit.equal(samples.length, 2, 'generated two samples, filters out pre-IDR');
+  QUnit.equal(samples[0].duration, 3, 'set the first sample duration');
+  QUnit.equal(samples[1].duration, 3, 'set the second sample duration');
 });
 
-QUnit.test('filters pre-IDR samples and caluculate duration correctly', function() {
+QUnit.test('holds onto the last GOP and prepends the subsequent push operation with that GOP', function() {
   var segment, boxes, samples;
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'seq_parameter_set_rbsp',
+    config: {},
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'pic_parameter_set_rbsp',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x66, 0x66]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 2,
+    pts: 2
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x03]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x99, 0x99]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x04]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 4,
+    pts: 4
+  });
+  videoSegmentStream.flush();
+
   videoSegmentStream.on('data', function(data) {
     segment = data.boxes;
   });
+
   videoSegmentStream.push({
-    data: new Uint8Array([0x09, 0x01]),
+    data: new Uint8Array([0x02, 0x01]),
     nalUnitType: 'access_unit_delimiter_rbsp',
-    dts: 1
+    dts: 5,
+    pts: 5
   });
   videoSegmentStream.push({
-    data: new Uint8Array([0x09, 0x01]),
-    nalUnitType: 'slice_layer_without_partitioning_rbsp',
-    dts: 1
+    data: new Uint8Array([0x02, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 6,
+    pts: 6
   });
   videoSegmentStream.push({
-    data: new Uint8Array([0x09, 0x01]),
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'seq_parameter_set_rbsp',
+    config: {},
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'pic_parameter_set_rbsp',
+    dts: 1,
+    pts: 1
+  });  videoSegmentStream.push({
+    data: new Uint8Array([0x11, 0x11]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 6,
+    pts: 6
+  });
+  videoSegmentStream.flush();
+
+  boxes = mp4.tools.inspect(segment);
+  samples = boxes[0].boxes[1].boxes[2].samples;
+  QUnit.equal(samples.length, 4, 'generated four samples, two from previous segment');
+  QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair');
+  QUnit.equal(samples[1].size, 6, 'second sample is an AUD');
+  QUnit.equal(samples[2].size, 6, 'third sample is an AUD');
+  QUnit.equal(samples[3].size, 24, 'fourth sample is an AUD + PPS + SPS + IDR');
+});
+
+QUnit.test('doesn\'t prepend the last GOP if the next segment has earlier PTS', function() {
+  var segment, boxes, samples;
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x01]),
     nalUnitType: 'access_unit_delimiter_rbsp',
-    dts: 2
+    dts: 10,
+    pts: 10
   });
   videoSegmentStream.push({
-    data: new Uint8Array([0x09, 0x01]),
+    data: new Uint8Array([0x66, 0x66]),
     nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
-    dts: 1
+    dts: 10,
+    pts: 10
   });
   videoSegmentStream.push({
-    data: new Uint8Array([0x09, 0x01]),
+    data: new Uint8Array([0x01, 0x02]),
     nalUnitType: 'access_unit_delimiter_rbsp',
-    dts: 4
+    dts: 11,
+    pts: 11
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x03]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 12,
+    pts: 12
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x99, 0x99]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 12,
+    pts: 12
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x04]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 13,
+    pts: 13
+  });
+  videoSegmentStream.flush();
+
+  videoSegmentStream.on('data', function(data) {
+    segment = data.boxes;
+  });
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 5,
+    pts: 5
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 6,
+    pts: 6
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x11, 0x11]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 6,
+    pts: 6
   });
   videoSegmentStream.flush();
 
   boxes = mp4.tools.inspect(segment);
   samples = boxes[0].boxes[1].boxes[2].samples;
-  QUnit.equal(samples.length, 2, 'generated two samples, filters out pre-IDR');
-  QUnit.equal(samples[0].duration, 3, 'set the first sample duration');
-  QUnit.equal(samples[1].duration, 2, 'set the second sample duration');
+  QUnit.equal(samples.length, 1, 'generated one sample');
+  QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair');
+});
+
+QUnit.test('doesn\'t prepend the last GOP if the next segment has different PPS or SPS', function() {
+  var segment, boxes, samples;
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'seq_parameter_set_rbsp',
+    config: {},
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x00]),
+    nalUnitType: 'pic_parameter_set_rbsp',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x66, 0x66]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 2,
+    pts: 2
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x03]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x99, 0x99]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x04]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 4,
+    pts: 4
+  });
+  videoSegmentStream.flush();
+
+  videoSegmentStream.on('data', function(data) {
+    segment = data.boxes;
+  });
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 5,
+    pts: 5
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 6,
+    pts: 6
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x01]),
+    nalUnitType: 'seq_parameter_set_rbsp',
+    config: {},
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x00, 0x01]),
+    nalUnitType: 'pic_parameter_set_rbsp',
+    dts: 1,
+    pts: 1
+  });  videoSegmentStream.push({
+    data: new Uint8Array([0x11, 0x11]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 6,
+    pts: 6
+  });
+  videoSegmentStream.flush();
+
+  boxes = mp4.tools.inspect(segment);
+  samples = boxes[0].boxes[1].boxes[2].samples;
+  QUnit.equal(samples.length, 1, 'generated one sample');
+  QUnit.equal(samples[0].size, 24, 'first sample is an AUD + PPS + SPS + IDR');
+});
+
+QUnit.test('doesn\'t prepend the last GOP if the next segment is more than 1 seconds in the future', function() {
+  var segment, boxes, samples;
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x66, 0x66]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 1,
+    pts: 1
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 2,
+    pts: 2
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x03]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x99, 0x99]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 3,
+    pts: 3
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x01, 0x04]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 4,
+    pts: 4
+  });
+  videoSegmentStream.flush();
+
+  videoSegmentStream.on('data', function(data) {
+    segment = data.boxes;
+  });
+
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x01]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1000000,
+    pts: 1000000
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x02, 0x02]),
+    nalUnitType: 'access_unit_delimiter_rbsp',
+    dts: 1000001,
+    pts: 1000001
+  });
+  videoSegmentStream.push({
+    data: new Uint8Array([0x11, 0x11]),
+    nalUnitType: 'slice_layer_without_partitioning_rbsp_idr',
+    dts: 1000001,
+    pts: 1000001
+  });
+  videoSegmentStream.flush();
+
+  boxes = mp4.tools.inspect(segment);
+  samples = boxes[0].boxes[1].boxes[2].samples;
+  QUnit.equal(samples.length, 1, 'generated one sample');
+  QUnit.equal(samples[0].size, 12, 'first sample is an AUD + IDR pair');
 });
 
 QUnit.test('track values from seq_parameter_set_rbsp should be cleared by a flush', function() {
@@ -1315,6 +1657,10 @@ QUnit.test('track values from seq_parameter_set_rbsp should be cleared by a flus
   videoSegmentStream.on('data', function(data) {
     track = data.track;
   });
+  videoSegmentStream.push({
+    data: new Uint8Array([0xFF]),
+    nalUnitType: 'access_unit_delimiter_rbsp'
+  });
   videoSegmentStream.push({
     data: new Uint8Array([0xFF]),
     nalUnitType: 'seq_parameter_set_rbsp',
@@ -1376,6 +1722,10 @@ QUnit.test('track pps from pic_parameter_set_rbsp should be cleared by a flush',
   videoSegmentStream.on('data', function(data) {
     track = data.track;
   });
+  videoSegmentStream.push({
+    data: new Uint8Array([0xFF]),
+    nalUnitType: 'access_unit_delimiter_rbsp'
+  });
   videoSegmentStream.push({
     data: new Uint8Array([0x01]),
     nalUnitType: 'pic_parameter_set_rbsp',
@@ -1400,7 +1750,7 @@ QUnit.test('track pps from pic_parameter_set_rbsp should be cleared by a flush',
   QUnit.equal(track.pps[0][0], 0x03, 'first pps is 0x03 after a flush');
 });
 
-QUnit.test('calculates compositionTimeOffset values from the PTS and DTS', function() {
+QUnit.test('calculates compositionTimeOffset values from the PTS/DTS', function() {
   var segment, boxes, samples;
   videoSegmentStream.on('data', function(data) {
     segment = data.boxes;
@@ -1832,6 +2182,9 @@ QUnit.test('no options creates combined output', function() {
   transmuxer.push(packetize(audioPes([
     0x19, 0x47
   ], true)));
+  transmuxer.push(packetize(videoPes([
+      0x09, 0x01 // access_unit_delimiter_rbsp
+  ], true)));
   transmuxer.push(packetize(videoPes([
       0x08, 0x01 // pic_parameter_set_rbsp
   ], true)));
@@ -1883,6 +2236,9 @@ QUnit.test('can specify that we want to generate separate audio and video segmen
   transmuxer.push(packetize(audioPes([
     0x19, 0x47
   ], true)));
+  transmuxer.push(packetize(videoPes([
+      0x09, 0x01 // access_unit_delimiter_rbsp
+  ], true)));
   transmuxer.push(packetize(videoPes([
       0x08, 0x01 // pic_parameter_set_rbsp
   ], true)));
@@ -1932,6 +2288,9 @@ QUnit.test('generates a video init segment', function() {
     hasVideo: true
   })));
 
+  transmuxer.push(packetize(videoPes([
+      0x09, 0x01 // access_unit_delimiter_rbsp
+  ], true)));
   transmuxer.push(packetize(videoPes([
       0x08, 0x01 // pic_parameter_set_rbsp
   ], true)));
@@ -1989,7 +2348,7 @@ QUnit.test('buffers video samples until flushed', function() {
 
   // buffer a NAL
   transmuxer.push(packetize(videoPes([0x09, 0x01], true)));
-  transmuxer.push(packetize(videoPes([0x00, 0x02])));
+  transmuxer.push(packetize(videoPes([0x05, 0x02])));
 
   // add an access_unit_delimiter_rbsp
   transmuxer.push(packetize(videoPes([0x09, 0x03])));
@@ -2010,7 +2369,7 @@ QUnit.test('buffers video samples until flushed', function() {
               0, 0, 0, 2,
               0x09, 0x01,
               0, 0, 0, 2,
-              0x00, 0x02,
+              0x05, 0x02,
               0, 0, 0, 2,
               0x09, 0x03,
               0, 0, 0, 2,
@@ -2397,10 +2756,10 @@ QUnit.test('generates video tags', function() {
   })));
 
   transmuxer.push(packetize(videoPes([
-      0x09, 0x01 // access_unit_delimiter
+      0x09, 0x01 // access_unit_delimiter_rbsp
   ], true)));
   transmuxer.push(packetize(videoPes([
-      0x09, 0x01 // access_unit_delimiter
+      0x09, 0x01 // access_unit_delimiter_rbsp
   ], true)));
 
   transmuxer.flush();
@@ -2431,7 +2790,7 @@ QUnit.test('drops nalUnits at the start of a segment not preceeded by an access_
     0xef, 0x7c, 0x04
   ], false)));
   transmuxer.push(packetize(videoPes([
-      0x09, 0x01 // access_unit_delimiter
+      0x09, 0x01 // access_unit_delimiter_rbsp
   ], true)));
 
   transmuxer.flush();