google · alvestrand · Sep 12, 2016 · Aug 30, 2016 · Sep 6, 2016 · pzembrod
diff --git a/bin/take_snapshot b/bin/take_snapshot
@@ -26,6 +26,9 @@ if [ -z $CODEC_WORKDIR -o ! -d $CODEC_WORKDIR ]; then
 fi
 
 DESTINATION=$WORKDIR/snapshot
+echo "Removing old snapshot"
+rm -r $DESTINATION
+echo "Creating new snapshot"
 for RESULT in $(find $CODEC_WORKDIR -name '*.result' -o -name 'parameters'); do
   DEST_FILE=$(echo $RESULT | sed -e "s!$CODEC_WORKDIR!$DESTINATION!")
   DEST_DIR=$(dirname $DEST_FILE)

diff --git a/bin/verify_scores b/bin/verify_scores
@@ -45,20 +45,24 @@ def ClassifyScoreRelation(old_score, new_score):
     return 'improved'
   return 'worsened'
 
-def VerifyOneTarget(codec_names, rate, videofile, score):
+def VerifyOneTarget(codec_names, rate, videofile, old_scores, score):
   change_counts = collections.Counter()
   for codec_name in codec_names:
     codec = pick_codec.PickCodec(codec_name)
-    my_optimizer = optimizer.Optimizer(codec)
-    bestsofar = my_optimizer.BestEncoding(rate, videofile)
+    old_optimizer = optimizer.Optimizer(codec, scoredir=old_scores)
+    new_optimizer = optimizer.Optimizer(codec)
+    bestsofar = new_optimizer.RebaseEncoding(
+        old_optimizer.BestEncoding(rate, videofile))
+    bestsofar.Recover()
     if score:
       bestsofar.Execute().Store()
     try:
-      old_score = my_optimizer.Score(bestsofar, scoredir='snapshot')
+      old_score = old_optimizer.Score(bestsofar)
     except encoder.Error:
-      print 'No old score for %s, continuing' % (bestsofar.encoder.Hashname())
+      print 'No old score for %s %d %s, continuing' % \
+        (bestsofar.encoder.Hashname(), rate, videofile.filename)
       continue
-    new_score = my_optimizer.Score(bestsofar)
+    new_score = new_optimizer.Score(bestsofar)
     result = ClassifyScoreRelation(old_score, new_score)
     change_counts[result] += 1
     if result != 'no change':
@@ -70,12 +74,12 @@ def VerifyOneTarget(codec_names, rate, videofile, score):
 
   return change_counts
 
-def VerifyResults(codec_names, score):
+def VerifyResults(codec_names, old_scores, score):
   change_counts = collections.Counter()
   for rate, filename in mpeg_settings.MpegFiles().AllFilesAndRates():
     videofile = encoder.Videofile(filename)
     change_counts.update(VerifyOneTarget(codec_names, rate,
-                                         videofile, score))
+                                         videofile, old_scores, score))
     print 'Result so far:', dict(change_counts)
   return change_counts
 
@@ -84,8 +88,10 @@ def main():
   parser.add_argument('codec_names', nargs='*',
                       default=pick_codec.AllCodecNames())
   parser.add_argument('--score', action='store_true', default=False)
+  parser.add_argument('--old_scores', default='snapshot')
   args = parser.parse_args()
-  change_count = VerifyResults(args.codec_names, score=args.score)
+  change_count = VerifyResults(args.codec_names, old_scores=args.old_scores,
+                               score=args.score)
   print 'Change evaluations: ', dict(change_count)
   return 0
 

diff --git a/lib/encoder.py b/lib/encoder.py
@@ -494,12 +494,12 @@ class Context(object):
   is passed to the context constructor.
   """
 
-  def __init__(self, codec, cache_class=None):
+  def __init__(self, codec, cache_class=None, scoredir=None):
     self.codec = codec
     if cache_class:
-      self.cache = cache_class(self)
+      self.cache = cache_class(self, scoredir)
     else:
-      self.cache = EncodingMemoryCache(self)
+      self.cache = EncodingMemoryCache(self, scoredir)
 
 
 class Encoder(object):
@@ -731,11 +731,16 @@ def _FileNameToVideofile(full_filename):
 
 class EncodingDiskCache(object):
   """Encoder and encoding information, saved to disk."""
-  def __init__(self, context):
+  def __init__(self, context, scoredir=None):
     self.context = context
-    # Default work directory is current directory.
-    self.workdir = '%s/%s' % (encoder_configuration.conf.workdir(),
-                              context.codec.name)
+    self.bad_encodings = {}
+    if scoredir:
+      self.workdir = os.path.join(encoder_configuration.conf.sysdir(),
+                                  scoredir, context.codec.name)
+    else:
+      # Default work directory.
+      self.workdir = os.path.join(encoder_configuration.conf.workdir(),
+                                  context.codec.name)
     if not os.path.isdir(self.workdir):
       os.mkdir(self.workdir)
 
@@ -766,8 +771,13 @@ def _FilesToEncodings(self, files, videofile, bitrate=None,
         encoder or self._FileNameToEncoder(full_filename),
         bitrate or _FileNameToBitrate(full_filename),
         videofile or _FileNameToVideofile(full_filename))
-      candidate.Recover()
-      candidates.append(candidate)
+      try:
+        candidate.Recover()
+        candidates.append(candidate)
+      except Error as err:
+        # TODO(hta): Change this to a more specific catch once available.
+        self.bad_encodings[full_filename] = err
+        continue
     return candidates
 
   def _QueryScoredEncodings(self, encoder=None, bitrate=None, videofile=None):
@@ -848,22 +858,16 @@ def AllEncoderFilenames(self, only_workdir=False):
   def RemoveEncoder(self, hashname):
     shutil.rmtree(os.path.join(self.workdir, hashname))
 
-  def StoreEncoding(self, encoding, workdir=None):
+  def StoreEncoding(self, encoding):
     """Stores an encoding object on disk.
 
     An encoding object consists of its result (if executed).
     The bitrate is encoded as a directory, the videofilename
     is encoded as part of the output filename.
     """
-    if workdir:
-      dirname = os.path.join(workdir,
-                             self.context.codec.name,
-                             encoding.encoder.Hashname(),
-                             self.context.codec.SpeedGroup(encoding.bitrate))
-    else:
-      dirname = os.path.join(self.workdir,
-                             encoding.encoder.Hashname(),
-                             self.context.codec.SpeedGroup(encoding.bitrate))
+    dirname = os.path.join(self.workdir,
+                           encoding.encoder.Hashname(),
+                           self.context.codec.SpeedGroup(encoding.bitrate))
     if not os.path.isdir(dirname):
       os.mkdir(dirname)
     if not encoding.result:
@@ -872,17 +876,14 @@ def StoreEncoding(self, encoding, workdir=None):
     with open('%s/%s.result' % (dirname, videoname), 'w') as resultfile:
       json.dump(encoding.result, resultfile, indent=2)
 
-  def ReadEncodingResult(self, encoding, scoredir=None):
+  def ReadEncodingResult(self, encoding):
     """Reads an encoding result back from storage, if present.
 
     None is returned if file does not exist.
     If scoredir is given, only that directory is searched.
     If it is not given, all directories in the search path are searched."""
 
-    if scoredir:
-      searchpath = [os.path.join(scoredir, self.context.codec.name)]
-    else:
-      searchpath = self.SearchPathForScores()
+    searchpath = self.SearchPathForScores()
 
     for workdir in searchpath:
       dirname = os.path.join(workdir, encoding.encoder.Hashname(),
@@ -903,13 +904,16 @@ def ReadEncodingResult(self, encoding, scoredir=None):
 
 class EncodingMemoryCache(object):
   """Encoder and encoding information, in-memory only. For testing."""
-  def __init__(self, context):
+  def __init__(self, context, scoredir=None):
+    if scoredir:
+      raise Error('Cannot have scoredir on a memory cache')
     self.context = context
     self.encoders = {}
     self.encodings = []
+    self.workdir = '/not-valid-file/' + self.context.codec.name
 
   def WorkDir(self):
-    return '/not-valid-file/' + self.context.codec.name
+    return self.workdir
 
   def AllScoredEncodings(self, bitrate, videofile):
     result = []

diff --git a/lib/encoder_unittest.py b/lib/encoder_unittest.py
@@ -510,8 +510,11 @@ def testRemoveEncoder(self):
 
   def testReadResultFromAlternateDir(self):
     context = StorageOnlyContext()
-    otherdir = os.path.join(encoder_configuration.conf.sysdir(), 'otherdir')
+    otherdir_path = os.path.join(encoder_configuration.conf.sysdir(),
+                                 'otherdir')
+    os.mkdir(otherdir_path)
     cache = encoder.EncodingDiskCache(context)
+    other_cache = encoder.EncodingDiskCache(context, scoredir='otherdir')
     my_encoder = encoder.Encoder(
         context,
         encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
@@ -523,10 +526,11 @@ def testReadResultFromAlternateDir(self):
     my_encoding.result = testresult
     cache.StoreEncoding(my_encoding)
     my_encoding.result = None
-    result = cache.ReadEncodingResult(my_encoding, scoredir=otherdir)
+    result = other_cache.ReadEncodingResult(my_encoding)
     self.assertIsNone(result)
-    shutil.copytree(encoder_configuration.conf.workdir(), otherdir)
-    result = cache.ReadEncodingResult(my_encoding, scoredir=otherdir)
+    shutil.rmtree(otherdir_path)
+    shutil.copytree(encoder_configuration.conf.workdir(), otherdir_path)
+    result = other_cache.ReadEncodingResult(my_encoding)
     self.assertEquals(result, testresult)
 
   def testAllScoredEncodingsForEncoder(self):
@@ -587,9 +591,10 @@ def testEncodersInMultipleRepos(self):
         encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
     other_dir = os.path.join(encoder_configuration.conf.sysdir(),
                              'multirepo_test')
-    encoder_configuration.conf.override_scorepath_for_test([other_dir])
     os.mkdir(other_dir)
-    cache.StoreEncoder(my_encoder, workdir=other_dir)
+    other_cache = encoder.EncodingDiskCache(context, scoredir='multirepo_test')
+    encoder_configuration.conf.override_scorepath_for_test([other_dir])
+    other_cache.StoreEncoder(my_encoder)
     encoders = cache.AllEncoderFilenames(only_workdir=True)
     self.assertEquals(0, len(encoders))
     encoders = cache.AllEncoderFilenames(only_workdir=False)
@@ -601,13 +606,46 @@ def testEncodersInMultipleRepos(self):
                                    encoder.Videofile('x/foo_640_480_20.yuv'))
     testresult = {'foo': 'bar'}
     my_encoding.result = testresult
-    cache.StoreEncoding(my_encoding, workdir=other_dir)
+    other_cache.StoreEncoding(my_encoding)
     # With a specified directory, we should find it in only one place.
-    self.assertFalse(cache.ReadEncodingResult(my_encoding,
-        scoredir=encoder_configuration.conf.workdir()))
-    self.assertTrue(cache.ReadEncodingResult(my_encoding, scoredir=other_dir))
-    # Without a specified directory, we should find it everywhere.
+    self.assertTrue(other_cache.ReadEncodingResult(my_encoding))
+    # Without a specified directory, we should find it on the searchpath.
     self.assertTrue(cache.ReadEncodingResult(my_encoding))
+    # Without a searchpath, we shouldn't find it in the default cache.
+    encoder_configuration.conf.override_scorepath_for_test([])
+    self.assertFalse(cache.ReadEncodingResult(my_encoding))
+
+  def testBrokenStoredEncoding(self):
+    context = StorageOnlyContext()
+    other_dir = os.path.join(encoder_configuration.conf.sysdir(),
+                             'broken_files')
+    os.mkdir(other_dir)
+
+    cache = encoder.EncodingDiskCache(context, scoredir='broken_files')
+    # This particular test needs the context to know about the cache.
+    context.cache = cache
+    my_encoder = encoder.Encoder(
+        context,
+        encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
+    cache.StoreEncoder(my_encoder)
+    # Cache should start off empty.
+    self.assertFalse(cache.AllScoredEncodingsForEncoder(my_encoder))
+    videofile = encoder.Videofile('x/foo_640_480_20.yuv')
+    my_encoding = encoder.Encoding(my_encoder, 123, videofile)
+    testresult = {'foo': 'bar'}
+    my_encoding.result = testresult
+    cache.StoreEncoding(my_encoding)
+    # TODO(hta): Expose the filename generation as a function for testing.
+    with open(os.path.join(cache.workdir,
+                           my_encoding.encoder.Hashname(),
+                           cache.context.codec.SpeedGroup(my_encoding.bitrate),
+                           '%s.result' % my_encoding.videofile.basename),
+              'w') as scorefile:
+      scorefile.write('stuff that is not valid json')
+
+    result = cache.AllScoredEncodingsForEncoder(my_encoder)
+    self.assertFalse(result)
+    self.assertEquals(1, len(cache.bad_encodings))
 
 
 class TestEncodingMemoryCache(unittest.TestCase):

diff --git a/lib/optimizer.py b/lib/optimizer.py
@@ -30,29 +30,38 @@ class Optimizer(object):
   - A video file set, with associated target bitrates.
   - A set of pre-executed encodings (the cache).
   - A score function.
+  - A score directory, normally null, which means "take from context".
 
   One should be able ask an optimizer to find the parameters that give the
   best result on the score function for that codec."""
   def __init__(self, codec, file_set=None,
-               cache_class=None, score_function=None):
+               cache_class=None, score_function=None,
+               scoredir=None):
+    # pylint: disable=too-many-arguments
     self.context = encoder.Context(codec,
-                                   cache_class or encoder.EncodingDiskCache)
+                                   cache_class or encoder.EncodingDiskCache,
+                                   scoredir=scoredir)
     self.file_set = file_set
     self.score_function = score_function or score_tools.ScorePsnrBitrate
 
-  def Score(self, encoding, scoredir=None):
-    if scoredir is None:
-      result = encoding.result
-    else:
-      result = self.context.cache.ReadEncodingResult(encoding,
-          scoredir=scoredir)
+  def Score(self, encoding):
+    result = encoding.result
     if not result:
       raise encoder.Error('Trying to score an encoding without result')
     score = self.score_function(encoding.bitrate, result)
     # Weakly penalize long command lines.
     score -= len(encoding.encoder.parameters.values) * 0.00001
     return score
 
+  def RebaseEncoding(self, encoding):
+    """Take an encoding from another context and rebase it to
+    this context, using the same encoder arguments."""
+    return encoder.Encoding(self.RebaseEncoder(encoding.encoder),
+                            encoding.bitrate, encoding.videofile)
+
+  def RebaseEncoder(self, my_encoder):
+    return encoder.Encoder(self.context, my_encoder.parameters)
+
   def BestEncoding(self, bitrate, videofile):
     encodings = self.AllScoredEncodings(bitrate, videofile)
     if encodings:

diff --git a/lib/optimizer_unittest.py b/lib/optimizer_unittest.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 """ Unit tests for the optimizer. """
+import os
 import re
 import unittest
 
@@ -241,7 +242,9 @@ def EncoderFromParameterString(self, parameter_string):
                                parameter_string))
 
   def test_BestOverallConfigurationNotInWorkDirectory(self):
-    other_dir = encoder_configuration.conf.sysdir() + '/multirepo_test'
+    other_dir = os.path.join(encoder_configuration.conf.sysdir(),
+                             'multirepo_test')
+    os.mkdir(other_dir)
     encoder_configuration.conf.override_scorepath_for_test([other_dir])
 
     self.file_set = optimizer.FileAndRateSet(verify_files_present=False)
@@ -251,13 +254,15 @@ def test_BestOverallConfigurationNotInWorkDirectory(self):
     best_encoder = self.optimizer.BestOverallEncoder()
     self.assertIsNone(best_encoder)
     # Fill in the database with all the files and rates.
+    other_context = encoder.Context(self.codec, encoder.EncodingDiskCache,
+                                    scoredir='multirepo_test')
     my_encoder = self.EncoderFromParameterString('--score=7')
-    my_encoder.context.cache.StoreEncoder(my_encoder, workdir=other_dir)
+    other_context.cache.StoreEncoder(my_encoder)
     my_encoder.context.cache.StoreEncoder(my_encoder)
     for rate, filename in self.file_set.AllFilesAndRates():
       my_encoding = my_encoder.Encoding(rate, encoder.Videofile(filename))
       my_encoding.Execute()
-      my_encoding.context.cache.StoreEncoding(my_encoding, workdir=other_dir)
+      other_context.cache.StoreEncoding(my_encoding)
     # The best encoder should now be from the workdir, but the results are
     # all fetched from the searchpath.
     best_encoder = self.optimizer.BestOverallEncoder()
@@ -268,6 +273,26 @@ def test_BestOverallConfigurationNotInWorkDirectory(self):
     one_encoding.Recover()
     self.assertTrue(one_encoding.Result())
 
+  def test_MultipleOptimizers(self):
+    # Make sure other score directories don't interfere with this test.
+    encoder_configuration.conf.override_scorepath_for_test([])
+
+    os.mkdir(os.path.join(encoder_configuration.conf.sysdir(), 'first_dir'))
+    os.mkdir(os.path.join(encoder_configuration.conf.sysdir(), 'second_dir'))
+    one_optimizer = optimizer.Optimizer(self.codec, scoredir='first_dir')
+    another_optimizer = optimizer.Optimizer(self.codec, scoredir='second_dir')
+    self.assertNotEqual(one_optimizer.context.cache.workdir,
+                        another_optimizer.context.cache.workdir)
+    # Storing one encoding's score should not affect the other's.
+    one_encoding = one_optimizer.BestEncoding(100,
+                                              self.videofile)
+    one_encoding.Execute().Store()
+    another_encoding = another_optimizer.BestEncoding(100, self.videofile)
+    self.assertFalse(another_encoding.Result())
+    another_encoding.Recover()
+    self.assertFalse(another_encoding.Result())
+
+
 
 class TestFileAndRateSet(unittest.TestCase):