Skip to content
This repository was archived by the owner on Nov 5, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bin/take_snapshot
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ if [ -z $CODEC_WORKDIR -o ! -d $CODEC_WORKDIR ]; then
fi

DESTINATION=$WORKDIR/snapshot
echo "Removing old snapshot"
rm -r $DESTINATION
echo "Creating new snapshot"
for RESULT in $(find $CODEC_WORKDIR -name '*.result' -o -name 'parameters'); do
DEST_FILE=$(echo $RESULT | sed -e "s!$CODEC_WORKDIR!$DESTINATION!")
DEST_DIR=$(dirname $DEST_FILE)
Expand Down
24 changes: 15 additions & 9 deletions bin/verify_scores
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,24 @@ def ClassifyScoreRelation(old_score, new_score):
return 'improved'
return 'worsened'

def VerifyOneTarget(codec_names, rate, videofile, score):
def VerifyOneTarget(codec_names, rate, videofile, old_scores, score):
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My standard comment: You are refactoring untested code. 2 risky aspects to the refactoring: 1. splicing in a new parameter between old ones. 2. Spitting up the role of my_optimizer into 2 objects, old_optimizer and new_optimizer.

Not sure if there is a good way to test this, but since the tests you wrote in the past so secure similar-sized changes actually caught bugs, I probably should at least raise this.

Copy link
Copy Markdown
Member Author

@alvestrand alvestrand Sep 5, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acknowledged. The naming structure I've adopted makes it hard to write tests for code in binaries (no .py means that you can't "import" them into the test).
The bug that this fixes is that before, if a score got worsened, it would result in it disappearing from the list of possible best scores, which meant that the next run would test the change against a different score :-(
This was part of #82 - but I decided to separate the code work from the actual version flip.

My only mitigating point on the "insert parameter in the middle of list" is that the function is called from exactly one place, and because of the naming mentioned above, I'm assured that it's not imported anywhere else :-)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All fair enough. Fix for the hard-to-test binaries is of course to move test-worthy code to lib. Granted, doesn't help as a safety net even for that bin2lib refactoring but might provide a motivation going forward to be more strict than in the past about allowing actual work code to live in the binaries directly.

change_counts = collections.Counter()
for codec_name in codec_names:
codec = pick_codec.PickCodec(codec_name)
my_optimizer = optimizer.Optimizer(codec)
bestsofar = my_optimizer.BestEncoding(rate, videofile)
old_optimizer = optimizer.Optimizer(codec, scoredir=old_scores)
new_optimizer = optimizer.Optimizer(codec)
bestsofar = new_optimizer.RebaseEncoding(
old_optimizer.BestEncoding(rate, videofile))
bestsofar.Recover()
if score:
bestsofar.Execute().Store()
try:
old_score = my_optimizer.Score(bestsofar, scoredir='snapshot')
old_score = old_optimizer.Score(bestsofar)
except encoder.Error:
print 'No old score for %s, continuing' % (bestsofar.encoder.Hashname())
print 'No old score for %s %d %s, continuing' % \
(bestsofar.encoder.Hashname(), rate, videofile.filename)
continue
new_score = my_optimizer.Score(bestsofar)
new_score = new_optimizer.Score(bestsofar)
result = ClassifyScoreRelation(old_score, new_score)
change_counts[result] += 1
if result != 'no change':
Expand All @@ -70,12 +74,12 @@ def VerifyOneTarget(codec_names, rate, videofile, score):

return change_counts

def VerifyResults(codec_names, score):
def VerifyResults(codec_names, old_scores, score):
change_counts = collections.Counter()
for rate, filename in mpeg_settings.MpegFiles().AllFilesAndRates():
videofile = encoder.Videofile(filename)
change_counts.update(VerifyOneTarget(codec_names, rate,
videofile, score))
videofile, old_scores, score))
print 'Result so far:', dict(change_counts)
return change_counts

Expand All @@ -84,8 +88,10 @@ def main():
parser.add_argument('codec_names', nargs='*',
default=pick_codec.AllCodecNames())
parser.add_argument('--score', action='store_true', default=False)
parser.add_argument('--old_scores', default='snapshot')
args = parser.parse_args()
change_count = VerifyResults(args.codec_names, score=args.score)
change_count = VerifyResults(args.codec_names, old_scores=args.old_scores,
score=args.score)
print 'Change evaluations: ', dict(change_count)
return 0

Expand Down
56 changes: 30 additions & 26 deletions lib/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,12 @@ class Context(object):
is passed to the context constructor.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The encoder.py file has grown to a size where I would consider splitting it up.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #83 filed.

"""

def __init__(self, codec, cache_class=None):
def __init__(self, codec, cache_class=None, scoredir=None):
self.codec = codec
if cache_class:
self.cache = cache_class(self)
self.cache = cache_class(self, scoredir)
else:
self.cache = EncodingMemoryCache(self)
self.cache = EncodingMemoryCache(self, scoredir)


class Encoder(object):
Expand Down Expand Up @@ -731,11 +731,16 @@ def _FileNameToVideofile(full_filename):

class EncodingDiskCache(object):
"""Encoder and encoding information, saved to disk."""
def __init__(self, context):
def __init__(self, context, scoredir=None):
self.context = context
# Default work directory is current directory.
self.workdir = '%s/%s' % (encoder_configuration.conf.workdir(),
context.codec.name)
self.bad_encodings = {}
if scoredir:
self.workdir = os.path.join(encoder_configuration.conf.sysdir(),
scoredir, context.codec.name)
else:
# Default work directory.
self.workdir = os.path.join(encoder_configuration.conf.workdir(),
context.codec.name)
if not os.path.isdir(self.workdir):
os.mkdir(self.workdir)

Expand Down Expand Up @@ -766,8 +771,13 @@ def _FilesToEncodings(self, files, videofile, bitrate=None,
encoder or self._FileNameToEncoder(full_filename),
bitrate or _FileNameToBitrate(full_filename),
videofile or _FileNameToVideofile(full_filename))
candidate.Recover()
candidates.append(candidate)
try:
candidate.Recover()
candidates.append(candidate)
except Error as err:
# TODO(hta): Change this to a more specific catch once available.
self.bad_encodings[full_filename] = err
continue
return candidates

def _QueryScoredEncodings(self, encoder=None, bitrate=None, videofile=None):
Expand Down Expand Up @@ -848,22 +858,16 @@ def AllEncoderFilenames(self, only_workdir=False):
def RemoveEncoder(self, hashname):
shutil.rmtree(os.path.join(self.workdir, hashname))

def StoreEncoding(self, encoding, workdir=None):
def StoreEncoding(self, encoding):
"""Stores an encoding object on disk.

An encoding object consists of its result (if executed).
The bitrate is encoded as a directory, the videofilename
is encoded as part of the output filename.
"""
if workdir:
dirname = os.path.join(workdir,
self.context.codec.name,
encoding.encoder.Hashname(),
self.context.codec.SpeedGroup(encoding.bitrate))
else:
dirname = os.path.join(self.workdir,
encoding.encoder.Hashname(),
self.context.codec.SpeedGroup(encoding.bitrate))
dirname = os.path.join(self.workdir,
encoding.encoder.Hashname(),
self.context.codec.SpeedGroup(encoding.bitrate))
if not os.path.isdir(dirname):
os.mkdir(dirname)
if not encoding.result:
Expand All @@ -872,17 +876,14 @@ def StoreEncoding(self, encoding, workdir=None):
with open('%s/%s.result' % (dirname, videoname), 'w') as resultfile:
json.dump(encoding.result, resultfile, indent=2)

def ReadEncodingResult(self, encoding, scoredir=None):
def ReadEncodingResult(self, encoding):
"""Reads an encoding result back from storage, if present.

None is returned if file does not exist.
If scoredir is given, only that directory is searched.
If it is not given, all directories in the search path are searched."""

if scoredir:
searchpath = [os.path.join(scoredir, self.context.codec.name)]
else:
searchpath = self.SearchPathForScores()
searchpath = self.SearchPathForScores()

for workdir in searchpath:
dirname = os.path.join(workdir, encoding.encoder.Hashname(),
Expand All @@ -903,13 +904,16 @@ def ReadEncodingResult(self, encoding, scoredir=None):

class EncodingMemoryCache(object):
"""Encoder and encoding information, in-memory only. For testing."""
def __init__(self, context):
def __init__(self, context, scoredir=None):
if scoredir:
raise Error('Cannot have scoredir on a memory cache')
self.context = context
self.encoders = {}
self.encodings = []
self.workdir = '/not-valid-file/' + self.context.codec.name

def WorkDir(self):
return '/not-valid-file/' + self.context.codec.name
return self.workdir

def AllScoredEncodings(self, bitrate, videofile):
result = []
Expand Down
60 changes: 49 additions & 11 deletions lib/encoder_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,11 @@ def testRemoveEncoder(self):

def testReadResultFromAlternateDir(self):
context = StorageOnlyContext()
otherdir = os.path.join(encoder_configuration.conf.sysdir(), 'otherdir')
otherdir_path = os.path.join(encoder_configuration.conf.sysdir(),
'otherdir')
os.mkdir(otherdir_path)
cache = encoder.EncodingDiskCache(context)
other_cache = encoder.EncodingDiskCache(context, scoredir='otherdir')
my_encoder = encoder.Encoder(
context,
encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
Expand All @@ -523,10 +526,11 @@ def testReadResultFromAlternateDir(self):
my_encoding.result = testresult
cache.StoreEncoding(my_encoding)
my_encoding.result = None
result = cache.ReadEncodingResult(my_encoding, scoredir=otherdir)
result = other_cache.ReadEncodingResult(my_encoding)
self.assertIsNone(result)
shutil.copytree(encoder_configuration.conf.workdir(), otherdir)
result = cache.ReadEncodingResult(my_encoding, scoredir=otherdir)
shutil.rmtree(otherdir_path)
shutil.copytree(encoder_configuration.conf.workdir(), otherdir_path)
result = other_cache.ReadEncodingResult(my_encoding)
self.assertEquals(result, testresult)

def testAllScoredEncodingsForEncoder(self):
Expand Down Expand Up @@ -587,9 +591,10 @@ def testEncodersInMultipleRepos(self):
encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
other_dir = os.path.join(encoder_configuration.conf.sysdir(),
'multirepo_test')
encoder_configuration.conf.override_scorepath_for_test([other_dir])
os.mkdir(other_dir)
cache.StoreEncoder(my_encoder, workdir=other_dir)
other_cache = encoder.EncodingDiskCache(context, scoredir='multirepo_test')
encoder_configuration.conf.override_scorepath_for_test([other_dir])
other_cache.StoreEncoder(my_encoder)
encoders = cache.AllEncoderFilenames(only_workdir=True)
self.assertEquals(0, len(encoders))
encoders = cache.AllEncoderFilenames(only_workdir=False)
Expand All @@ -601,13 +606,46 @@ def testEncodersInMultipleRepos(self):
encoder.Videofile('x/foo_640_480_20.yuv'))
testresult = {'foo': 'bar'}
my_encoding.result = testresult
cache.StoreEncoding(my_encoding, workdir=other_dir)
other_cache.StoreEncoding(my_encoding)
# With a specified directory, we should find it in only one place.
self.assertFalse(cache.ReadEncodingResult(my_encoding,
scoredir=encoder_configuration.conf.workdir()))
self.assertTrue(cache.ReadEncodingResult(my_encoding, scoredir=other_dir))
# Without a specified directory, we should find it everywhere.
self.assertTrue(other_cache.ReadEncodingResult(my_encoding))
# Without a specified directory, we should find it on the searchpath.
self.assertTrue(cache.ReadEncodingResult(my_encoding))
# Without a searchpath, we shouldn't find it in the default cache.
encoder_configuration.conf.override_scorepath_for_test([])
self.assertFalse(cache.ReadEncodingResult(my_encoding))

def testBrokenStoredEncoding(self):
context = StorageOnlyContext()
other_dir = os.path.join(encoder_configuration.conf.sysdir(),
'broken_files')
os.mkdir(other_dir)

cache = encoder.EncodingDiskCache(context, scoredir='broken_files')
# This particular test needs the context to know about the cache.
context.cache = cache
my_encoder = encoder.Encoder(
context,
encoder.OptionValueSet(encoder.OptionSet(), '--parameters'))
cache.StoreEncoder(my_encoder)
# Cache should start off empty.
self.assertFalse(cache.AllScoredEncodingsForEncoder(my_encoder))
videofile = encoder.Videofile('x/foo_640_480_20.yuv')
my_encoding = encoder.Encoding(my_encoder, 123, videofile)
testresult = {'foo': 'bar'}
my_encoding.result = testresult
cache.StoreEncoding(my_encoding)
# TODO(hta): Expose the filename generation as a function for testing.
with open(os.path.join(cache.workdir,
my_encoding.encoder.Hashname(),
cache.context.codec.SpeedGroup(my_encoding.bitrate),
'%s.result' % my_encoding.videofile.basename),
'w') as scorefile:
scorefile.write('stuff that is not valid json')

result = cache.AllScoredEncodingsForEncoder(my_encoder)
self.assertFalse(result)
self.assertEquals(1, len(cache.bad_encodings))


class TestEncodingMemoryCache(unittest.TestCase):
Expand Down
25 changes: 17 additions & 8 deletions lib/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,38 @@ class Optimizer(object):
- A video file set, with associated target bitrates.
- A set of pre-executed encodings (the cache).
- A score function.
- A score directory, normally null, which means "take from context".

One should be able ask an optimizer to find the parameters that give the
best result on the score function for that codec."""
def __init__(self, codec, file_set=None,
cache_class=None, score_function=None):
cache_class=None, score_function=None,
scoredir=None):
# pylint: disable=too-many-arguments
self.context = encoder.Context(codec,
cache_class or encoder.EncodingDiskCache)
cache_class or encoder.EncodingDiskCache,
scoredir=scoredir)
self.file_set = file_set
self.score_function = score_function or score_tools.ScorePsnrBitrate

def Score(self, encoding, scoredir=None):
if scoredir is None:
result = encoding.result
else:
result = self.context.cache.ReadEncodingResult(encoding,
scoredir=scoredir)
def Score(self, encoding):
result = encoding.result
if not result:
raise encoder.Error('Trying to score an encoding without result')
score = self.score_function(encoding.bitrate, result)
# Weakly penalize long command lines.
score -= len(encoding.encoder.parameters.values) * 0.00001
return score

def RebaseEncoding(self, encoding):
"""Take an encoding from another context and rebase it to
this context, using the same encoder arguments."""
return encoder.Encoding(self.RebaseEncoder(encoding.encoder),
encoding.bitrate, encoding.videofile)

def RebaseEncoder(self, my_encoder):
return encoder.Encoder(self.context, my_encoder.parameters)

def BestEncoding(self, bitrate, videofile):
encodings = self.AllScoredEncodings(bitrate, videofile)
if encodings:
Expand Down
31 changes: 28 additions & 3 deletions lib/optimizer_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.

""" Unit tests for the optimizer. """
import os
import re
import unittest

Expand Down Expand Up @@ -241,7 +242,9 @@ def EncoderFromParameterString(self, parameter_string):
parameter_string))

def test_BestOverallConfigurationNotInWorkDirectory(self):
other_dir = encoder_configuration.conf.sysdir() + '/multirepo_test'
other_dir = os.path.join(encoder_configuration.conf.sysdir(),
'multirepo_test')
os.mkdir(other_dir)
encoder_configuration.conf.override_scorepath_for_test([other_dir])

self.file_set = optimizer.FileAndRateSet(verify_files_present=False)
Expand All @@ -251,13 +254,15 @@ def test_BestOverallConfigurationNotInWorkDirectory(self):
best_encoder = self.optimizer.BestOverallEncoder()
self.assertIsNone(best_encoder)
# Fill in the database with all the files and rates.
other_context = encoder.Context(self.codec, encoder.EncodingDiskCache,
scoredir='multirepo_test')
my_encoder = self.EncoderFromParameterString('--score=7')
my_encoder.context.cache.StoreEncoder(my_encoder, workdir=other_dir)
other_context.cache.StoreEncoder(my_encoder)
my_encoder.context.cache.StoreEncoder(my_encoder)
for rate, filename in self.file_set.AllFilesAndRates():
my_encoding = my_encoder.Encoding(rate, encoder.Videofile(filename))
my_encoding.Execute()
my_encoding.context.cache.StoreEncoding(my_encoding, workdir=other_dir)
other_context.cache.StoreEncoding(my_encoding)
# The best encoder should now be from the workdir, but the results are
# all fetched from the searchpath.
best_encoder = self.optimizer.BestOverallEncoder()
Expand All @@ -268,6 +273,26 @@ def test_BestOverallConfigurationNotInWorkDirectory(self):
one_encoding.Recover()
self.assertTrue(one_encoding.Result())

def test_MultipleOptimizers(self):
# Make sure other score directories don't interfere with this test.
encoder_configuration.conf.override_scorepath_for_test([])

os.mkdir(os.path.join(encoder_configuration.conf.sysdir(), 'first_dir'))
os.mkdir(os.path.join(encoder_configuration.conf.sysdir(), 'second_dir'))
one_optimizer = optimizer.Optimizer(self.codec, scoredir='first_dir')
another_optimizer = optimizer.Optimizer(self.codec, scoredir='second_dir')
self.assertNotEqual(one_optimizer.context.cache.workdir,
another_optimizer.context.cache.workdir)
# Storing one encoding's score should not affect the other's.
one_encoding = one_optimizer.BestEncoding(100,
self.videofile)
one_encoding.Execute().Store()
another_encoding = another_optimizer.BestEncoding(100, self.videofile)
self.assertFalse(another_encoding.Result())
another_encoding.Recover()
self.assertFalse(another_encoding.Result())



class TestFileAndRateSet(unittest.TestCase):

Expand Down