From af78d4decfb2f87c7c69edbe46e0a9c5b8e2c917 Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 11:35:29 +0100 Subject: [PATCH 1/9] adding recursive parsing to flatten command --- ascmhl/commands.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/ascmhl/commands.py b/ascmhl/commands.py index 824ec07..28b71fd 100644 --- a/ascmhl/commands.py +++ b/ascmhl/commands.py @@ -1195,18 +1195,26 @@ def flatten_history( if len(existing_history.hash_lists) == 0: raise errors.NoMHLHistoryException(root_path) - for hash_list in existing_history.hash_lists: + flatten_child_histories(existing_history, session, root_path) + + commit_session_for_collection( + session, root_path, author_name, author_email, author_phone, author_role, location, comment + ) + + +def flatten_child_histories(history, session, roothistorypath, pathprefix=""): + for hash_list in history.hash_lists: for media_hash in hash_list.media_hashes: if not media_hash.is_directory: for hash_entry in media_hash.hash_entries: if hash_entry.action != "failed": # check if this entry is newer than the one already in there to avoid duplicate entries - found_media_hash = session.new_hash_lists[collection_history].find_media_hash_for_path( - media_hash.path + found_media_hash = session.new_hash_lists[session.root_history].find_media_hash_for_path( + pathprefix + "/" + media_hash.path ) if found_media_hash == None: session.append_file_hash( - media_hash.path, + pathprefix + "/" + media_hash.path, media_hash.file_size, media_hash.last_modification_date, hash_entry.hash_format, @@ -1222,7 +1230,7 @@ def flatten_history( if not hashformat_is_already_there: # assuming that hash_entry of same type also has same hash_value .. session.append_file_hash( - media_hash.path, + pathprefix + "/" + media_hash.path, media_hash.file_size, media_hash.last_modification_date, hash_entry.hash_format, @@ -1230,10 +1238,15 @@ def flatten_history( action=hash_entry.action, hash_date=hash_entry.hash_date, ) + + for child_history in history.child_histories: + childpath = child_history.get_root_path() - commit_session_for_collection( - session, root_path, author_name, author_email, author_phone, author_role, location, comment - ) + # if os.path.isabs(file_path): + childrelativepath = os.path.relpath(childpath, roothistorypath) + + logger.info(f"\nChild History at {childrelativepath}:") + flatten_child_histories(child_history, session, roothistorypath, childrelativepath) @click.command() From 0e20541622c9265893e2214cdd0f4de06a5b0177 Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 11:45:03 +0100 Subject: [PATCH 2/9] adjusting for black --- ascmhl/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascmhl/commands.py b/ascmhl/commands.py index 28b71fd..07cf6be 100644 --- a/ascmhl/commands.py +++ b/ascmhl/commands.py @@ -1238,7 +1238,7 @@ def flatten_child_histories(history, session, roothistorypath, pathprefix=""): action=hash_entry.action, hash_date=hash_entry.hash_date, ) - + for child_history in history.child_histories: childpath = child_history.get_root_path() From 6370b12e2d9a936388289443c4d390e58b29c5c6 Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 16:17:39 +0100 Subject: [PATCH 3/9] fixing path prefix handling for root history --- ascmhl/commands.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ascmhl/commands.py b/ascmhl/commands.py index 07cf6be..2f3cbfc 100644 --- a/ascmhl/commands.py +++ b/ascmhl/commands.py @@ -1208,13 +1208,17 @@ def flatten_child_histories(history, session, roothistorypath, pathprefix=""): if not media_hash.is_directory: for hash_entry in media_hash.hash_entries: if hash_entry.action != "failed": + # add prefix to media path if subhistory + media_path = media_hash.path + if pathprefix != "": + media_path = pathprefix + "/" + media_hash.path # check if this entry is newer than the one already in there to avoid duplicate entries found_media_hash = session.new_hash_lists[session.root_history].find_media_hash_for_path( - pathprefix + "/" + media_hash.path + media_path ) if found_media_hash == None: session.append_file_hash( - pathprefix + "/" + media_hash.path, + media_path, media_hash.file_size, media_hash.last_modification_date, hash_entry.hash_format, @@ -1230,7 +1234,7 @@ def flatten_child_histories(history, session, roothistorypath, pathprefix=""): if not hashformat_is_already_there: # assuming that hash_entry of same type also has same hash_value .. session.append_file_hash( - pathprefix + "/" + media_hash.path, + media_path, media_hash.file_size, media_hash.last_modification_date, hash_entry.hash_format, From 4a8baff195096b2615d6c3fdb4bea45533e82464 Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 17:35:30 +0100 Subject: [PATCH 4/9] adding test for flattening of nested histories --- ascmhl/generator.py | 6 +++++- tests/test_flatten.py | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/ascmhl/generator.py b/ascmhl/generator.py index 170ff8a..3a9c583 100644 --- a/ascmhl/generator.py +++ b/ascmhl/generator.py @@ -131,7 +131,11 @@ def append_file_hash( hash_entry = MHLHashEntry(hash_format, hash_string, hash_date=hash_date) if original_hash_entry is None: hash_entry.action = "original" - logger.verbose(f" created original hash for {relative_path} {hash_format}: {hash_string}") + if relative_path != None: + logger.verbose(f" created original hash for {relative_path} {hash_format}: {hash_string}") + else: + # flattening works a bit different, because we don't add to individual (nested) histories + logger.verbose(f" created original hash for {file_path} {hash_format}: {hash_string}") else: existing_hash_entry = history.find_first_hash_entry_for_path(history_relative_path, hash_format) if existing_hash_entry is not None: diff --git a/tests/test_flatten.py b/tests/test_flatten.py index 7783386..53f7dd3 100644 --- a/tests/test_flatten.py +++ b/tests/test_flatten.py @@ -51,3 +51,30 @@ def test_simple_two_hashformats(fs, simple_mhl_history): ascmhl.commands.flatten, [abspath_conversion_tests("/root"), abspath_conversion_tests("/out")] ) assert result.exit_code == 0 + + +@freeze_time("2020-01-16 09:15:00") +def test_nested(fs, nested_mhl_histories): + runner = CliRunner() + + result = runner.invoke( + ascmhl.commands.flatten, ["-v", abspath_conversion_tests("/root"), abspath_conversion_tests("/out")] + ) + assert result.exit_code == 0 + + # check for files in root and sub histories + assert ( + result.output == f"Flattening folder at path: /root ...\n" + " created original hash for Stuff.txt xxh64: 94c399c2a9a21f9a\n" + "\n" + "Child History at A/AA:\n" + " created original hash for A/AA/AA1.txt xxh64: ab6bec9ec04704f6\n" + "\n" + "Child History at B:\n" + " created original hash for B/B1.txt xxh64: 51fb8fb099e92821\n" + "\n" + "Child History at B/BB:\n" + " created original hash for B/BB/BB1.txt xxh64: 5c14eac4f4ad7501\n" + "Created new generation collection_2020-01-16/packinglist_root_2020-01-16_091500Z.mhl\n" + ) + \ No newline at end of file From 912e66ecb836677aa555fd897c9c93c53d38945f Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 17:38:23 +0100 Subject: [PATCH 5/9] adjust test for windows paths --- tests/test_flatten.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_flatten.py b/tests/test_flatten.py index 53f7dd3..b66d289 100644 --- a/tests/test_flatten.py +++ b/tests/test_flatten.py @@ -64,7 +64,7 @@ def test_nested(fs, nested_mhl_histories): # check for files in root and sub histories assert ( - result.output == f"Flattening folder at path: /root ...\n" + result.output == f"Flattening folder at path: {abspath_conversion_tests('/root')} ...\n" " created original hash for Stuff.txt xxh64: 94c399c2a9a21f9a\n" "\n" "Child History at A/AA:\n" @@ -77,4 +77,3 @@ def test_nested(fs, nested_mhl_histories): " created original hash for B/BB/BB1.txt xxh64: 5c14eac4f4ad7501\n" "Created new generation collection_2020-01-16/packinglist_root_2020-01-16_091500Z.mhl\n" ) - \ No newline at end of file From a31ce2dd7296eae27d6275fffa9a6cab6e061bfa Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 17:42:55 +0100 Subject: [PATCH 6/9] adjust test for windows paths 2 --- tests/test_flatten.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/test_flatten.py b/tests/test_flatten.py index b66d289..47c5ae9 100644 --- a/tests/test_flatten.py +++ b/tests/test_flatten.py @@ -10,6 +10,7 @@ import os from click.testing import CliRunner from freezegun import freeze_time +from .conftest import path_conversion_tests from .conftest import abspath_conversion_tests import ascmhl.commands @@ -65,15 +66,15 @@ def test_nested(fs, nested_mhl_histories): # check for files in root and sub histories assert ( result.output == f"Flattening folder at path: {abspath_conversion_tests('/root')} ...\n" - " created original hash for Stuff.txt xxh64: 94c399c2a9a21f9a\n" - "\n" - "Child History at A/AA:\n" - " created original hash for A/AA/AA1.txt xxh64: ab6bec9ec04704f6\n" - "\n" - "Child History at B:\n" - " created original hash for B/B1.txt xxh64: 51fb8fb099e92821\n" - "\n" - "Child History at B/BB:\n" - " created original hash for B/BB/BB1.txt xxh64: 5c14eac4f4ad7501\n" - "Created new generation collection_2020-01-16/packinglist_root_2020-01-16_091500Z.mhl\n" + f" created original hash for Stuff.txt xxh64: 94c399c2a9a21f9a\n" + f"\n" + f"Child History at {path_conversion_tests('A/AA')}:\n" + f" created original hash for {path_conversion_tests('A/AA/AA1.txt')} xxh64: ab6bec9ec04704f6\n" + f"\n" + f"Child History at B:\n" + f" created original hash for {path_conversion_tests('B/B1.txt')} xxh64: 51fb8fb099e92821\n" + f"\n" + f"Child History at {path_conversion_tests('B/BB')}:\n" + f" created original hash for {path_conversion_tests('B/BB/BB1.txt')} xxh64: 5c14eac4f4ad7501\n" + f"Created new generation {path_conversion_tests('collection_2020-01-16/packinglist_root_2020-01-16_091500Z.mhl')}\n" ) From 6dc9e66a3e12ca4457b1960f6da4178e13844b8c Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 16:53:56 +0000 Subject: [PATCH 7/9] adjust test for windows paths 3 --- ascmhl/commands.py | 3 ++- ascmhl/generator.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ascmhl/commands.py b/ascmhl/commands.py index 2f3cbfc..1b78db9 100644 --- a/ascmhl/commands.py +++ b/ascmhl/commands.py @@ -34,6 +34,7 @@ from .traverse import post_order_lexicographic from typing import Dict from collections import namedtuple +from .utils import convert_local_path_to_posix @click.command() @@ -1211,7 +1212,7 @@ def flatten_child_histories(history, session, roothistorypath, pathprefix=""): # add prefix to media path if subhistory media_path = media_hash.path if pathprefix != "": - media_path = pathprefix + "/" + media_hash.path + media_path = convert_local_path_to_posix(pathprefix) + "/" + media_hash.path # check if this entry is newer than the one already in there to avoid duplicate entries found_media_hash = session.new_hash_lists[session.root_history].find_media_hash_for_path( media_path diff --git a/ascmhl/generator.py b/ascmhl/generator.py index 3a9c583..8d7298e 100644 --- a/ascmhl/generator.py +++ b/ascmhl/generator.py @@ -15,6 +15,7 @@ from .ignore import MHLIgnoreSpec from .hashlist import MHLHashList, MHLHashEntry, MHLCreatorInfo, MHLProcessInfo from .history import MHLHistory +from .utils import convert_posix_to_local_path class MHLGenerationCreationSession: @@ -135,7 +136,7 @@ def append_file_hash( logger.verbose(f" created original hash for {relative_path} {hash_format}: {hash_string}") else: # flattening works a bit different, because we don't add to individual (nested) histories - logger.verbose(f" created original hash for {file_path} {hash_format}: {hash_string}") + logger.verbose(f" created original hash for {convert_posix_to_local_path(file_path)} {hash_format}: {hash_string}") else: existing_hash_entry = history.find_first_hash_entry_for_path(history_relative_path, hash_format) if existing_hash_entry is not None: From 5edb78647c6effc9d144b81f30bd8e5ec323756e Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 18 Feb 2025 18:00:31 +0100 Subject: [PATCH 8/9] black, again --- ascmhl/generator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ascmhl/generator.py b/ascmhl/generator.py index 8d7298e..d3a164d 100644 --- a/ascmhl/generator.py +++ b/ascmhl/generator.py @@ -136,7 +136,9 @@ def append_file_hash( logger.verbose(f" created original hash for {relative_path} {hash_format}: {hash_string}") else: # flattening works a bit different, because we don't add to individual (nested) histories - logger.verbose(f" created original hash for {convert_posix_to_local_path(file_path)} {hash_format}: {hash_string}") + logger.verbose( + f" created original hash for {convert_posix_to_local_path(file_path)} {hash_format}: {hash_string}" + ) else: existing_hash_entry = history.find_first_hash_entry_for_path(history_relative_path, hash_format) if existing_hash_entry is not None: From dbcd62ead8319e5e9bb9462ab389fd1706ce96bf Mon Sep 17 00:00:00 2001 From: Patrick Renner Date: Tue, 4 Mar 2025 13:29:50 +0100 Subject: [PATCH 9/9] moving flattened manifest out of temp collection folder --- ascmhl/commands.py | 2 +- ascmhl/generator.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ascmhl/commands.py b/ascmhl/commands.py index 1b78db9..c75d4e9 100644 --- a/ascmhl/commands.py +++ b/ascmhl/commands.py @@ -1489,7 +1489,7 @@ def commit_session_for_collection( process_info.root_media_hash = root_hash process_info.hashlist_custom_basename = "packinglist_" + os.path.basename(root_path) - session.commit(creator_info, process_info) + session.commit(creator_info, process_info, writeChain=False) """ diff --git a/ascmhl/generator.py b/ascmhl/generator.py index d3a164d..dfd7bab 100644 --- a/ascmhl/generator.py +++ b/ascmhl/generator.py @@ -7,6 +7,9 @@ __email__ = "opensource@pomfort.com" """ +import os +import shutil + from collections import defaultdict from typing import Dict, List @@ -279,7 +282,7 @@ def append_directory_hashes( hash_entry.structure_hash_string = structure_hash_string parent_media_hash.append_hash_entry(hash_entry) - def commit(self, creator_info: MHLCreatorInfo, process_info: MHLProcessInfo): + def commit(self, creator_info: MHLCreatorInfo, process_info: MHLProcessInfo, writeChain=True): """ this method needs to create the generations of the children bottom up # so each history can reference the children correctly and can get the actual hash of the file @@ -313,4 +316,26 @@ def commit(self, creator_info: MHLCreatorInfo, process_info: MHLProcessInfo): if history.parent_history is not None: referenced_hash_lists[history.parent_history].append(new_hash_list) - chain_xml_parser.write_chain(history.chain, new_hash_list) + if writeChain: + # regular history .... + chain_xml_parser.write_chain(history.chain, new_hash_list) + else: + # ... or flattened history manifest + root_path = os.path.dirname(new_hash_list.file_path) + if not os.path.exists(root_path): + print(f"ERR: folder {root_path} with flattened manifest does not exist") + return + + parent_folder = os.path.dirname(root_path) + + for file_name in os.listdir(root_path): + if file_name.endswith(".mhl"): + src_path = os.path.join(root_path, file_name) + dst_path = os.path.join(parent_folder, file_name) + shutil.move(src_path, dst_path) + + # Remove the folder if empty + if not os.listdir(root_path): + os.rmdir(root_path) + else: + print(f"ERR: temp folder not empty, did not remove {root_path}")