diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ddad68..1c1e294 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.26) project( DIE - VERSION 0.5.0 + VERSION 0.5.1 LANGUAGES CXX DESCRIPTION "DIE Library implementation" ) diff --git a/README.md b/README.md index e21e70d..fe35a81 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ import die, pathlib print(die.scan_file("c:/windows/system32/ntdll.dll", die.ScanFlags.DEEP_SCAN)) 'PE64' -print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database_path/'db') )) +print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database_path) )) { "detects": [ { @@ -86,16 +86,11 @@ print(die.scan_file("../upx.exe", die.ScanFlags.RESULT_AS_JSON, str(die.database for db in die.databases(): print(db) -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\ACE -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\PackageName.1.sg -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\SingleJar.3.sg -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\_APK.0.sg -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\APK\_init -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Archive\_init -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\archive-file -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\arj -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Binary\Amiga loadable.1.sg -C:\Users\User\AppData\Roaming\Python\Python312\site-packages\die\db\db\Binary\archive.7z.1.sg +\path\to\your\pyenv\site-packages\die\db\ACE +\path\to\your\pyenv\site-packages\die\db\Amiga\DeliTracker.1.sg +\path\to\your\pyenv\site-packages\die\db\Amiga\_Amiga.0.sg +\path\to\your\pyenv\site-packages\die\db\Amiga\_init +\path\to\your\pyenv\site-packages\die\db\APK\AlibabaProtection.2.sg 
[...] ``` diff --git a/pyproject.toml b/pyproject.toml index 77c3142..66a59a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "die_python" -version = "0.5.0" +version = "0.5.1" description = "Python bindings for Detect It Easy (DIE)." readme = "./README.md" license.file = "./LICENSE" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fc744d2..ae26041 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,7 +1,7 @@ project( die-python LANGUAGES CXX - VERSION 0.5.0 + VERSION 0.5.1 ) find_package(Python 3 @@ -68,11 +68,45 @@ target_link_libraries(_die PRIVATE Qt6::Qml) target_link_libraries(_die PRIVATE Qt6::Concurrent) target_link_libraries(_die PRIVATE Qt6::Network) +# Workaround: die_library on Windows installs files to incorrect locations. +# Remove these before our correct install rules: +# - db/ directory (should be die/db, not site-packages/db) +# - die.lib (should be die/die.lib, not root die.lib) +# - include/ directory (C++ headers not needed in Python wheel) +if(WIN32) + install(CODE [[ + # List of paths to remove: each entry is "type|path" + # type: "dir" for directory (removed in-process via file(REMOVE_RECURSE)), "file" for file (file(REMOVE)) + set(REMOVE_PATHS + "dir|${CMAKE_INSTALL_PREFIX}/db" + "file|${CMAKE_INSTALL_PREFIX}/die.lib" + "dir|${CMAKE_INSTALL_PREFIX}/include" + ) + + foreach(REMOVE_ENTRY ${REMOVE_PATHS}) + string(REPLACE "|" ";" REMOVE_LIST "${REMOVE_ENTRY}") + list(GET REMOVE_LIST 0 REMOVE_TYPE) + list(GET REMOVE_LIST 1 REMOVE_PATH) + + if(EXISTS "${REMOVE_PATH}") + if(REMOVE_TYPE STREQUAL "dir") + file(REMOVE_RECURSE "${REMOVE_PATH}") + message(STATUS "Removed directory: ${REMOVE_PATH}") + elseif(REMOVE_TYPE STREQUAL "file") + file(REMOVE "${REMOVE_PATH}") + message(STATUS "Removed file: ${REMOVE_PATH}") + endif() + endif() + endforeach() + ]]) +endif() + install(DIRECTORY die DESTINATION .) 
install(TARGETS _die DESTINATION die/) install(TARGETS die DESTINATION die/) -install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db DESTINATION die/db) -install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db_custom DESTINATION die/db) +# Fix: Install database to die/db instead of die/db/db +install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db DESTINATION die) +install(DIRECTORY ${DIELIB_BASE_ROOT}/dep/Detect-It-Easy/db_custom DESTINATION die) if(LINUX OR APPLE) install( @@ -93,6 +127,7 @@ if(LINUX OR APPLE) PATTERN "pkgconfig" EXCLUDE ) else() + # Windows: Install Qt DLLs and ICU libraries install( DIRECTORY ${Qt6_DIR}/../../../bin/ @@ -103,5 +138,8 @@ else() PATTERN "Qt6Qml.*" PATTERN "Qt6Concurrent.*" PATTERN "Qt6Network.*" + PATTERN "icudt*.dll" + PATTERN "icuin*.dll" + PATTERN "icuuc*.dll" ) endif() \ No newline at end of file diff --git a/python/die/__init__.py b/python/die/__init__.py index 0b56fdd..7c39b40 100644 --- a/python/die/__init__.py +++ b/python/die/__init__.py @@ -1,5 +1,6 @@ import enum import pathlib +import warnings from typing import Generator, Optional, Union @@ -16,8 +17,110 @@ version_major, version_minor, version_patch = map(int, __version__.split(".")) -database_path = pathlib.Path(__path__[0]) / "db" -"""Path to the DIE signature database""" + +# Use concrete Path type to maintain isinstance() compatibility +_BasePath = type(pathlib.Path()) + +class _DatabasePath(_BasePath): + """ + Smart database path that maintains backward compatibility. + + This class automatically handles both old and new usage patterns: + - New code: use database_path directly + - Old code: database_path / 'db' still works but shows deprecation warning + + The path detection works as follows: + 1. If db/PE/ exists (new fixed version): use this path + 2. 
If db/db/PE/ exists (old buggy version): use the nested path + """ + + def __new__(cls, *args, **kwargs): + obj = super().__new__(cls, *args) + obj._resolved_path_str = None + return obj + + def _get_resolved_str(self): + """Resolve and return the actual database path as a string.""" + # Use getattr with default to handle Python 3.9's pathlib behavior + # where __new__ may not be called in path operations + # See: https://github.com/python/cpython/issues/100479 + resolved = getattr(self, '_resolved_path_str', None) + + if resolved is None: + # Use parent class's __str__ to get path without triggering our override + # This avoids recursion when __str__ calls _get_resolved_str + path_str = super().__str__() + concrete_path = pathlib.Path(path_str) + + if (concrete_path / 'PE').exists(): + resolved = path_str + elif (concrete_path / 'db' / 'PE').exists(): + resolved = str(concrete_path / 'db') + else: + resolved = path_str + + self._resolved_path_str = resolved + + return resolved + + def __truediv__(self, other): + """Handle path concatenation with backward compatibility.""" + if other == 'db': + # User is using the old workaround: database_path / 'db' + # Check if the base path (before resolution) already contains PE/ + # If yes, this is the new version and /'db' is redundant + base_path_str = super().__str__() + base_path = pathlib.Path(base_path_str) + + if (base_path / 'PE').exists(): + # New fixed version: database is at die/db/PE/ + warnings.warn( + "Using 'database_path / \"db\"' is deprecated and no longer needed. " + "The database is now directly at 'database_path'. 
" + "Simply use 'database_path' instead.", + DeprecationWarning, + stacklevel=2 + ) + return self + # else: Old version, database is at die/db/db/PE/ + # The /'db' is necessary, allow it to proceed + + # Default behavior: use parent's __truediv__ for normal path concatenation + return super().__truediv__(other) + + def __str__(self): + """Return the resolved database path as a string.""" + return self._get_resolved_str() + + def __fspath__(self): + """Return the resolved database path for os.fspath().""" + return self._get_resolved_str() + + def exists(self, **kwargs): + """Check if the resolved database path exists; keyword arguments (e.g. follow_symlinks) are forwarded to Path.exists().""" + return pathlib.Path(self._get_resolved_str()).exists(**kwargs) + + def iterdir(self): + """Iterate over the resolved database path.""" + return pathlib.Path(self._get_resolved_str()).iterdir() + + +# Initialize database path with smart handling +database_path = _DatabasePath(__path__[0]) / "db" +"""Path to the DIE signature database + +This path automatically points to the correct database location, +regardless of how the package is laid out: +- When the database directory is installed directly at die/db/ +- When the database directory is installed at die/db/db/ + +Usage: + # Recommended: + die.scan_file(file, flags, str(die.database_path)) + + # Legacy code (still works, but may show a deprecation warning): + die.scan_file(file, flags, str(die.database_path / 'db')) +""" class ScanFlags(enum.IntFlag): diff --git a/python/src/die.cpp b/python/src/die.cpp index 312590b..3cdc015 100644 --- a/python/src/die.cpp +++ b/python/src/die.cpp @@ -123,7 +123,7 @@ NB_MODULE(_die, m) .export_values(); m.doc() = "The native `die` module"; - m.attr("__version__") = "0.5.0"; + m.attr("__version__") = "0.5.1"; m.attr("die_version") = DIE_VERSION; m.attr("dielib_version") = DIELIB_VERSION; diff --git a/python/tests/test_die.py b/python/tests/test_die.py index 60a8b59..8d8b9cc 100644 --- a/python/tests/test_die.py +++ b/python/tests/test_die.py @@ -19,9 +19,8 @@ def test_constants(): 
assert die.dielib_version # validate die database - assert isinstance(die.database_path, pathlib.Path) + assert isinstance(die.database_path, die._DatabasePath) assert die.database_path.exists() - assert die.database_path.is_dir() # validate scan flags assert die._DieFlags.Deepscan.value == die.ScanFlags.DEEP_SCAN @@ -156,3 +155,82 @@ def test_basic_databases(): assert isinstance(db, pathlib.Path) assert db.exists() assert db.is_file() + + +def test_database_path_backward_compatibility(): + """Test backward compatibility for database_path usage.""" + import warnings + + # Test 1: New usage should work without warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + path_new = str(die.database_path) + assert len(w) == 0, "New usage should not produce warnings" + + # Test 2: database_path should resolve to a valid location with PE/ directory + db_path = pathlib.Path(path_new) + assert db_path.exists(), f"Database path does not exist: {db_path}" + assert (db_path / 'PE').exists(), f"PE directory not found at {db_path}" + + # Test 3: Old usage with /'db' should work through smart path resolution + # The smart path should detect the version and handle accordingly + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + path_old = str(die.database_path / 'db') + + # The path should exist in both old and new versions + assert pathlib.Path(path_old).exists(), f"Old usage path doesn't exist: {path_old}" + + if len(w) > 0: + # New fixed version: got deprecation warning + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "database_path" in str(w[0].message).lower() + # In new version, both should resolve to same location + assert pathlib.Path(path_new) == pathlib.Path(path_old) + else: + # Old buggy version: no warning, /'db' is necessary + # In old version, path_old should be die/db/db and path_new should also be die/db/db + assert path_new == path_old + + +def 
test_database_path_resolves_correctly(): + """Test that database_path resolves to the actual database location.""" + # The resolved path should contain PE/ directory + db_path = pathlib.Path(str(die.database_path)) + + # Check for PE directory (main signature database) + assert (db_path / 'PE').exists(), f"PE directory not found at {db_path}" + + # Check for other expected directories + expected_dirs = ['PE', 'ELF', 'MACH'] + for dir_name in expected_dirs: + assert (db_path / dir_name).exists(), \ + f"Expected directory {dir_name} not found at {db_path}" + + +def test_scan_with_explicit_database_path(target_binary: pathlib.Path): + """Test that scan_file works with explicit database path.""" + import warnings + + # Test with new usage (no /'db') + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + res = die.scan_file( + target_binary, + die.ScanFlags.DEEP_SCAN, + database=str(die.database_path), + ) + assert res + assert isinstance(res, str) + + # Test with old usage (with /'db') + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + res = die.scan_file( + target_binary, + die.ScanFlags.DEEP_SCAN, + database=str(die.database_path / 'db'), + ) + assert res + assert isinstance(res, str) diff --git a/python/tests/test_regression.py b/python/tests/test_regression.py index 3117532..a501f7a 100644 --- a/python/tests/test_regression.py +++ b/python/tests/test_regression.py @@ -5,10 +5,10 @@ TESTS_FOLDER = pathlib.Path(__file__).parent.absolute() DATA_FOLDER = TESTS_FOLDER / "data" -DB_FOLDER = die.database_path / "db" +DB_FOLDER = die.database_path -def test_issue_48(): +def test_issue_28(): # issue https://github.com/elastic/die-python/issues/28 # pr https://github.com/elastic/die-python/pull/30 fpath = DATA_FOLDER / "test.rar"