From fe90c62c5b3a752d484563b566891c93e712f96d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 29 Apr 2026 09:08:13 +0200 Subject: [PATCH 1/6] pyproject.toml --- pyproject.toml | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4355115 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,53 @@ +[build-system] +requires = ["setuptools>=77"] +build-backend = "setuptools.build_meta" + +[project] +name = "complexfinder" +version = "0.4.48" +description = "ComplexFinder: A software package for the analysis of native protein complex fractionation experiments." +readme = "README.md" +requires-python = ">=3.11" +license = "MIT" +authors = [ + { name = "hnolCol" }, +] +classifiers = [ + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering", +] +dependencies = [ + "hdbscan", + "imbalanced-learn", + "joblib", + "lmfit", + "matplotlib", + "numpy>=2", + "pandas>=2", + "scikit-learn", + "scipy", + "seaborn", + "umap-learn", +] + +[project.optional-dependencies] +test = ["pytest>=9"] + +[project.urls] +"Source" = "https://github.com/hnolcol/ComplexFinder" + +[tool.pytest] +minversion = "9.0" +testpaths = [ + "tests", +] +norecursedirs = [ + "dist", + "build", + "complexfinder.egg-info", + "results*", + "env*", + "venv*", + "*data*", +] From 72ad4349e50de027b66ef5b9f26999a70a7cf9cc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 4 May 2026 08:31:52 +0200 Subject: [PATCH 2/6] apply src/ layout --- .github/workflows/ci.yml | 5 ++--- {src => example-data}/filter/Mito_d3.txt | 0 src/__init__.py | 0 src/{modules => complexfinder}/Database.py | 0 src/{modules => complexfinder}/Distance.py | 0 src/{modules => complexfinder}/Distance_archive.py | 0 src/{modules => complexfinder}/Plotter.py | 0 src/{modules => complexfinder}/Predictor.py | 0 src/{modules => 
complexfinder}/Signal.py | 0 src/complexfinder/__init__.py | 1 + src/{ => complexfinder}/main.py | 13 ++++++++----- src/{modules => complexfinder}/utils.py | 0 src/modules/__init__.py | 0 tests/test_misc.py | 2 +- 14 files changed, 12 insertions(+), 9 deletions(-) rename {src => example-data}/filter/Mito_d3.txt (100%) delete mode 100644 src/__init__.py rename src/{modules => complexfinder}/Database.py (100%) rename src/{modules => complexfinder}/Distance.py (100%) rename src/{modules => complexfinder}/Distance_archive.py (100%) rename src/{modules => complexfinder}/Plotter.py (100%) rename src/{modules => complexfinder}/Predictor.py (100%) rename src/{modules => complexfinder}/Signal.py (100%) create mode 100644 src/complexfinder/__init__.py rename src/{ => complexfinder}/main.py (99%) rename src/{modules => complexfinder}/utils.py (100%) delete mode 100644 src/modules/__init__.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33858c0..f9e3395 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,10 +15,9 @@ jobs: with: python-version: '3.13' - - name: Install dependencies + - name: Install package run: | - pip install pandas scipy matplotlib lmfit joblib scikit-learn \ - imbalanced-learn umap-learn hdbscan seaborn pytest + pip install .[test] - name: Run tests run: | diff --git a/src/filter/Mito_d3.txt b/example-data/filter/Mito_d3.txt similarity index 100% rename from src/filter/Mito_d3.txt rename to example-data/filter/Mito_d3.txt diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/modules/Database.py b/src/complexfinder/Database.py similarity index 100% rename from src/modules/Database.py rename to src/complexfinder/Database.py diff --git a/src/modules/Distance.py b/src/complexfinder/Distance.py similarity index 100% rename from src/modules/Distance.py rename to src/complexfinder/Distance.py diff --git a/src/modules/Distance_archive.py 
b/src/complexfinder/Distance_archive.py similarity index 100% rename from src/modules/Distance_archive.py rename to src/complexfinder/Distance_archive.py diff --git a/src/modules/Plotter.py b/src/complexfinder/Plotter.py similarity index 100% rename from src/modules/Plotter.py rename to src/complexfinder/Plotter.py diff --git a/src/modules/Predictor.py b/src/complexfinder/Predictor.py similarity index 100% rename from src/modules/Predictor.py rename to src/complexfinder/Predictor.py diff --git a/src/modules/Signal.py b/src/complexfinder/Signal.py similarity index 100% rename from src/modules/Signal.py rename to src/complexfinder/Signal.py diff --git a/src/complexfinder/__init__.py b/src/complexfinder/__init__.py new file mode 100644 index 0000000..b5d2e77 --- /dev/null +++ b/src/complexfinder/__init__.py @@ -0,0 +1 @@ +from complexfinder.main import ComplexFinder diff --git a/src/main.py b/src/complexfinder/main.py similarity index 99% rename from src/main.py rename to src/complexfinder/main.py index 435ad93..a21c7dc 100644 --- a/src/main.py +++ b/src/complexfinder/main.py @@ -8,12 +8,13 @@ import pickle import shutil from datetime import datetime +from pathlib import Path #internal imports -from modules.Signal import Signal -from modules.Database import Database -from modules.Predictor import Classifier, ComplexBuilder -from modules.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds +from complexfinder.Signal import Signal +from complexfinder.Database import Database +from complexfinder.Predictor import Classifier, ComplexBuilder +from complexfinder.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds import joblib from joblib import Parallel, delayed, dump, load @@ -2634,4 +2635,6 @@ def _combinePeakResults(self): removeSingleDataPointPeaks=True, keepOnlySignalsValidInAllConditions = False, quantFiles = {}, - useRawDataForDimensionalReduction = 
False).run("../example-data/D1") #adjust the folder where the files are sstored + useRawDataForDimensionalReduction = False).run( + str(Path(__file__).parents[2] / "example-data" / "D1") + ) # adjust the folder where the files are stored diff --git a/src/modules/utils.py b/src/complexfinder/utils.py similarity index 100% rename from src/modules/utils.py rename to src/complexfinder/utils.py diff --git a/src/modules/__init__.py b/src/modules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_misc.py b/tests/test_misc.py index 8490b55..cec044a 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -8,7 +8,7 @@ # update sys.path until there is a proper package installation sys.path.append(str(Path(__file__).parent.parent / "src")) -from main import ComplexFinder +from complexfinder import ComplexFinder SAMPLE_DATA_DIR = Path(__file__).parents[1] / "example-data" From f8b522af12dfc16548656d6dbe87740067825f89 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 4 May 2026 08:44:12 +0200 Subject: [PATCH 3/6] add reference-data/ to package --- MANIFEST.in | 1 + src/complexfinder/Database.py | 3 +-- .../reference-data}/20190823_CORUM_Organism__Human.txt | 0 .../reference-data}/20190823_CORUM_Organism__Mouse.txt | 0 {reference-data => src/complexfinder/reference-data}/CORUM.txt | 0 .../complexfinder/reference-data}/CORUM_Organism__Human.txt | 0 {reference-data => src/complexfinder/reference-data}/Readme.md | 0 7 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 MANIFEST.in rename {reference-data => src/complexfinder/reference-data}/20190823_CORUM_Organism__Human.txt (100%) rename {reference-data => src/complexfinder/reference-data}/20190823_CORUM_Organism__Mouse.txt (100%) rename {reference-data => src/complexfinder/reference-data}/CORUM.txt (100%) rename {reference-data => src/complexfinder/reference-data}/CORUM_Organism__Human.txt (100%) rename {reference-data => src/complexfinder/reference-data}/Readme.md (100%) 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..8d23658 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include src/complexfinder/reference-data * diff --git a/src/complexfinder/Database.py b/src/complexfinder/Database.py index a92bcd6..d7f6f88 100644 --- a/src/complexfinder/Database.py +++ b/src/complexfinder/Database.py @@ -389,9 +389,8 @@ def _getFiles(self, folderPath, extn = 'txt'): def _getPathToReferenceFiles(self): "" filePath = os.path.dirname(os.path.realpath(__file__)) - mainPath = os.path.abspath(os.path.join(filePath ,"../..")) pathToReferenceFolder = os.path.join( - mainPath, + filePath, 'reference-data' ) return pathToReferenceFolder diff --git a/reference-data/20190823_CORUM_Organism__Human.txt b/src/complexfinder/reference-data/20190823_CORUM_Organism__Human.txt similarity index 100% rename from reference-data/20190823_CORUM_Organism__Human.txt rename to src/complexfinder/reference-data/20190823_CORUM_Organism__Human.txt diff --git a/reference-data/20190823_CORUM_Organism__Mouse.txt b/src/complexfinder/reference-data/20190823_CORUM_Organism__Mouse.txt similarity index 100% rename from reference-data/20190823_CORUM_Organism__Mouse.txt rename to src/complexfinder/reference-data/20190823_CORUM_Organism__Mouse.txt diff --git a/reference-data/CORUM.txt b/src/complexfinder/reference-data/CORUM.txt similarity index 100% rename from reference-data/CORUM.txt rename to src/complexfinder/reference-data/CORUM.txt diff --git a/reference-data/CORUM_Organism__Human.txt b/src/complexfinder/reference-data/CORUM_Organism__Human.txt similarity index 100% rename from reference-data/CORUM_Organism__Human.txt rename to src/complexfinder/reference-data/CORUM_Organism__Human.txt diff --git a/reference-data/Readme.md b/src/complexfinder/reference-data/Readme.md similarity index 100% rename from reference-data/Readme.md rename to src/complexfinder/reference-data/Readme.md From 613b729442f6fb8952c319d7c113ad9cb6528a40 Mon Sep 17 00:00:00 
2001 From: Daniel Weindl Date: Mon, 4 May 2026 09:06:48 +0200 Subject: [PATCH 4/6] -sys.path --- tests/test_misc.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index cec044a..ba02a13 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,13 +1,9 @@ import os -import sys from pathlib import Path from tempfile import TemporaryDirectory import pandas as pd -# update sys.path until there is a proper package installation -sys.path.append(str(Path(__file__).parent.parent / "src")) - from complexfinder import ComplexFinder SAMPLE_DATA_DIR = Path(__file__).parents[1] / "example-data" From 1620a58c559db0c41e80346976533e4b88387a6a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 5 May 2026 12:58:17 +0200 Subject: [PATCH 5/6] doc: Update installation instructions --- README.md | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 21f9ee0..9ca9608 100644 --- a/README.md +++ b/README.md @@ -64,33 +64,46 @@ If you analyzed your data using ComplexFinder, we highly recommend to upload the ## Installation -Download the zip file containing the source code from github. -Navigate to the folder in terminal/command line tool. 
+Navigate to the folder in which you would like to create a new virtual Python +environment via the terminal/command line tool, then: + On Mac / Linux: ``` #create virt env python3 -m venv env #activate source env/bin/activate -#install packages from req file -pip install -r requirements.txt +#install package from GitHub +pip install git+https://github.com/hnolCol/ComplexFinder.git ``` -For windows user: +For Windows users: ``` #create virt env py -m venv env -#actve +#activate .\env\Scripts\activate -#install packages from req file -pip3 install -r requirements.txt -```` +#install package from GitHub +pip install git+https://github.com/hnolCol/ComplexFinder.git +``` + +Alternatively, you can also clone the repository and install the package +locally. This will allow you to make changes to the code and test them +immediately. + +``` +git clone https://github.com/hnolCol/ComplexFinder.git +cd ComplexFinder +python3 -m venv env +source env/bin/activate +pip install -e . +``` ## Usage Example Upon downlaod and extraction of the package. You can find example data in the example-data folder. To run the anaylsis, you can enter: ```python -from .src.main import ComplexFinder +from complexfinder import ComplexFinder X = pd.read_table("./example-data/SILAC_01.txt", sep = "\t") #loading tab delimited txt file. ComplexFinder(analysisName = "ExampleRun_01").run(X) ``` @@ -98,7 +111,7 @@ You can also pass a folder path to run. This will yield in the anaylsis of each ```python import os -from .src.main import ComplexFinder +from complexfinder import ComplexFinder folderPath = os.path(".","") ComplexFinder().run(folderPath) ``` From c1b7fe773dae5dc59349f6e0e0b0c2f04551f7cb Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 5 May 2026 12:41:39 +0200 Subject: [PATCH 6/6] Allow custom database dir The default reference database directory is currently inside the installation directory. This may not always be writable. Thus, allow specifying a custom directory. 
--- src/complexfinder/Database.py | 22 +++++++++++++++------- src/complexfinder/main.py | 11 ++++++++++- tests/test_misc.py | 20 ++++++++++++++++++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/complexfinder/Database.py b/src/complexfinder/Database.py index d7f6f88..a083125 100644 --- a/src/complexfinder/Database.py +++ b/src/complexfinder/Database.py @@ -10,6 +10,8 @@ import time import pickle from collections import OrderedDict +from pathlib import Path + def chunks(l, n): """Yield successive n-sized chunks from l.""" @@ -49,7 +51,7 @@ def createSingleChunk(self,idx, entriesInChunks,pathToTmp,metricColumns,df): class Database(object): - def __init__(self, nJobs = 4, splitString = ";"): + def __init__(self, nJobs = 4, splitString = ";", databaseDir=None): """Database Module. The pipeline requires a database containing positve feature interactions. @@ -62,19 +64,26 @@ def __init__(self, nJobs = 4, splitString = ";"): Parameters ---------- - - + databaseDir: + Directory to use for looking up reference files. + If `None`, the `reference-data` directory inside the + package root will be used. This is assumed to be writable. 
""" self.dbs = dict() self.nJobs = nJobs self.splitString = splitString self.params = {"n_jobs":nJobs} + + if databaseDir is None: + self.databaseDir = str(Path(__file__).parent / 'reference-data') + else: + self.databaseDir = databaseDir + self._load() def _load(self): "" - folderPath = self._getPathToReferenceFiles() - self._loadFiles(folderPath) + self._loadFiles(self.databaseDir) def _loadFiles(self, folderPath): """ @@ -323,8 +332,7 @@ def _loadFile(self, *args, **kwargs): def _checkIfFilteredFileExists(self,dbID,filterDb): "" fileName = self._generateFileName(dbID,filterDb) - sourcePath = self._getPathToReferenceFiles() - self.pathToFile = os.path.join(sourcePath,fileName) + self.pathToFile = os.path.join(self.databaseDir, fileName) return os.path.exists(self.pathToFile) def _generateFileName(self,dbName,filterDb): diff --git a/src/complexfinder/main.py b/src/complexfinder/main.py index a21c7dc..9dd3def 100644 --- a/src/complexfinder/main.py +++ b/src/complexfinder/main.py @@ -132,6 +132,7 @@ def __init__(self, compTabFormat = False, considerOnlyInteractionsPresentInAllRuns = 2, correlationWindowSize = 5, + databaseDir = None, databaseFilter = {'Organism': ["Human"]},#{'Organism': ["Human"]},#{"Confidence" : [1,2,3,4]} - for hu.map2.0,# {} for HUMAN_COMPLEX_PORTAL databaseIDColumn = "subunits(UniProt IDs)", databaseFileName = "20190823_CORUM.txt",#"humap2.txt @@ -236,6 +237,13 @@ def __init__(self, * correlationWindowSize = 5, Number of fractions used for rolling pearson correlation + * databaseDir + Directory to use for looking up reference files and caching + any filtered reference files. + If `None`, the `reference-data` directory inside the + package root will be used. This is assumed to be writable. + If the installation location is not writable, pass a + different path. * databaseFilter = {'Organism': ["Human"]}, Filter dict used to find relevant complexes from database. 
By default, the corum database is filtered based on the column 'Organism' using 'Mouse' as a search string. @@ -413,6 +421,7 @@ def __init__(self, "grouping" : grouping, "analysisMode" : analysisMode, "normValueDict" : normValueDict, + "databaseDir" : databaseDir, "databaseFilter" : databaseFilter, "databaseIDColumn" : databaseIDColumn, "databaseFileName" : databaseFileName, @@ -873,7 +882,7 @@ def _loadReferenceDB(self): print("Info :: Load positive set from data base") if not hasattr(self,"DB"): - self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"]) + self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"], databaseDir=self.params["databaseDir"]) pathToDatabase = os.path.join(self.params["pathToComb"], "InteractionDatabase.txt") if os.path.exists(pathToDatabase): diff --git a/tests/test_misc.py b/tests/test_misc.py index ba02a13..779581c 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,12 +1,15 @@ import os +import shutil from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import TemporaryDirectory, tempdir import pandas as pd from complexfinder import ComplexFinder +from complexfinder.Database import Database -SAMPLE_DATA_DIR = Path(__file__).parents[1] / "example-data" +REPO_ROOT = Path(__file__).parents[1] +SAMPLE_DATA_DIR = REPO_ROOT / "example-data" def test_workflow_completes(): @@ -67,3 +70,16 @@ def test_workflow_completes(): useRawDataForDimensionalReduction=False).run( tmpdir ) + + +def test_custom_database_dir(): + """Test that a custom database directory can be used.""" + db_file = REPO_ROOT / "src" / "complexfinder" / "reference-data" / "CORUM.txt" + with TemporaryDirectory() as tmpdir: + shutil.copyfile(db_file, Path(tmpdir) / db_file.name) + Database(databaseDir=tmpdir).pariwiseProteinInteractions( + dbID=db_file.name, + complexIDsColumn="subunits(UniProt IDs)", + filterDb={'Organism': ["Human"]} + ) + assert 
Path(tmpdir, f"{db_file.stem}_Organism__Human.txt").is_file()