diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33858c0..f9e3395 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,10 +15,9 @@ jobs: with: python-version: '3.13' - - name: Install dependencies + - name: Install package run: | - pip install pandas scipy matplotlib lmfit joblib scikit-learn \ - imbalanced-learn umap-learn hdbscan seaborn pytest + pip install .[test] - name: Run tests run: | diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..8d23658 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include src/complexfinder/reference-data * diff --git a/README.md b/README.md index 21f9ee0..9ca9608 100644 --- a/README.md +++ b/README.md @@ -64,33 +64,46 @@ If you analyzed your data using ComplexFinder, we highly recommend to upload the ## Installation -Download the zip file containing the source code from github. -Navigate to the folder in terminal/command line tool. +Navigate to the folder in which you would like to create a new virtual Python +environment via the terminal/command line tool, then: + On Mac / Linux: ``` #create virt env python3 -m venv env #activate source env/bin/activate -#install packages from req file -pip install -r requirements.txt +#install package from GitHub +pip install git+https://github.com/hnolCol/ComplexFinder.git ``` -For windows user: +For Windows users: ``` #create virt env py -m venv env -#actve +#activate .\env\Scripts\activate -#install packages from req file -pip3 install -r requirements.txt -```` +#install package from GitHub +pip install git+https://github.com/hnolCol/ComplexFinder.git +``` + +Alternatively, you can also clone the repository and install the package +locally. This will allow you to make changes to the code and test them +immediately. + +``` +git clone https://github.com/hnolCol/ComplexFinder.git +cd ComplexFinder +python3 -m venv env +source env/bin/activate +pip install -e . 
+``` ## Usage Example Upon downlaod and extraction of the package. You can find example data in the example-data folder. To run the anaylsis, you can enter: ```python -from .src.main import ComplexFinder +from complexfinder import ComplexFinder X = pd.read_table("./example-data/SILAC_01.txt", sep = "\t") #loading tab delimited txt file. ComplexFinder(analysisName = "ExampleRun_01").run(X) ``` @@ -98,7 +111,7 @@ You can also pass a folder path to run. This will yield in the anaylsis of each ```python import os -from .src.main import ComplexFinder +from complexfinder import ComplexFinder folderPath = os.path(".","") ComplexFinder().run(folderPath) ``` diff --git a/src/filter/Mito_d3.txt b/example-data/filter/Mito_d3.txt similarity index 100% rename from src/filter/Mito_d3.txt rename to example-data/filter/Mito_d3.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4355115 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,53 @@ +[build-system] +requires = ["setuptools>=77"] +build-backend = "setuptools.build_meta" + +[project] +name = "complexfinder" +version = "0.4.48" +description = "ComplexFinder: A software package for the analysis of native protein complex fractionation experiments." 
+readme = "README.md" +requires-python = ">=3.11" +license = "MIT" +authors = [ + { name = "hnolCol" }, +] +classifiers = [ + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering", +] +dependencies = [ + "hdbscan", + "imbalanced-learn", + "joblib", + "lmfit", + "matplotlib", + "numpy>=2", + "pandas>=2", + "scikit-learn", + "scipy", + "seaborn", + "umap-learn", +] + +[project.optional-dependencies] +test = ["pytest>=9"] + +[project.urls] +"Source" = "https://github.com/hnolcol/ComplexFinder" + +[tool.pytest] +minversion = "9.0" +testpaths = [ + "tests", +] +norecursedirs = [ + "dist", + "build", + "complexfinder.egg-info", + "results*", + "env*", + "venv*", + "*data*", +] diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/modules/Database.py b/src/complexfinder/Database.py similarity index 97% rename from src/modules/Database.py rename to src/complexfinder/Database.py index a92bcd6..a083125 100644 --- a/src/modules/Database.py +++ b/src/complexfinder/Database.py @@ -10,6 +10,8 @@ import time import pickle from collections import OrderedDict +from pathlib import Path + def chunks(l, n): """Yield successive n-sized chunks from l.""" @@ -49,7 +51,7 @@ def createSingleChunk(self,idx, entriesInChunks,pathToTmp,metricColumns,df): class Database(object): - def __init__(self, nJobs = 4, splitString = ";"): + def __init__(self, nJobs = 4, splitString = ";", databaseDir=None): """Database Module. The pipeline requires a database containing positve feature interactions. @@ -62,19 +64,26 @@ def __init__(self, nJobs = 4, splitString = ";"): Parameters ---------- - - + databaseDir: + Directory to use for looking up reference files. + If `None`, the `reference-data` directory inside the + package root will be used. This is assumed to be writable. 
""" self.dbs = dict() self.nJobs = nJobs self.splitString = splitString self.params = {"n_jobs":nJobs} + + if databaseDir is None: + self.databaseDir = str(Path(__file__).parent / 'reference-data') + else: + self.databaseDir = databaseDir + self._load() def _load(self): "" - folderPath = self._getPathToReferenceFiles() - self._loadFiles(folderPath) + self._loadFiles(self.databaseDir) def _loadFiles(self, folderPath): """ @@ -323,8 +332,7 @@ def _loadFile(self, *args, **kwargs): def _checkIfFilteredFileExists(self,dbID,filterDb): "" fileName = self._generateFileName(dbID,filterDb) - sourcePath = self._getPathToReferenceFiles() - self.pathToFile = os.path.join(sourcePath,fileName) + self.pathToFile = os.path.join(self.databaseDir, fileName) return os.path.exists(self.pathToFile) def _generateFileName(self,dbName,filterDb): @@ -389,9 +397,8 @@ def _getFiles(self, folderPath, extn = 'txt'): def _getPathToReferenceFiles(self): "" filePath = os.path.dirname(os.path.realpath(__file__)) - mainPath = os.path.abspath(os.path.join(filePath ,"../..")) pathToReferenceFolder = os.path.join( - mainPath, + filePath, 'reference-data' ) return pathToReferenceFolder diff --git a/src/modules/Distance.py b/src/complexfinder/Distance.py similarity index 100% rename from src/modules/Distance.py rename to src/complexfinder/Distance.py diff --git a/src/modules/Distance_archive.py b/src/complexfinder/Distance_archive.py similarity index 100% rename from src/modules/Distance_archive.py rename to src/complexfinder/Distance_archive.py diff --git a/src/modules/Plotter.py b/src/complexfinder/Plotter.py similarity index 100% rename from src/modules/Plotter.py rename to src/complexfinder/Plotter.py diff --git a/src/modules/Predictor.py b/src/complexfinder/Predictor.py similarity index 100% rename from src/modules/Predictor.py rename to src/complexfinder/Predictor.py diff --git a/src/modules/Signal.py b/src/complexfinder/Signal.py similarity index 100% rename from src/modules/Signal.py rename to 
src/complexfinder/Signal.py diff --git a/src/complexfinder/__init__.py b/src/complexfinder/__init__.py new file mode 100644 index 0000000..b5d2e77 --- /dev/null +++ b/src/complexfinder/__init__.py @@ -0,0 +1 @@ +from complexfinder.main import ComplexFinder diff --git a/src/main.py b/src/complexfinder/main.py similarity index 99% rename from src/main.py rename to src/complexfinder/main.py index 435ad93..9dd3def 100644 --- a/src/main.py +++ b/src/complexfinder/main.py @@ -8,12 +8,13 @@ import pickle import shutil from datetime import datetime +from pathlib import Path #internal imports -from modules.Signal import Signal -from modules.Database import Database -from modules.Predictor import Classifier, ComplexBuilder -from modules.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds +from complexfinder.Signal import Signal +from complexfinder.Database import Database +from complexfinder.Predictor import Classifier, ComplexBuilder +from complexfinder.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds import joblib from joblib import Parallel, delayed, dump, load @@ -131,6 +132,7 @@ def __init__(self, compTabFormat = False, considerOnlyInteractionsPresentInAllRuns = 2, correlationWindowSize = 5, + databaseDir = None, databaseFilter = {'Organism': ["Human"]},#{'Organism': ["Human"]},#{"Confidence" : [1,2,3,4]} - for hu.map2.0,# {} for HUMAN_COMPLEX_PORTAL databaseIDColumn = "subunits(UniProt IDs)", databaseFileName = "20190823_CORUM.txt",#"humap2.txt @@ -235,6 +237,13 @@ def __init__(self, * correlationWindowSize = 5, Number of fractions used for rolling pearson correlation + * databaseDir + Directory to use for looking up reference files and caching + any filtered reference files. + If `None`, the `reference-data` directory inside the + package root will be used. This is assumed to be writable. 
+ If the installation location is not writable, pass a + different path. * databaseFilter = {'Organism': ["Human"]}, Filter dict used to find relevant complexes from database. By default, the corum database is filtered based on the column 'Organism' using 'Mouse' as a search string. @@ -412,6 +421,7 @@ def __init__(self, "grouping" : grouping, "analysisMode" : analysisMode, "normValueDict" : normValueDict, + "databaseDir" : databaseDir, "databaseFilter" : databaseFilter, "databaseIDColumn" : databaseIDColumn, "databaseFileName" : databaseFileName, @@ -872,7 +882,7 @@ def _loadReferenceDB(self): print("Info :: Load positive set from data base") if not hasattr(self,"DB"): - self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"]) + self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"], databaseDir=self.params["databaseDir"]) pathToDatabase = os.path.join(self.params["pathToComb"], "InteractionDatabase.txt") if os.path.exists(pathToDatabase): @@ -2634,4 +2644,6 @@ def _combinePeakResults(self): removeSingleDataPointPeaks=True, keepOnlySignalsValidInAllConditions = False, quantFiles = {}, - useRawDataForDimensionalReduction = False).run("../example-data/D1") #adjust the folder where the files are sstored + useRawDataForDimensionalReduction = False).run( + str(Path(__file__).parents[2] / "example-data" / "D1") + ) # adjust the folder where the files are stored diff --git a/reference-data/20190823_CORUM_Organism__Human.txt b/src/complexfinder/reference-data/20190823_CORUM_Organism__Human.txt similarity index 100% rename from reference-data/20190823_CORUM_Organism__Human.txt rename to src/complexfinder/reference-data/20190823_CORUM_Organism__Human.txt diff --git a/reference-data/20190823_CORUM_Organism__Mouse.txt b/src/complexfinder/reference-data/20190823_CORUM_Organism__Mouse.txt similarity index 100% rename from reference-data/20190823_CORUM_Organism__Mouse.txt rename to 
src/complexfinder/reference-data/20190823_CORUM_Organism__Mouse.txt diff --git a/reference-data/CORUM.txt b/src/complexfinder/reference-data/CORUM.txt similarity index 100% rename from reference-data/CORUM.txt rename to src/complexfinder/reference-data/CORUM.txt diff --git a/reference-data/CORUM_Organism__Human.txt b/src/complexfinder/reference-data/CORUM_Organism__Human.txt similarity index 100% rename from reference-data/CORUM_Organism__Human.txt rename to src/complexfinder/reference-data/CORUM_Organism__Human.txt diff --git a/reference-data/Readme.md b/src/complexfinder/reference-data/Readme.md similarity index 100% rename from reference-data/Readme.md rename to src/complexfinder/reference-data/Readme.md diff --git a/src/modules/utils.py b/src/complexfinder/utils.py similarity index 100% rename from src/modules/utils.py rename to src/complexfinder/utils.py diff --git a/src/modules/__init__.py b/src/modules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_misc.py b/tests/test_misc.py index 8490b55..779581c 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,16 +1,15 @@ import os -import sys +import shutil from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import TemporaryDirectory, tempdir import pandas as pd -# update sys.path until there is a proper package installation -sys.path.append(str(Path(__file__).parent.parent / "src")) +from complexfinder import ComplexFinder +from complexfinder.Database import Database -from main import ComplexFinder - -SAMPLE_DATA_DIR = Path(__file__).parents[1] / "example-data" +REPO_ROOT = Path(__file__).parents[1] +SAMPLE_DATA_DIR = REPO_ROOT / "example-data" def test_workflow_completes(): @@ -71,3 +70,16 @@ def test_workflow_completes(): useRawDataForDimensionalReduction=False).run( tmpdir ) + + +def test_custom_database_dir(): + """Test that a custom database directory can be used.""" + db_file = REPO_ROOT / "src" / "complexfinder" / "reference-data" / 
"CORUM.txt" + with TemporaryDirectory() as tmpdir: + shutil.copyfile(db_file, Path(tmpdir) / db_file.name) + Database(databaseDir=tmpdir).pariwiseProteinInteractions( + dbID=db_file.name, + complexIDsColumn="subunits(UniProt IDs)", + filterDb={'Organism': ["Human"]} + ) + assert Path(tmpdir, f"{db_file.stem}_Organism__Human.txt").is_file()