Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ jobs:
with:
python-version: '3.13'

- name: Install dependencies
- name: Install package
run: |
pip install pandas scipy matplotlib lmfit joblib scikit-learn \
imbalanced-learn umap-learn hdbscan seaborn pytest
pip install .[test]

- name: Run tests
run: |
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include src/complexfinder/reference-data *
35 changes: 24 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,41 +64,54 @@ If you analyzed your data using ComplexFinder, we highly recommend to upload the

## Installation

Download the zip file containing the source code from github.
Navigate to the folder in terminal/command line tool.
Navigate to the folder in which you would like to create a new virtual Python
environment via the terminal/command line tool, then:

On Mac / Linux:
```
#create virt env
python3 -m venv env
#activate
source env/bin/activate
#install packages from req file
pip install -r requirements.txt
#install package from GitHub
pip install git+https://github.com/hnolCol/ComplexFinder.git
```
For windows user:
For Windows users:
```
#create virt env
py -m venv env
#actve
#activate
.\env\Scripts\activate
#install packages from req file
pip3 install -r requirements.txt
````
#install package from GitHub
pip install git+https://github.com/hnolCol/ComplexFinder.git
```

Alternatively, you can also clone the repository and install the package
locally. This will allow you to make changes to the code and test them
immediately.

```
git clone https://github.com/hnolCol/ComplexFinder.git
cd ComplexFinder
python3 -m venv env
source env/bin/activate
pip install -e .
```

## Usage Example

Upon download and extraction of the package, you can find example data in the example-data folder.
To run the analysis, you can enter:
```python
from .src.main import ComplexFinder
from complexfinder import ComplexFinder
X = pd.read_table("./example-data/SILAC_01.txt", sep = "\t") #loading tab delimited txt file.
ComplexFinder(analysisName = "ExampleRun_01").run(X)
```
You can also pass a folder path to run. This will result in the analysis of each txt file in the folder.

```python
import os
from .src.main import ComplexFinder
from complexfinder import ComplexFinder
folderPath = os.path.join(".","<my folder>")
ComplexFinder().run(folderPath)
```
Expand Down
File renamed without changes.
53 changes: 53 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
[build-system]
requires = ["setuptools>=77"]
build-backend = "setuptools.build_meta"

[project]
name = "complexfinder"
version = "0.4.48"
description = "ComplexFinder: A software package for the analysis of native protein complex fractionation experiments."
readme = "README.md"
requires-python = ">=3.11"
license = "MIT"
authors = [
{ name = "hnolCol" },
]
classifiers = [
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering",
]
dependencies = [
"hdbscan",
"imbalanced-learn",
"joblib",
"lmfit",
"matplotlib",
"numpy>=2",
"pandas>=2",
"scikit-learn",
"scipy",
"seaborn",
"umap-learn",
]

[project.optional-dependencies]
test = ["pytest>=9"]

[project.urls]
"Source" = "https://github.com/hnolcol/ComplexFinder"

[tool.pytest]
minversion = "9.0"
testpaths = [
"tests",
]
norecursedirs = [
"dist",
"build",
"complexfinder.egg-info",
"results*",
"env*",
"venv*",
"*data*",
]
Empty file removed src/__init__.py
Empty file.
25 changes: 16 additions & 9 deletions src/modules/Database.py → src/complexfinder/Database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import time
import pickle
from collections import OrderedDict
from pathlib import Path


def chunks(l, n):
"""Yield successive n-sized chunks from l."""
Expand Down Expand Up @@ -49,7 +51,7 @@ def createSingleChunk(self,idx, entriesInChunks,pathToTmp,metricColumns,df):
class Database(object):


def __init__(self, nJobs = 4, splitString = ";"):
def __init__(self, nJobs = 4, splitString = ";", databaseDir=None):
"""Database Module.

The pipeline requires a database containing positive feature interactions.
Expand All @@ -62,19 +64,26 @@ def __init__(self, nJobs = 4, splitString = ";"):

Parameters
----------


databaseDir:
Directory to use for looking up reference files.
If `None`, the `reference-data` directory inside the
package root will be used. This is assumed to be writable.
"""
self.dbs = dict()
self.nJobs = nJobs
self.splitString = splitString
self.params = {"n_jobs":nJobs}

if databaseDir is None:
self.databaseDir = str(Path(__file__).parent / 'reference-data')
else:
self.databaseDir = databaseDir

self._load()

def _load(self):
""
folderPath = self._getPathToReferenceFiles()
self._loadFiles(folderPath)
self._loadFiles(self.databaseDir)

def _loadFiles(self, folderPath):
"""
Expand Down Expand Up @@ -323,8 +332,7 @@ def _loadFile(self, *args, **kwargs):
def _checkIfFilteredFileExists(self,dbID,filterDb):
""
fileName = self._generateFileName(dbID,filterDb)
sourcePath = self._getPathToReferenceFiles()
self.pathToFile = os.path.join(sourcePath,fileName)
self.pathToFile = os.path.join(self.databaseDir, fileName)
return os.path.exists(self.pathToFile)

def _generateFileName(self,dbName,filterDb):
Expand Down Expand Up @@ -389,9 +397,8 @@ def _getFiles(self, folderPath, extn = 'txt'):
def _getPathToReferenceFiles(self):
""
filePath = os.path.dirname(os.path.realpath(__file__))
mainPath = os.path.abspath(os.path.join(filePath ,"../.."))
pathToReferenceFolder = os.path.join(
mainPath,
filePath,
'reference-data'
)
return pathToReferenceFolder
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions src/complexfinder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from complexfinder.main import ComplexFinder
24 changes: 18 additions & 6 deletions src/main.py → src/complexfinder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path

#internal imports
from modules.Signal import Signal
from modules.Database import Database
from modules.Predictor import Classifier, ComplexBuilder
from modules.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds
from complexfinder.Signal import Signal
from complexfinder.Database import Database
from complexfinder.Predictor import Classifier, ComplexBuilder
from complexfinder.utils import calculateDistanceP, chunks, cleanPath, minMaxNorm, extractMeanByBounds, extractMetricByShiftBounds

import joblib
from joblib import Parallel, delayed, dump, load
Expand Down Expand Up @@ -131,6 +132,7 @@ def __init__(self,
compTabFormat = False,
considerOnlyInteractionsPresentInAllRuns = 2,
correlationWindowSize = 5,
databaseDir = None,
databaseFilter = {'Organism': ["Human"]},#{'Organism': ["Human"]},#{"Confidence" : [1,2,3,4]} - for hu.map2.0,# {} for HUMAN_COMPLEX_PORTAL
databaseIDColumn = "subunits(UniProt IDs)",
databaseFileName = "20190823_CORUM.txt",#"humap2.txt
Expand Down Expand Up @@ -235,6 +237,13 @@ def __init__(self,
* correlationWindowSize = 5,
Number of fractions used for rolling pearson correlation

* databaseDir
Directory to use for looking up reference files and caching
any filtered reference files.
If `None`, the `reference-data` directory inside the
package root will be used. This is assumed to be writable.
If the installation location is not writable, pass a
different path.
* databaseFilter = {'Organism': ["Human"]},
Filter dict used to find relevant complexes from database. By default,
the CORUM database is filtered based on the column 'Organism' using 'Human' as a search string.
Expand Down Expand Up @@ -412,6 +421,7 @@ def __init__(self,
"grouping" : grouping,
"analysisMode" : analysisMode,
"normValueDict" : normValueDict,
"databaseDir" : databaseDir,
"databaseFilter" : databaseFilter,
"databaseIDColumn" : databaseIDColumn,
"databaseFileName" : databaseFileName,
Expand Down Expand Up @@ -872,7 +882,7 @@ def _loadReferenceDB(self):

print("Info :: Load positive set from data base")
if not hasattr(self,"DB"):
self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"])
self.DB = Database(nJobs = self.params["n_jobs"], splitString=self.params["databaseEntrySplitString"], databaseDir=self.params["databaseDir"])

pathToDatabase = os.path.join(self.params["pathToComb"], "InteractionDatabase.txt")
if os.path.exists(pathToDatabase):
Expand Down Expand Up @@ -2634,4 +2644,6 @@ def _combinePeakResults(self):
removeSingleDataPointPeaks=True,
keepOnlySignalsValidInAllConditions = False,
quantFiles = {},
useRawDataForDimensionalReduction = False).run("../example-data/D1") #adjust the folder where the files are sstored
useRawDataForDimensionalReduction = False).run(
str(Path(__file__).parents[2] / "example-data" / "D1")
) # adjust the folder where the files are sstored
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file removed src/modules/__init__.py
Empty file.
26 changes: 19 additions & 7 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import os
import sys
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from tempfile import TemporaryDirectory, tempdir

import pandas as pd

# update sys.path until there is a proper package installation
sys.path.append(str(Path(__file__).parent.parent / "src"))
from complexfinder import ComplexFinder
from complexfinder.Database import Database

from main import ComplexFinder

SAMPLE_DATA_DIR = Path(__file__).parents[1] / "example-data"
REPO_ROOT = Path(__file__).parents[1]
SAMPLE_DATA_DIR = REPO_ROOT / "example-data"


def test_workflow_completes():
Expand Down Expand Up @@ -71,3 +70,16 @@ def test_workflow_completes():
useRawDataForDimensionalReduction=False).run(
tmpdir
)


def test_custom_database_dir():
"""Test that a custom database directory can be used."""
db_file = REPO_ROOT / "src" / "complexfinder" / "reference-data" / "CORUM.txt"
with TemporaryDirectory() as tmpdir:
shutil.copyfile(db_file, Path(tmpdir) / db_file.name)
Database(databaseDir=tmpdir).pariwiseProteinInteractions(
dbID=db_file.name,
complexIDsColumn="subunits(UniProt IDs)",
filterDb={'Organism': ["Human"]}
)
assert Path(tmpdir, f"{db_file.stem}_Organism__Human.txt").is_file()