# classification.py
# Origin: repository forked from lukasjf/mphp.
import numpy as np
from datetime import datetime
from pprint import pprint
from utils.DataLoader import DataLoader
from utils.DimensionalityReducer import DimensionalityReducer
from utils.DataNormalizer import DataNormalizer
from utils.Sampler import Sampler
from utils.plot import plotScatter
from utils import Expressions
from validation.Analyzer import Analyzer
from validation.ClusterValidator import ClusterValidator
from validation.ClassificationValidator import ClassificationValidator
from multiprocessing import Array
def main():
    """Run the one-against-rest feature selection experiment on "dataset5".

    Steps, in order:
      1. Construct the project helper objects (loader, reducer, analyzer,
         validators, sampler).
      2. Load the "sick" and "healthy" data with the ``["all"]`` selector,
         fetch the gene labels, and print how long those three loader
         calls took.
      3. Select features with
         ``DimensionalityReducer.getOneAgainstRestFeatures``.

    Returns:
        The value returned by ``getOneAgainstRestFeatures`` for the loaded
        data. The script entry point below ignores it; it is returned so
        that interactive callers can inspect the selection.
    """
    print("Imported modules")

    dataLoader = DataLoader("dataset5")
    dimReducer = DimensionalityReducer()
    # The four helpers below (and ``gene_labels`` further down) are only
    # referenced by the parked experiments at the end of this function. They
    # are still constructed, in the original order, so that re-enabling a
    # snippet needs no extra setup and any constructor side effects are kept.
    analyzer = Analyzer()
    clusVal = ClusterValidator()
    classVal = ClassificationValidator()
    sampler = Sampler()
    print("data loaded")

    # Alternative input: a subset of the healthy data, over-sampled.
    # healthy = dataLoader.getData(["healthy"], ["THCA","LUAD"])
    # healthy = sampler.over_sample(healthy)

    # Time the data loading only (not the feature selection that follows).
    start = datetime.now()
    sick = dataLoader.getData(["sick"], ["all"])
    healthy = dataLoader.getData(["healthy"], ["all"])
    gene_labels = dataLoader.getGeneLabels()
    print("got combined data")
    print(datetime.now() - start)

    selected_genes = dimReducer.getOneAgainstRestFeatures(sick, healthy, 10, "norm", "exclude")

    # ------------------------------------------------------------------
    # Parked experiments.
    # These lines were previously disabled by wrapping them in a bare
    # triple-quoted string. They are kept verbatim as comments; a leading
    # "# #" marks lines that were already commented out inside that string.
    # NOTE(review): several snippets reference names that are not defined in
    # this function (``data``, ``results``, ``expressions``) - define them
    # before re-enabling.
    # ------------------------------------------------------------------
    # #selected_genes = dimReducer.getFeatures(data, 10)
    # #selected_genes = dimReducer.getOneAgainstRestFeatures(data, "",5)
    # #selected_genes = dimReducer.getNormalizedFeatures(sick, healthy, k=10)
    # #selected_genes = dimReducer.getOneAgainstRestFeatures(sick, healthy, 10, "norm", "relief")
    # #selected_genes = dimReducer.getOneAgainstRestFeatures(sick, healthy, 10, "norm", "exclude")
    # #selected_genes = dimReducer.getDecisionTreeFeatures(data, 10)
    # #selected_genes = dimReducer.getOneAgainstRestFeatures(data, "", 10, "tree")
    # #selected_genes = dimReducer.getFeaturesBySFS(sick, healthy, 3, fitness="combined", returnMultipleSets =True)
    # #selected_genes = dimReducer.getOneAgainstRestFeatures(sick, healthy, 3, fitness="combined", normalization="exclude")
    # pprint(selected_genes)
    # #expressions = analyzer.computeExpressionMatrixOneAgainstRest(sick, healthy, selected_genes)
    # #results = analyzer.computeFeatureValidationOneAgainstRest(sick, healthy, selected_genes)
    # features = dimReducer.getOneAgainstRestFeatures(sick,healthy)
    # pprint(features)
    # pprint(results)
    # pprint(expressions)
    # # Feature Selection
    # #selected_genes = dimReducer.getEAFeatures(sick,healthy,fitness="clustering")
    # selected_genes = dimReducer.getNormalizedFeatures(sick, healthy, k=3, normalization="relief")
    # print(selected_genes)
    # print("SICK REDUCED")
    # pprint(classVal.evaluate(sick, selected_genes, ["decisionTree"]))
    # plotScatter(sick, selected_genes, gene_labels)
    # print("HEALTHY REDUCED")
    # pprint(classVal.evaluate(healthy, selected_genes, ["decisionTree"]))
    # plotScatter(healthy, selected_genes, gene_labels)
    # pprint(analyzer.computeFeatureValidation(sick, healthy, selected_genes)["fitness"])
    # from datetime import datetime
    # start = datetime.now()
    # selected_genes = dimReducer.getFeaturesBySFS(sick, healthy, 10, fitness="combined", returnMultipleSets = True)
    # #selected_genes = dimReducer.getEAFeatures(sick, healthy, fitness="distance", returnMultipleSets = True)
    # #selected_genes = dimReducer.getDecisionTreeFeatures(data, 5, returnMultipleSets = True)
    # #selected_genes = dimReducer.getNormalizedFeatures(sick, healthy, k=3, returnMultipleSets = True)
    # pprint(selected_genes)
    # print(datetime.now()-start)
    # #start = datetime.now()
    # #selected_genes = dimReducer.getFeaturesBySFS(sick, healthy, 10, m=100 ,fitness="classification", returnMultipleSets = False)
    # #selected_genes = dimReducer.getNormalizedFeaturesE(sick, healthy, k=3, n=10000, m="chi2", returnMultipleSets=True)
    # #print(selected_genes)
    # #pprint(analyzer.computeFeatureValidation(sick, healthy, selected_genes)["fitness"])
    # #print(datetime.now() - start)

    return selected_genes


# Run the experiment only when executed as a script, so that importing this
# module (e.g. by multiprocessing worker start-up) does not trigger the
# data loading.
if __name__ == '__main__':
    main()