diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..89df162 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,71 @@ +name: CI + +on: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Generate cache key + id: cache-key + run: | + # Create a hash from the environment file and PyTorch CPU install string + ENV_HASH=$(sha256sum toolbox_env_conda.yml | cut -d' ' -f1) + PYTORCH_SPEC="pytorch-cpuonly" + CACHE_KEY="micromamba-${{ runner.os }}-${ENV_HASH}-${PYTORCH_SPEC}" + echo "key=${CACHE_KEY}" >> $GITHUB_OUTPUT + + - name: Restore micromamba environment cache + id: cache-restore + uses: actions/cache/restore@v4 + with: + path: | + ~/micromamba/envs + ~/micromamba-bin + key: ${{ steps.cache-key.outputs.key }} + + - name: Setup micromamba (cache miss) + if: steps.cache-restore.outputs.cache-hit != 'true' + uses: mamba-org/setup-micromamba@v2 + with: + environment-file: toolbox_env_conda.yml + environment-name: tbe + cache-environment: false + cache-downloads: false + micromamba-binary-path: ~/micromamba-bin/micromamba + micromamba-root-path: ~/micromamba + init-shell: bash + + - name: Initialize micromamba from cache (cache hit) + if: steps.cache-restore.outputs.cache-hit == 'true' + run: | + # Make micromamba available to subsequent steps without shell hooks + echo "$HOME/micromamba-bin" >> "$GITHUB_PATH" + echo "MAMBA_ROOT_PREFIX=$HOME/micromamba" >> "$GITHUB_ENV" + + - name: Install CPU-only PyTorch and dependencies + if: steps.cache-restore.outputs.cache-hit != 'true' + run: | + ~/micromamba-bin/micromamba install -n tbe -y pytorch cpuonly -c pytorch -c conda-forge + pip install esm + + - name: Save micromamba environment cache + if: steps.cache-restore.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: | + ~/micromamba/envs + ~/micromamba-bin + key: ${{ steps.cache-key.outputs.key }} + + - name: Run tests + run: | + ~/micromamba-bin/micromamba run -n tbe pytest ./tests diff --git a/.gitignore b/.gitignore index fa1c585..ac5f7ab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ config.json tests/data/dataset_generated +tests/data/distograms_generated tests/data/pdb_generated tests/data/data.info tests/data/dataset_expected_backup/* diff --git a/experiments/compare_download/compare.py b/experiments/compare_download/compare.py deleted file mode 100644 index bc60a00..0000000 --- a/experiments/compare_download/compare.py +++ /dev/null @@ -1,184 +0,0 @@ -import os -import csv -import statistics -from pathlib import Path -import io -import re - - -def read_csv_data(file_path): - with open(file_path, "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - if row["name"] == "retrieve_cif" or row["name"] == "retrieve_binary_cif": - retrieve_time = float(row["average_time_s"]) - elif row["name"] == "cif_to_pdbs" or row["name"] == "binary_cif_to_pdbs": - conversion_time = float(row["average_time_s"]) - - total_time = float(row["total_duration_seconds"]) - return retrieve_time, conversion_time, total_time - - -def extract_cores(filename): - match = re.search(r"_(\d+)_", filename) - return match.group(1) if match else "Unknown" - - -def compare_directories(text_dir, binary_dir): - text_files = sorted([f for f in os.listdir(text_dir) if f.endswith(".csv")]) - binary_files = sorted([f for f in os.listdir(binary_dir) if f.endswith(".csv")]) - - results = [] - for text_file, binary_file in zip(text_files, binary_files): - text_path = Path(text_dir) / text_file - binary_path = Path(binary_dir) / binary_file - - text_retrieve, text_convert, text_total = read_csv_data(text_path) - binary_retrieve, binary_convert, binary_total = read_csv_data(binary_path) - - cores = extract_cores(text_file) - - retrieve_diff = (binary_retrieve - text_retrieve) / text_retrieve * 100 - convert_diff = (binary_convert - text_convert) / text_convert * 100 - total_diff = (binary_total - text_total) / text_total * 100 - - results.append( - { - "CPU cores": cores, - "Text Retrieve (s)": f"{text_retrieve:.6f}", - "Binary Retrieve (s)": f"{binary_retrieve:.6f}", - "Retrieve Diff (%)": f"{retrieve_diff:.2f}", - "Text Convert (s)": f"{text_convert:.6f}", - "Binary Convert (s)": f"{binary_convert:.6f}", - "Convert Diff (%)": f"{convert_diff:.2f}", - "Text Total (s)": f"{text_total:.6f}", - "Binary Total (s)": f"{binary_total:.6f}", - "Total Diff (%)": f"{total_diff:.2f}", - } - ) - - return results - - -def save_results_to_csv(results, output_file): - with open(output_file, "w", newline="") as csvfile: - fieldnames = results[0].keys() - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for row in results: - writer.writerow(row) - - -def csv_to_latex(csv_file): - with open(csv_file, "r") as f: - reader = csv.DictReader(f) - rows = list(reader) - - # Sort rows by CPU cores - rows.sort(key=lambda x: int(x["CPU cores"])) - - latex_output = io.StringIO() - - # Calculate averages - avg_text_retrieve = statistics.mean(float(row["Text Retrieve (s)"]) for row in rows) - avg_binary_retrieve = statistics.mean( - float(row["Binary Retrieve (s)"]) for row in rows - ) - avg_text_convert = statistics.mean(float(row["Text Convert (s)"]) for row in rows) - avg_binary_convert = statistics.mean( - float(row["Binary Convert (s)"]) for row in rows - ) - avg_retrieve_diff = statistics.mean(float(row["Retrieve Diff (%)"]) for row in rows) - avg_convert_diff = statistics.mean(float(row["Convert Diff (%)"]) for row in rows) - avg_text_total = statistics.mean(float(row["Text Total (s)"]) for row in rows) - avg_binary_total = statistics.mean(float(row["Binary Total (s)"]) for row in rows) - avg_total_diff = statistics.mean(float(row["Total Diff (%)"]) for row in rows) - - # First table - latex_output.write("\\begin{table}[!htb]\n") - latex_output.write("\\centering\n") - latex_output.write("\\begin{tabular}{|l|rr|rr|rr|}\n") - latex_output.write("\\hline\n") - latex_output.write( - "\\textbf{CPU cores} & \\multicolumn{2}{c|}{\\textbf{Retrieve (s)}} & \\multicolumn{2}{c|}{\\textbf{Convert (s)}} & \\multicolumn{2}{c|}{\\textbf{Diff (\\%)}}\\\\\n" - ) - latex_output.write( - "& \\textbf{Text} & \\textbf{Binary} & \\textbf{Text} & \\textbf{Binary} & \\textbf{Retrieve} & \\textbf{Convert} \\\\\n" - ) - latex_output.write("\\hline\n") - - for row in rows: - latex_output.write( - f"{row['CPU cores']} & {float(row['Text Retrieve (s)']):.2f} & {float(row['Binary Retrieve (s)']):.2f} & " - ) - latex_output.write( - f"{float(row['Text Convert (s)']):.3f} & {float(row['Binary Convert (s)']):.3f} & " - ) - latex_output.write( - f"{float(row['Retrieve Diff (%)']):.2f} & {float(row['Convert Diff (%)']):.2f} \\\\\n" - ) - - latex_output.write("\\hline\n") - latex_output.write( - f"Average & {avg_text_retrieve:.2f} & {avg_binary_retrieve:.2f} & " - ) - latex_output.write(f"{avg_text_convert:.3f} & {avg_binary_convert:.3f} & ") - latex_output.write(f"{avg_retrieve_diff:.2f} & {avg_convert_diff:.2f} \\\\\n") - latex_output.write("\\hline\n") - latex_output.write("\\end{tabular}\n") - latex_output.write( - "\\caption{Comparison of Text and Binary Processing (Retrieve and Convert)}\n" - ) - latex_output.write("\\label{tab:exp_download_pdb_comparison}\n") - latex_output.write("\\end{table}\n\n") - - # Second table - latex_output.write("\\begin{table}[!htb]\n") - latex_output.write("\\centering\n") - latex_output.write("\\begin{tabular}{|r|r|r|r|}\n") - latex_output.write("\\hline\n") - latex_output.write( - "\\textbf{CPU cores} & \\textbf{Text Total (s)} & \\textbf{Binary Total (s)} & \\textbf{Diff (\\%)} \\\\\n" - ) - latex_output.write("\\hline\n") - - for row in rows: - latex_output.write( - f"{row['CPU cores']} & {float(row['Text Total (s)']):.0f} & {float(row['Binary Total (s)']):.0f} & " - ) - latex_output.write(f"{float(row['Total Diff (%)']):.2f} \\\\\n") - - latex_output.write("\\hline\n") - latex_output.write( - f"Average & {avg_text_total:.0f} & {avg_binary_total:.0f} & {avg_total_diff:.2f} \\\\\n" - ) - latex_output.write("\\hline\n") - latex_output.write("\\end{tabular}\n") - latex_output.write( - "\\caption{Comparison of total processing Time for text and binary download}\n" - ) - latex_output.write("\\label{tab:exp_download_pdb_total_comparison}\n") - latex_output.write("\\end{table}\n") - - return latex_output.getvalue() - - -# Usage -text_directory = ( - "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download_pdb_nb/results" -) -binary_directory = ( - "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download/results" -) -output_csv = "comparison_results.csv" -output_latex = "comparison_results.tex" - -results = compare_directories(text_directory, binary_directory) -save_results_to_csv(results, output_csv) -latex_table = csv_to_latex(output_csv) - -with open(output_latex, "w") as f: - f.write(latex_table) - -print(f"Results saved to {output_csv}") -print(f"LaTeX table saved to {output_latex}") diff --git a/experiments/compression/compress_csv.py b/experiments/compression/compress_csv.py deleted file mode 100644 index 688b4d3..0000000 --- a/experiments/compression/compress_csv.py +++ /dev/null @@ -1,298 +0,0 @@ -import re -from collections import defaultdict -import csv - - -def parse_compression_data(data): - compression_info = defaultdict(lambda: defaultdict(list)) - current_method = "" - - for line in data.split("\n"): - line = line.strip() - if line.startswith(("individual", "combined")): - current_method = line - elif "Compress time" in line: - time_match = re.search(r"Compress time.*?: ([\d.]+)", line) - if time_match: - time = float(time_match.group(1)) - compression_info[current_method]["time"].append(time) - elif ".h5" in line: - parts = line.split() - if len(parts) >= 2: - try: - size = float(parts[-1]) - compression_info[current_method]["size"].append(size) - except ValueError: - print(f"Warning: Could not parse size from line: {line}") - - return compression_info - - -def calculate_averages(compression_info): - averages = {} - for method, data in compression_info.items(): - avg_time = sum(data["time"]) / len(data["time"]) if data["time"] else 0 - avg_size = sum(data["size"]) / len(data["size"]) if data["size"] else 0 - averages[method] = {"avg_time": avg_time, "avg_size": avg_size} - return averages - - -def write_csv(averages, filename="compression_results.csv"): - with open(filename, "w", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow(["Compression Method", "Average Time (s)", "Average Size (MB)"]) - for method, data in averages.items(): - writer.writerow( - [method, f"{data['avg_time']:.2f}", f"{data['avg_size']:.2f}"] - ) - - -# Main execution -data = """ -individual gzip -Compress time (individual): 8.960439920425415 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_individual_gzip.h5 175.14 -individual lzf -Compress time (individual): 2.7194790840148926 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_individual_lzf.h5 263.8 -individual lzf shuffle -Compress time (shuffle_individual): 2.708061695098877 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_individual_lzf_shuffle.h5 263.8 -combined gzip -Compress time (combined): 8.428448915481567 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_combined_gzip.h5 158.46 -combined lzf -Compress time (combined): 1.545198917388916 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_combined_lzf.h5 240.88 -combined lzf shuffle -Compress time (shuffle_combined): 1.5805041790008545 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs_combined_lzf_shuffle.h5 240.88 -combined zlib -Compress time: 26.11584734916687 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/8/pdbs.h5 145.9 -Download time: 54.976277589797974 -individual gzip -Compress time (individual): 10.46397614479065 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_individual_gzip.h5 192.46 -individual lzf -Compress time (individual): 3.113642930984497 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_individual_lzf.h5 290.4 -individual lzf shuffle -Compress time (shuffle_individual): 3.0937726497650146 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_individual_lzf_shuffle.h5 290.4 -combined gzip -Compress time (combined): 9.483084440231323 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_combined_gzip.h5 173.85 -combined lzf -Compress time (combined): 1.8779964447021484 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_combined_lzf.h5 264.75 -combined lzf shuffle -Compress time (shuffle_combined): 1.904184341430664 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs_combined_lzf_shuffle.h5 264.75 -combined zlib -Compress time: 28.72207021713257 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/6/pdbs.h5 160.13 -Retrying downloading 1n50 1 -Retrying downloading 3n25 1 -Download time: 155.74999403953552 -individual gzip -Compress time (individual): 8.921097993850708 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_individual_gzip.h5 168.3 -individual lzf -Compress time (individual): 2.7093045711517334 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_individual_lzf.h5 253.92 -individual lzf shuffle -Compress time (shuffle_individual): 2.7249491214752197 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_individual_lzf_shuffle.h5 253.92 -combined gzip -Compress time (combined): 8.13132381439209 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_combined_gzip.h5 152.31 -combined lzf -Compress time (combined): 1.4809176921844482 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_combined_lzf.h5 231.85 -combined lzf shuffle -Compress time (shuffle_combined): 1.4856204986572266 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs_combined_lzf_shuffle.h5 231.85 -combined zlib -Compress time: 24.98205327987671 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/9/pdbs.h5 140.05 -Download time: 179.54293203353882 -individual gzip -Compress time (individual): 9.52885913848877 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_individual_gzip.h5 183.71 -individual lzf -Compress time (individual): 2.8948311805725098 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_individual_lzf.h5 277.01 -individual lzf shuffle -Compress time (shuffle_individual): 2.916029691696167 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_individual_lzf_shuffle.h5 277.01 -combined gzip -Compress time (combined): 9.085760116577148 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_combined_gzip.h5 166.45 -combined lzf -Compress time (combined): 1.7873072624206543 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_combined_lzf.h5 253.25 -combined lzf shuffle -Compress time (shuffle_combined): 1.79245924949646 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs_combined_lzf_shuffle.h5 253.25 -combined zlib -Compress time: 27.33644127845764 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/7/pdbs.h5 153.35 -Retrying downloading 7lcs 1 -Download time: 176.0704219341278 -individual gzip -Compress time (individual): 8.679517269134521 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_individual_gzip.h5 168.67 -individual lzf -Compress time (individual): 2.6313188076019287 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_individual_lzf.h5 254.29 -individual lzf shuffle -Compress time (shuffle_individual): 2.649430513381958 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_individual_lzf_shuffle.h5 254.29 -combined gzip -Compress time (combined): 8.253647089004517 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_combined_gzip.h5 152.12 -combined lzf -Compress time (combined): 1.6141126155853271 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_combined_lzf.h5 231.73 -combined lzf shuffle -Compress time (shuffle_combined): 1.6149871349334717 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs_combined_lzf_shuffle.h5 231.73 -combined zlib -Compress time: 25.073856592178345 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/0/pdbs.h5 139.86 -Retrying downloading 2lcp 1 -Download time: 172.5204393863678 -individual gzip -Compress time (individual): 9.471012115478516 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_individual_gzip.h5 177.06 -individual lzf -Compress time (individual): 2.777259588241577 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_individual_lzf.h5 267.26 -individual lzf shuffle -Compress time (shuffle_individual): 2.718510150909424 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_individual_lzf_shuffle.h5 267.26 -combined gzip -Compress time (combined): 8.694792032241821 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_combined_gzip.h5 160.34 -combined lzf -Compress time (combined): 1.6806697845458984 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_combined_lzf.h5 244.43 -combined lzf shuffle -Compress time (shuffle_combined): 1.697434663772583 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs_combined_lzf_shuffle.h5 244.43 -combined zlib -Compress time: 26.56005311012268 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/1/pdbs.h5 147.62 -Total processing time 6: 114.06452345848083 -Retrying downloading 1lmk 1 -Retrying downloading 8pb5 1 -Total processing time 4: 237.8919596672058 -Total processing time 2: 215.83751487731934 -Download time: 163.4844193458557 -individual gzip -Compress time (individual): 9.176882982254028 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_individual_gzip.h5 173.18 -individual lzf -Compress time (individual): 2.729750633239746 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_individual_lzf.h5 261.2 -individual lzf shuffle -Compress time (shuffle_individual): 2.7659482955932617 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_individual_lzf_shuffle.h5 261.2 -combined gzip -Compress time (combined): 8.476070404052734 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_combined_gzip.h5 156.95 -combined lzf -Compress time (combined): 1.5406594276428223 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_combined_lzf.h5 239.02 -combined lzf shuffle -Compress time (shuffle_combined): 1.5622673034667969 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs_combined_lzf_shuffle.h5 239.02 -combined zlib -Compress time: 25.9271342754364 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/2/pdbs.h5 144.47 -Download time: 183.9232165813446 -individual gzip -Compress time (individual): 9.457458734512329 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_individual_gzip.h5 174.62 -individual lzf -Compress time (individual): 2.867518901824951 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_individual_lzf.h5 263.08 -individual lzf shuffle -Compress time (shuffle_individual): 2.8551931381225586 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_individual_lzf_shuffle.h5 263.08 -combined gzip -Compress time (combined): 8.59383487701416 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_combined_gzip.h5 157.31 -combined lzf -Compress time (combined): 1.7046394348144531 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_combined_lzf.h5 239.54 -combined lzf shuffle -Compress time (shuffle_combined): 1.5941860675811768 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs_combined_lzf_shuffle.h5 239.54 -combined zlib -Compress time: 26.08711886405945 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/4/pdbs.h5 144.65 -Retrying downloading 7pyg 1 -Download time: 54.93039345741272 -individual gzip -Compress time (individual): 9.85055661201477 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_individual_gzip.h5 186.86 -individual lzf -Compress time (individual): 3.0021700859069824 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_individual_lzf.h5 281.57 -individual lzf shuffle -Compress time (shuffle_individual): 3.043093204498291 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_individual_lzf_shuffle.h5 281.57 -combined gzip -Compress time (combined): 9.187070369720459 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_combined_gzip.h5 168.87 -combined lzf -Compress time (combined): 1.7993779182434082 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_combined_lzf.h5 256.83 -combined lzf shuffle -Compress time (shuffle_combined): 1.808286190032959 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs_combined_lzf_shuffle.h5 256.83 -combined zlib -Compress time: 28.313949584960938 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/5/pdbs.h5 155.45 -Download time: 54.57556438446045 -individual gzip -Compress time (individual): 9.232254266738892 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_individual_gzip.h5 168.56 -individual lzf -Compress time (individual): 2.65150785446167 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_individual_lzf.h5 254.3 -individual lzf shuffle -Compress time (shuffle_individual): 2.6797468662261963 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_individual_lzf_shuffle.h5 254.3 -combined gzip -Compress time (combined): 8.357633590698242 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_combined_gzip.h5 153.45 -combined lzf -Compress time (combined): 1.6917576789855957 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_combined_lzf.h5 233.26 -combined lzf shuffle -Compress time (shuffle_combined): 1.6612491607666016 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs_combined_lzf_shuffle.h5 233.26 -combined zlib -Compress time: 25.18907904624939 -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/PDB/all_/20240905_0042/structures/3/pdbs.h5 141.14 -""" - -compression_info = parse_compression_data(data) -print("Parsed compression info:") -for method, data in compression_info.items(): - print(f"{method}:") - print(f" Time data: {data['time']}") - print(f" Size data: {data['size']}") - print(f" Number of time entries: {len(data['time'])}") - print(f" Number of size entries: {len(data['size'])}") - print() - -averages = calculate_averages(compression_info) -write_csv(averages) - -print( - "CSV file 'compression_results.csv' has been created with the average compression results." -) diff --git a/experiments/compression/visualize/visualize_size_time.py b/experiments/compression/visualize/visualize_size_time.py deleted file mode 100644 index 514d97b..0000000 --- a/experiments/compression/visualize/visualize_size_time.py +++ /dev/null @@ -1,75 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np - -# Font size variables -SMALL_FONT = 21 -MEDIUM_FONT = 23 -LARGE_FONT = 24 -TITLE_FONT = 25 - -raw_size = 1224.63 - -# Data with added numbers -data = [ - (1, "gzip, 1, False", 17.84, 969.85), - (2, "gzip, 4, False", 26.62, 966.50), - (3, "gzip, 9, False", 28.75, 965.61), - (4, "szip, None, False", 4.84, 756.35), - (5, "lzf, None, False", 4.93, 1221.79), - (6, "gzip, 1, True", 14.45, 830.28), - (7, "gzip, 4, True", 20.86, 797.66), - (8, "gzip, 9, True", 110.96, 781.90), - (9, "szip, None, True", 5.47, 1049.11), - (10, "lzf, None, True", 5.77, 923.48), -] - -# Sort data by time -sorted_data = sorted(data, key=lambda x: x[2]) - -# Extract sorted values -numbers = [item[0] for item in sorted_data] -times = [item[2] for item in sorted_data] -sizes = [item[3] for item in sorted_data] - -# Set the default font size -plt.rcParams.update({"font.size": SMALL_FONT}) - -# Create the plot -fig, ax1 = plt.subplots(figsize=(16, 8)) - -# Plot time as a line with dots -line = ax1.plot( - range(len(data)), times, "bo-", linewidth=2, markersize=8, label="Time (s)" -)[0] -ax1.set_xlabel("Compression Options (original numbering)", fontsize=MEDIUM_FONT) -ax1.set_ylabel("Time (s)", color="b", fontsize=MEDIUM_FONT) -ax1.tick_params(axis="y", labelcolor="b", labelsize=SMALL_FONT) - -# Create a second y-axis for size -ax2 = ax1.twinx() -bars = ax2.bar(range(len(data)), sizes, alpha=0.3, color="r", label="Size (MB)") -ax2.set_ylabel("Size (MB)", color="r", fontsize=MEDIUM_FONT) -ax2.tick_params(axis="y", labelcolor="r", labelsize=SMALL_FONT) - -ax2.axhline(y=raw_size, color="g", linestyle="-", alpha=0.3, label="Raw Size (MB)") - -# Set x-axis ticks and labels -plt.xticks(range(len(data)), numbers, fontsize=SMALL_FONT) - -# Add a title -plt.title("HDF5 Compression: Time vs Size (Sorted by Time)", fontsize=TITLE_FONT) - -# Add legend -lines = [line, bars] -labels = [l.get_label() for l in lines] - -# Adjust layout -plt.tight_layout() - -# Save the plot as a PDF file -plt.savefig("hdf5_compression_plot.pdf", format="pdf", bbox_inches="tight") - -print("Plot saved as 'hdf5_compression_plot.pdf'") - -# If you also want to display the plot, uncomment the following line: -plt.show() diff --git a/experiments/data/distograms_pdb_all_192.txt b/experiments/data/distograms_pdb_all_192.txt deleted file mode 100644 index 276e54e..0000000 --- a/experiments/data/distograms_pdb_all_192.txt +++ /dev/null @@ -1,790 +0,0 @@ -PYTHONPATH='.' python3 -u /net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/deepFRI2-toolbox-dev/toolbox/scripts/create_dataset.py generate_distograms -p /net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/datasets/PDB-all--20240812_1910 -IP -172.25.0.101 -Now please run command: -eval "$(conda shell.bash hook)" -to set conda env - Miniconda3/23.9.0-0 loaded. -Start time: Wed Aug 14 00:12:25 CEST 2024 -http://127.0.0.1:8989/status -Generating distograms -Index len 860504 -Globbing /net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/datasets -Found 0 files -/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/client.py:3161: UserWarning: Sending large graph of size 10.93 MiB. -This may cause some slowdown. -Consider scattering data ahead of time and using futures. - warnings.warn( -Found 0 present distograms files -Found 860504 missing distograms ids -Missing distograms -860504 -Max parallel workers 19 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -Collecting results -Only one CA in 7ajz_C.pdb. -Only one CA in 1c0q_A.pdb. -Only one CA in 1c0q_B.pdb. -Only one CA in 4jj7_B.pdb. -Only one CA in 7aku_C.pdb. -Only one CA in 1mcd_P.pdb. -Only one CA in 9b5q_D.pdb. -Only one CA in 8g1w_M.pdb. -Only one CA in 8g1w_L.pdb. -Only one CA in 6m9d_B.pdb. -Only one CA in 7f0f_C.pdb. -Only one CA in 3mbu_A.pdb. -Only one CA in 3mbu_B.pdb. -Only one CA in 3mbu_C.pdb. -Only one CA in 3mbu_D.pdb. -Only one CA in 4bxe_C.pdb. -Only one CA in 4bxe_D.pdb. -Only one CA in 4gm3_I.pdb. -Only one CA in 4gm3_J.pdb. -Only one CA in 4gm3_K.pdb. -Only one CA in 4gm3_L.pdb. -Only one CA in 4gm3_M.pdb. -Only one CA in 4gm3_N.pdb. -Only one CA in 4gm3_O.pdb. -Only one CA in 4gm3_P.pdb. -Only one CA in 2r5d_H.pdb. -Only one CA in 2r5d_K.pdb. -Only one CA in 2r5d_L.pdb. -Only one CA in 3che_C.pdb. -Only one CA in 3che_D.pdb. -Only one CA in 3gkr_B.pdb. -Only one CA in 7pjx_y.pdb. -Only one CA in 4d69_S.pdb. -Only one CA in 7wq8_B.pdb. -Only one CA in 1gac_C.pdb. -Only one CA in 1gac_D.pdb. -Only one CA in 8ti6_B.pdb. -Only one CA in 7lfv_D.pdb. -Only one CA in 7lfv_F.pdb. -Only one CA in 2bd2_P.pdb. -Only one CA in 5aa2_E.pdb. -19 results collected -20 -21 -22 -23 -24 -25 -26 -27 -28 -29 -30 -31 -32 -33 -34 -35 -36 -37 -38 -Collecting results -Only one CA in 4uzq_B.pdb. -Only one CA in 7pjt_y.pdb. -Only one CA in 1sho_A.pdb. -Only one CA in 1sho_B.pdb. -Only one CA in 1ay3_A.pdb. -Only one CA in 4gk7_1.pdb. -Only one CA in 4gk7_2.pdb. -Only one CA in 4gk7_3.pdb. -Only one CA in 4gk7_4.pdb. -Only one CA in 4gk7_5.pdb. -Only one CA in 4gk7_6.pdb. -Only one CA in 3to6_B.pdb. -Only one CA in 2bd8_P.pdb. -Only one CA in 1hh3_A.pdb. -Only one CA in 1hh3_B.pdb. -Only one CA in 1hh3_C.pdb. -Only one CA in 1hh3_D.pdb. -Only one CA in 3l35_H.pdb. -Only one CA in 3l35_K.pdb. -Only one CA in 3l35_L.pdb. -Only one CA in 4cvk_B.pdb. -Only one CA in 7axp_B.pdb. -Only one CA in 3u1i_E.pdb. -Only one CA in 1zea_A.pdb. -Only one CA in 3ove_B.pdb. -Only one CA in 6phq_A.pdb. -Only one CA in 8aoo_H.pdb. -Only one CA in 7lkc_A.pdb. -Only one CA in 7lkc_B.pdb. -Only one CA in 7zmu_B.pdb. -Only one CA in 8cmw_D.pdb. -Only one CA in 2r3c_C.pdb. -Only one CA in 2r3c_D.pdb. -Only one CA in 8dny_D.pdb. -Only one CA in 1a1e_C.pdb. -Only one CA in 1a1e_D.pdb. -Only one CA in 6ecd_B.pdb. -Only one CA in 7pju_y.pdb. -Only one CA in 1mhw_E.pdb. -Only one CA in 1mhw_F.pdb. -Only one CA in 148l_S.pdb. -Only one CA in 6mw0_A.pdb. -Only one CA in 9b5g_D.pdb. -Only one CA in 4fzg_c.pdb. -Only one CA in 4fzg_d.pdb. -Only one CA in 4fzg_e.pdb. -Only one CA in 4fzg_f.pdb. -Only one CA in 7pjy_y.pdb. -Only one CA in 1a2c_J.pdb. -Only one CA in 3l37_H.pdb. -Only one CA in 6fms_E.pdb. -Only one CA in 6fms_F.pdb. -Only one CA in 6fms_G.pdb. -Only one CA in 6fms_H.pdb. -19 results collected -39 -40 -41 -42 -43 -44 -45 -46 -47 -48 -49 -50 -51 -52 -53 -54 -55 -56 -57 -Collecting results -No CA found in 3h9q_H.pdb -Only one CA in 1mcr_P.pdb. -Only one CA in 6yd3_611.pdb. -Only one CA in 5efj_F.pdb. -Only one CA in 1bx9_B.pdb. -Only one CA in 3h9q_F.pdb. -Only one CA in 1waw_B.pdb. -Only one CA in 7z3u_F.pdb. -Only one CA in 7z3u_G.pdb. -Only one CA in 7zmw_B.pdb. -Only one CA in 1hhz_A.pdb. -Only one CA in 1hhz_B.pdb. -Only one CA in 1hhz_C.pdb. -Only one CA in 1hhz_D.pdb. -Only one CA in 1hhz_E.pdb. -Only one CA in 1hhz_F.pdb. -Only one CA in 4gm8_E.pdb. -Only one CA in 4gm8_F.pdb. -Only one CA in 4gm8_G.pdb. -Only one CA in 4gm8_H.pdb. -Only one CA in 7e74_G.pdb. -Only one CA in 7axq_C.pdb. -Only one CA in 3ah8_Y.pdb. -Only one CA in 7zb0_E.pdb. -Only one CA in 7zb0_G.pdb. -Only one CA in 7zl3_E.pdb. -Only one CA in 1fvm_A.pdb. -Only one CA in 1fvm_B.pdb. -Only one CA in 1fvm_C.pdb. -Only one CA in 1fvm_D.pdb. -Only one CA in 1fvm_E.pdb. -Only one CA in 1fvm_F.pdb. -Only one CA in 6mw2_A.pdb. -Only one CA in 5n8j_P.pdb. -Only one CA in 5n8j_E.pdb. -Only one CA in 2bda_P.pdb. -Only one CA in 9b5i_D.pdb. -Only one CA in 2bd5_P.pdb. -Only one CA in 4lte_M.pdb. -Only one CA in 4lte_N.pdb. -Only one CA in 4chx_C.pdb. -Only one CA in 7jfr_L.pdb. -Only one CA in 6x5s_C.pdb. -Only one CA in 6x5s_D.pdb. -Only one CA in 1evc_A.pdb. -Only one CA in 6xbg_C.pdb. -Only one CA in 6xbg_E.pdb. -Only one CA in 6wo2_C.pdb. -Only one CA in 6wo2_D.pdb. -19 results collected -58 -59 -60 -61 -62 -63 -64 -65 -66 -67 -68 -69 -70 -71 -72 -73 -74 -75 -76 -Collecting results -Only one CA in 7q4t_LbL.pdb. -Only one CA in 1axd_C.pdb. -Only one CA in 1axd_D.pdb. -Only one CA in 1qd8_A.pdb. -Only one CA in 1qd8_B.pdb. -Only one CA in 1d5x_D.pdb. -Only one CA in 4nz8_B.pdb. -Only one CA in 4nge_F.pdb. -Only one CA in 5i5b_B.pdb. -Only one CA in 8i82_D.pdb. -Only one CA in 8cep_V.pdb. -Only one CA in 8cep_B.pdb. -Only one CA in 8cep_C.pdb. -Only one CA in 8cep_G.pdb. -Only one CA in 8cep_I.pdb. -Only one CA in 5m2h_A.pdb. -Only one CA in 5m2h_B.pdb. -Only one CA in 5aa1_E.pdb. -Only one CA in 6ryo_B.pdb. -Only one CA in 6ecf_G.pdb. -Only one CA in 6ecf_I.pdb. -Only one CA in 6ecf_H.pdb. -Only one CA in 6ecf_K.pdb. -Only one CA in 6ecf_J.pdb. -Only one CA in 6ecf_L.pdb. -Only one CA in 1bcr_C.pdb. -Only one CA in 8bfd_B.pdb. -Only one CA in 9b5m_D.pdb. -Only one CA in 3s8o_B.pdb. -Only one CA in 7c4v_A.pdb. -Only one CA in 7c4v_B.pdb. -Only one CA in 1hhy_A.pdb. -Only one CA in 1hhy_B.pdb. -Only one CA in 8bh4_6.pdb. -Only one CA in 6o34_B.pdb. -Only one CA in 6yb6_D.pdb. -Only one CA in 3k26_B.pdb. -Only one CA in 8juf_B.pdb. -Only one CA in 4wkm_I.pdb. -Only one CA in 4wkm_J.pdb. -Only one CA in 4wkm_K.pdb. -Only one CA in 4wkm_L.pdb. -Only one CA in 4wkm_M.pdb. -Only one CA in 4wkm_N.pdb. -Only one CA in 4wkm_O.pdb. -Only one CA in 4wkm_P.pdb. -19 results collected -77 -78 -79 -80 -81 -82 -83 -84 -85 -86 -87 -88 -89 -90 -91 -92 -93 -94 -95 -Collecting results -Only one CA in 5ctv_E.pdb. -Only one CA in 5fwe_C.pdb. -Only one CA in 9b5f_D.pdb. -Only one CA in 5m2k_A.pdb. -Only one CA in 5m2k_B.pdb. -Only one CA in 7pjs_y.pdb. -Only one CA in 4y69_c.pdb. -Only one CA in 4y69_d.pdb. -Only one CA in 8gi5_D.pdb. -Only one CA in 5n22_J.pdb. -Only one CA in 5n22_L.pdb. -Only one CA in 3mul_D.pdb. -Only one CA in 1hhu_A.pdb. -Only one CA in 1hhu_B.pdb. -Only one CA in 1hhu_C.pdb. -Only one CA in 1hhu_D.pdb. -Only one CA in 3nzj_3.pdb. -Only one CA in 3nzj_4.pdb. -Only one CA in 7axs_B.pdb. -Only one CA in 6fbt_E.pdb. -Only one CA in 1c0r_A.pdb. -Only one CA in 1c0r_B.pdb. -Only one CA in 4fzc_c.pdb. -Only one CA in 4fzc_d.pdb. -Only one CA in 4fzc_e.pdb. -Only one CA in 4fzc_f.pdb. -Only one CA in 2ig0_B.pdb. -Only one CA in 4jj8_C.pdb. -Only one CA in 4jj8_D.pdb. -Only one CA in 8i8g_D.pdb. -Only one CA in 1a1a_C.pdb. -Only one CA in 1a1a_D.pdb. -Only one CA in 9b5r_D.pdb. -Only one CA in 3rc0_Q.pdb. -Only one CA in 3nzw_3.pdb. -Only one CA in 3nzw_4.pdb. -Only one CA in 1hhc_A.pdb. -Only one CA in 1hhc_B.pdb. -Only one CA in 1hhc_C.pdb. -Only one CA in 1hhc_D.pdb. -Only one CA in 7c4u_A.pdb. -Only one CA in 7c4u_B.pdb. -Only one CA in 5lrk_F.pdb. -Only one CA in 5lrk_G.pdb. -Only one CA in 5lrk_H.pdb. -Only one CA in 7wkd_C.pdb. -Only one CA in 1mck_P.pdb. -19 results collected -96 -97 -98 -99 -100 -101 -102 -103 -104 -105 -106 -107 -108 -109 -110 -111 -112 -113 -114 -Collecting results -Only one CA in 5mfk_D.pdb. -Only one CA in 8hos_C.pdb. -Only one CA in 8hos_D.pdb. -Only one CA in 8gh7_D.pdb. -Only one CA in 8gh7_H.pdb. -Only one CA in 3chf_C.pdb. -Only one CA in 3chf_D.pdb. -Only one CA in 1q7o_A.pdb. -Only one CA in 8hoo_C.pdb. -Only one CA in 8hoo_D.pdb. -Only one CA in 7oyq_C.pdb. -Only one CA in 4y6a_c.pdb. -Only one CA in 4y6a_d.pdb. -Only one CA in 6df1_A.pdb. -Only one CA in 5fa5_C.pdb. -Only one CA in 1w9u_C.pdb. -Only one CA in 1w9u_D.pdb. -Only one CA in 6m9c_B.pdb. -Only one CA in 7nxe_D.pdb. -Only one CA in 7new_F.pdb. -Only one CA in 8hop_C.pdb. -Only one CA in 8hop_D.pdb. -Only one CA in 2bd4_P.pdb. -Only one CA in 6xif_I.pdb. -Only one CA in 5dnj_B.pdb. -Only one CA in 1mcj_P.pdb. -Only one CA in 4gmb_E.pdb. -Only one CA in 1a1b_C.pdb. -Only one CA in 1a1b_D.pdb. -Only one CA in 3azq_C.pdb. -Only one CA in 3azq_D.pdb. -Only one CA in 7pjz_y.pdb. -Only one CA in 8bhn_6.pdb. -Only one CA in 1a08_C.pdb. -Only one CA in 1a08_D.pdb. -Only one CA in 6yd7_B.pdb. -Only one CA in 3u4w_B.pdb. -19 results collected -115 -116 -117 -118 -119 -120 -121 -122 -123 -124 -125 -126 -127 -128 -129 -130 -131 -132 -133 -Collecting results -No CA found in 4lke_G.pdb -Only one CA in 2bcd_B.pdb. -Only one CA in 7axx_B.pdb. -Only one CA in 5nqa_G.pdb. -Only one CA in 6yd2_611.pdb. -Only one CA in 1eb1_B.pdb. -Only one CA in 4bxd_C.pdb. -Only one CA in 4bxd_D.pdb. -Only one CA in 3td5_I.pdb. -Only one CA in 3td5_J.pdb. -Only one CA in 3td5_K.pdb. -Only one CA in 3td5_L.pdb. -Only one CA in 3td5_M.pdb. -Only one CA in 3td5_N.pdb. -Only one CA in 3td5_O.pdb. -Only one CA in 3td5_P.pdb. -Only one CA in 8bf7_6.pdb. -Only one CA in 8r8m_5.pdb. -Only one CA in 6cnu_A.pdb. -Only one CA in 2bd7_P.pdb. -Only one CA in 1mcl_P.pdb. -Only one CA in 3uw4_Z.pdb. -Only one CA in 9b5c_D.pdb. -Only one CA in 1tmb_T.pdb. -Only one CA in 1jyq_L.pdb. -Only one CA in 1jyq_H.pdb. -Only one CA in 1pek_D.pdb. -Only one CA in 4y84_e.pdb. -Only one CA in 4y84_f.pdb. -Only one CA in 4y84_g.pdb. -Only one CA in 4y84_h.pdb. -Only one CA in 4y84_i.pdb. -Only one CA in 4y84_j.pdb. -Only one CA in 1ghg_A.pdb. -Only one CA in 1ghg_B.pdb. -Only one CA in 1ghg_C.pdb. -Only one CA in 1ghg_D.pdb. -Only one CA in 6xbh_C.pdb. -Only one CA in 3e7a_C.pdb. -Only one CA in 3e7a_D.pdb. -Only one CA in 4jna_H.pdb. -Only one CA in 4jna_I.pdb. -Only one CA in 7oyn_B.pdb. -Only one CA in 9b5h_D.pdb. -Only one CA in 3s8n_B.pdb. -Only one CA in 6f4p_B.pdb. -19 results collected -134 -135 -136 -137 -138 -139 -140 -141 -142 -143 -144 -145 -146 -147 -148 -149 -150 -151 -152 -Collecting results -No CA found in 4lkf_D.pdb -Only one CA in 4whl_B.pdb. -Only one CA in 7ofv_B.pdb. -Only one CA in 2aiz_U.pdb. -Only one CA in 3g3p_D.pdb. -Only one CA in 9b5j_D.pdb. -Only one CA in 3nzx_3.pdb. -Only one CA in 3nzx_4.pdb. -Only one CA in 8ysa_B.pdb. -Only one CA in 6xzv_B.pdb. -Only one CA in 3run_B.pdb. -Only one CA in 2mtz_B.pdb. -Only one CA in 2mtz_C.pdb. -Only one CA in 2mtz_D.pdb. -Only one CA in 2mtz_E.pdb. -Only one CA in 2mtz_F.pdb. -Only one CA in 2mtz_G.pdb. -Only one CA in 6wm1_B.pdb. -Only one CA in 6wm1_D.pdb. -Only one CA in 3egh_E.pdb. -Only one CA in 3egh_F.pdb. -Only one CA in 7f0c_C.pdb. -Only one CA in 9b5s_D.pdb. -Only one CA in 5d13_I.pdb. -Only one CA in 1mcb_P.pdb. -Only one CA in 8ano_I.pdb. -Only one CA in 5dir_E.pdb. -Only one CA in 5dir_F.pdb. -Only one CA in 5dir_G.pdb. -Only one CA in 5dir_H.pdb. -Only one CA in 1czq_D.pdb. -Only one CA in 1aa5_A.pdb. -Only one CA in 1aa5_B.pdb. -Only one CA in 9b5t_D.pdb. -Only one CA in 6wuu_G.pdb. -Only one CA in 6wuu_H.pdb. -Only one CA in 6wuu_I.pdb. -Only one CA in 6wuu_J.pdb. -Only one CA in 4uca_C.pdb. -Only one CA in 8g82_A.pdb. -Only one CA in 8g82_B.pdb. -Only one CA in 8g82_C.pdb. -Only one CA in 8g82_D.pdb. -Only one CA in 8g82_E.pdb. -Only one CA in 8g82_F.pdb. -Only one CA in 8g82_G.pdb. -Only one CA in 8g82_H.pdb. -Only one CA in 8g82_I.pdb. -Only one CA in 8g82_J.pdb. -19 results collected -153 -154 -155 -156 -157 -158 -159 -160 -161 -162 -163 -164 -165 -166 -167 -168 -169 -170 -171 -Collecting results -No CA found in 5ngq_F.pdb -No CA found in 5ngq_G.pdb -No CA found in 5ngq_H.pdb -Only one CA in 5i5a_B.pdb. -Only one CA in 6fcr_F.pdb. -Only one CA in 6fcr_G.pdb. -Only one CA in 4jsq_c.pdb. -Only one CA in 4jsq_d.pdb. -Only one CA in 1evd_A.pdb. -Only one CA in 4gm9_E.pdb. -Only one CA in 4gm9_F.pdb. -Only one CA in 4jsu_c.pdb. -Only one CA in 4jsu_d.pdb. -Only one CA in 4jsu_e.pdb. -Only one CA in 4jsu_f.pdb. -Only one CA in 1a09_C.pdb. -Only one CA in 1a09_D.pdb. -Only one CA in 6s7g_E.pdb. -Only one CA in 1a07_C.pdb. -Only one CA in 1a07_D.pdb. -Only one CA in 4whh_B.pdb. -Only one CA in 2nyq_B.pdb. -Only one CA in 3s8l_B.pdb. -Only one CA in 1rrv_C.pdb. -Only one CA in 1rrv_D.pdb. -Only one CA in 4whk_B.pdb. -Only one CA in 6m5e_F.pdb. -Only one CA in 6m5e_G.pdb. -Only one CA in 6m5e_H.pdb. -Only one CA in 6m5e_I.pdb. -Only one CA in 6m5e_J.pdb. -Only one CA in 4y6v_c.pdb. -Only one CA in 4y6v_d.pdb. -Only one CA in 4dkt_B.pdb. -Only one CA in 8hor_C.pdb. -Only one CA in 8hor_D.pdb. -Only one CA in 4ttk_A.pdb. -Only one CA in 5v93_b.pdb. -Only one CA in 5zms_F.pdb. -Only one CA in 5zms_I.pdb. -Only one CA in 3e0n_A.pdb. -Only one CA in 1hhf_A.pdb. -Only one CA in 1hhf_B.pdb. -Only one CA in 1hhf_C.pdb. -Only one CA in 1hhf_D.pdb. -Only one CA in 4p9v_B.pdb. -Only one CA in 4p9z_B.pdb. -Only one CA in 7m2p_B.pdb. -Only one CA in 5tdb_B.pdb. -Only one CA in 8c3d_LIG.pdb. -Only one CA in 7pjw_y.pdb. -Only one CA in 1hha_A.pdb. -Only one CA in 1hha_B.pdb. -Only one CA in 1hha_C.pdb. -Only one CA in 1hha_D.pdb. -Only one CA in 2xdw_P.pdb. -19 results collected -172 -173 -174 -175 -176 -177 -178 -179 -180 -181 -182 -183 -184 -185 -186 -187 -188 -189 -190 -Collecting results -Only one CA in 6ppm_F.pdb. -Only one CA in 3u8o_I.pdb. -Only one CA in 6s5p_E.pdb. -Only one CA in 6s5p_F.pdb. -Only one CA in 6cl1_E.pdb. -Only one CA in 6cl1_F.pdb. -Only one CA in 4knl_F.pdb. -Only one CA in 4knl_G.pdb. -Only one CA in 1a1c_C.pdb. -Only one CA in 1a1c_D.pdb. -Only one CA in 4ucb_C.pdb. -Only one CA in 4ucb_D.pdb. -Only one CA in 7qdi_B.pdb. -Only one CA in 7qdi_C.pdb. -Only one CA in 7qdi_E.pdb. -Only one CA in 7qdi_G.pdb. -Only one CA in 1p4n_B.pdb. -Only one CA in 4jt0_c.pdb. -Only one CA in 4jt0_d.pdb. -Only one CA in 7pjv_y.pdb. -Only one CA in 6fcs_B.pdb. -Only one CA in 4o46_L.pdb. -Only one CA in 7ltb_A.pdb. -Only one CA in 7ltb_B.pdb. -Only one CA in 4v7m_AZ.pdb. -Only one CA in 4v7m_CZ.pdb. -Only one CA in 8pxy_B.pdb. -Only one CA in 8k4z_B.pdb. -Only one CA in 1pn3_C.pdb. -Only one CA in 1pn3_D.pdb. -Only one CA in 6ece_C.pdb. -Only one CA in 6ece_D.pdb. -Only one CA in 1mcq_P.pdb. -Only one CA in 6x5r_C.pdb. -Only one CA in 6x5r_D.pdb. -Only one CA in 3qxy_P.pdb. -Only one CA in 3qxy_Q.pdb. -Only one CA in 5cgh_c.pdb. -Only one CA in 5cgh_g.pdb. -Only one CA in 4zkr_P.pdb. -Only one CA in 3mgn_H.pdb. -Only one CA in 3mgn_K.pdb. -Only one CA in 8r6c_6.pdb. -Only one CA in 3ov1_B.pdb. -Only one CA in 1gdq_B.pdb. -Only one CA in 4rcp_B.pdb. -Only one CA in 1a7z_A.pdb. -Only one CA in 1a7z_B.pdb. -Only one CA in 5nwk_W.pdb. -Only one CA in 2r5b_H.pdb. -Only one CA in 2r5b_K.pdb. -Only one CA in 2r5b_L.pdb. -Only one CA in 1czi_P.pdb. -Only one CA in 8rhk_g.pdb. -Only one CA in 8rhk_e.pdb. -Only one CA in 8rhk_i.pdb. -Only one CA in 8rhk_f.pdb. -Only one CA in 8rhk_h.pdb. -Only one CA in 8rhk_j.pdb. -Only one CA in 6y9h_C.pdb. -Only one CA in 5onj_D.pdb. -19 results collected -191 -192 -193 -194 -195 -196 -197 -198 -199 -200 -201 -202 -203 -204 -205 -206 -207 -208 -209 -Collecting results -Only one CA in 1go6_A.pdb. -Only one CA in 1go6_B.pdb. -Only one CA in 1go6_C.pdb. -Only one CA in 1go6_D.pdb. -Only one CA in 1go6_E.pdb. -Only one CA in 1go6_F.pdb. -Only one CA in 1go6_G.pdb. -Only one CA in 1go6_H.pdb. -Only one CA in 1go6_I.pdb. -Only one CA in 1go6_K.pdb. -Only one CA in 1go6_M.pdb. -Only one CA in 1go6_O.pdb. -Only one CA in 2g2l_D.pdb. -Only one CA in 6psa_H.pdb. -Only one CA in 4f75_D.pdb. -Only one CA in 5nf0_F.pdb. -Only one CA in 1mci_P.pdb. -Only one CA in 3bv9_C.pdb. -Only one CA in 1mcn_P.pdb. -Only one CA in 7lfu_A.pdb. -Only one CA in 6e8m_B.pdb. -Only one CA in 6tch_A.pdb. -Only one CA in 8rhj_g.pdb. -Only one CA in 8rhj_e.pdb. -Only one CA in 8rhj_i.pdb. -Only one CA in 8rhj_f.pdb. -Only one CA in 8rhj_h.pdb. -Only one CA in 8rhj_j.pdb. -Only one CA in 6phm_A.pdb. -Only one CA in 8i84_D.pdb. -Only one CA in 8bhj_6.pdb. -Only one CA in 1h0g_C.pdb. -Only one CA in 1h0g_D.pdb. -Only one CA in 8s9z_B.pdb. -Only one CA in 3h5r_G.pdb. -Only one CA in 3h5r_H.pdb. -Only one CA in 4ks6_B.pdb. -Only one CA in 9b5p_D.pdb. -Only one CA in 6l5z_C.pdb. -Only one CA in 8jug_C.pdb. -Only one CA in 1pnv_C.pdb. -Only one CA in 4jje_B.pdb. -19 results collected -Time taken (save to h5): 2115.938725709915 seconds -No CA found in 4lke_G.pdb -No CA found in 4lkf_D.pdb -No CA found in 3h9q_H.pdb -No CA found in 5ngq_F.pdb -No CA found in 5ngq_G.pdb -No CA found in 5ngq_H.pdb -End time: Wed Aug 14 00:48:08 CEST 2024 -Computation time: 00:35:43 diff --git a/experiments/data/pdb_all_192.txt b/experiments/data/pdb_all_192.txt deleted file mode 100644 index ce8cb7b..0000000 --- a/experiments/data/pdb_all_192.txt +++ /dev/null @@ -1,1148 +0,0 @@ -IP -172.25.0.102 -Now please run command: -eval "$(conda shell.bash hook)" -to set conda env - Miniconda3/23.9.0-0 loaded. -Start time: Mon Aug 12 19:10:19 CEST 2024 -http://127.0.0.1:8989/status -2024-08-12 19:10:34.876656 -Globbing /net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/datasets -Found 0 files -Retrieving index file. Takes about 27 MB. -PDBList().get_all_entries time: 9.328515768051147 seconds -After removing non protein codes 218923 -Found 0 present protein files -Found 218923 missing protein ids -Downloading ids -Downloading PDBs into 219 chunks -Total download + to h5 256.4463484287262 -Total download + to h5 268.57615303993225 -Total download + to h5 283.40497970581055 -Total download + to h5 284.06529927253723 -Total download + to h5 291.915301322937 -Total download + to h5 290.92255544662476 -Total download + to h5 309.23478388786316 -Total download + to h5 298.2814748287201 -Total download + to h5 299.25596284866333 -Total download + to h5 315.13100695610046 -Total download + to h5 303.84771609306335 -Total download + to h5 319.6289482116699 -Total download + to h5 321.14261198043823 -Total download + to h5 310.52193331718445 -Total download + to h5 312.29970955848694 -Total download + to h5 317.64353013038635 -Total download + to h5 319.2199237346649 -Total download + to h5 323.73480439186096 -Total download + to h5 360.38008761405945 -Total download + to h5 275.01791620254517 -Total download + to h5 275.9345817565918 -Total download + to h5 264.88845896720886 -Total download + to h5 282.4938371181488 -Total download + to h5 305.39479064941406 -Total download + to h5 295.1144585609436 -Total download + to h5 296.8580541610718 -Total download + to h5 303.0214283466339 -Total download + to h5 304.49925923347473 -Total download + to h5 313.170045375824 -Total download + to h5 328.1118412017822 -Total download + to h5 316.4661138057709 -Total download + to h5 196.77590370178223 -Total download + to h5 334.20031213760376 -Total download + to h5 351.79816031455994 -Total download + to h5 353.9921691417694 -Total download + to h5 339.91995644569397 -Total download + to h5 345.48388671875 -Total download + to h5 353.0572922229767 -Total download + to h5 207.29337358474731 -Total download + to h5 284.4203767776489 -Total download + to h5 273.9354598522186 -Total download + to h5 275.03883385658264 -Total download + to h5 284.62511134147644 -Total download + to h5 290.63397097587585 -Total download + to h5 291.2079064846039 -Total download + to h5 294.23969769477844 -Total download + to h5 296.2894721031189 -Total download + to h5 300.5322165489197 -Total download + to h5 315.7534170150757 -Total download + to h5 315.88746190071106 -Total download + to h5 323.38559222221375 -Total download + to h5 326.2505202293396 -Total download + to h5 345.32963514328003 -Total download + to h5 346.67214345932007 -Total download + to h5 346.71013283729553 -Total download + to h5 359.96675848960876 -Total download + to h5 371.90297770500183 -Total download + to h5 254.2955505847931 -Total download + to h5 276.8765585422516 -Total download + to h5 271.74218010902405 -Total download + to h5 293.780912399292 -Total download + to h5 286.8968381881714 -Total download + to h5 295.03443121910095 -Total download + to h5 296.03205943107605 -Total download + to h5 311.77219223976135 -Total download + to h5 303.2792537212372 -Total download + to h5 306.29833579063416 -Total download + to h5 314.21231150627136 -Total download + to h5 326.9099600315094 -Total download + to h5 323.95018577575684 -Total download + to h5 325.20751309394836 -Total download + to h5 325.5990433692932 -Total download + to h5 337.7343773841858 -Total download + to h5 344.7753596305847 -Total download + to h5 345.3444139957428 -Total download + to h5 355.3049404621124 -Total download + to h5 262.4690718650818 -Total download + to h5 287.1353099346161 -Total download + to h5 282.9660761356354 -Total download + to h5 285.00448298454285 -Total download + to h5 292.59982109069824 -Total download + to h5 310.5111219882965 -Total download + to h5 298.0343985557556 -Total download + to h5 303.1496708393097 -Total download + to h5 319.52433681488037 -Total download + to h5 317.3453640937805 -Total download + to h5 318.1446363925934 -Total download + to h5 325.7487599849701 -Total download + to h5 326.8904058933258 -Total download + to h5 330.7923834323883 -Total download + to h5 288.3323218822479 -Total download + to h5 334.1163811683655 -Total download + to h5 335.39249420166016 -Total download + to h5 336.50076842308044 -Total download + to h5 356.6597936153412 -Total download + to h5 208.94483613967896 -Total download + to h5 261.43696641921997 -Total download + to h5 282.4053695201874 -Total download + to h5 292.07412600517273 -Total download + to h5 293.7116858959198 -Total download + to h5 313.4460985660553 -Total download + to h5 305.3218982219696 -Total download + to h5 308.8170499801636 -Total download + to h5 311.4237720966339 -Total download + to h5 323.69327688217163 -Total download + to h5 318.79333901405334 -Total download + to h5 330.9580948352814 -Total download + to h5 331.0918519496918 -Total download + to h5 334.6832413673401 -Total download + to h5 352.44618344306946 -Total download + to h5 340.96432280540466 -Total download + to h5 347.0188205242157 -Total download + to h5 347.65894317626953 -Total download + to h5 347.59255194664 -Total download + to h5 242.00575232505798 -Total download + to h5 253.74963927268982 -Total download + to h5 256.29515171051025 -Total download + to h5 260.33493971824646 -Total download + to h5 248.97310876846313 -Total download + to h5 275.5248284339905 -Total download + to h5 311.4109025001526 -Total download + to h5 308.1357078552246 -Total download + to h5 314.093829870224 -Total download + to h5 321.5709912776947 -Total download + to h5 325.96577739715576 -Total download + to h5 326.69378328323364 -Total download + to h5 335.58699464797974 -Total download + to h5 339.0101788043976 -Total download + to h5 348.41044092178345 -Total download + to h5 349.140154838562 -Total download + to h5 357.00191283226013 -Total download + to h5 370.1439735889435 -Total download + to h5 359.0408983230591 -Total download + to h5 229.60449647903442 -Total download + to h5 266.3094446659088 -Total download + to h5 286.590833902359 -Total download + to h5 290.2743318080902 -Total download + to h5 299.9331064224243 -Total download + to h5 303.5882179737091 -Total download + to h5 279.784539937973 -Total download + to h5 317.4639892578125 -Total download + to h5 321.55198884010315 -Total download + to h5 324.37418961524963 -Total download + to h5 339.6519601345062 -Total download + to h5 342.2562437057495 -Total download + to h5 341.9607181549072 -Total download + to h5 334.94605708122253 -Total download + to h5 346.9305217266083 -Total download + to h5 364.6004567146301 -Total download + to h5 379.0898857116699 -Total download + to h5 369.59704780578613 -Total download + to h5 379.5478949546814 -Total download + to h5 256.99757504463196 -Total download + to h5 260.0996940135956 -Total download + to h5 249.77260971069336 -Total download + to h5 262.37908935546875 -Total download + to h5 247.04669070243835 -Total download + to h5 265.53390526771545 -Total download + to h5 197.3304488658905 -Total download + to h5 255.52291750907898 -Total download + to h5 284.97889852523804 -Total download + to h5 286.6716146469116 -Total download + to h5 292.1790814399719 -Total download + to h5 223.63168382644653 -Total download + to h5 209.91374468803406 -Total download + to h5 215.8878471851349 -Total download + to h5 228.97281432151794 -Total download + to h5 214.3267319202423 -Total download + to h5 197.15452480316162 -Total download + to h5 234.16546940803528 -Total download + to h5 250.91994857788086 -Total download + to h5 275.38979387283325 -Total download + to h5 289.7571666240692 -Total download + to h5 292.55367064476013 -Total download + to h5 293.5512726306915 -Total download + to h5 282.05709528923035 -Total download + to h5 305.25981402397156 -Total download + to h5 320.86791729927063 -Total download + to h5 308.12144923210144 -Total download + to h5 306.09631872177124 -Total download + to h5 310.05412793159485 -Total download + to h5 331.74141359329224 -Total download + to h5 321.4482522010803 -Total download + to h5 334.96778535842896 -Total download + to h5 337.7604615688324 -Total download + to h5 338.53901743888855 -Total download + to h5 339.6776955127716 -Total download + to h5 358.3839359283447 -Total download + to h5 343.6420433521271 -Total download + to h5 345.5493993759155 -Total download + to h5 297.8291528224945 -Total download + to h5 313.6565775871277 -Total download + to h5 302.3830654621124 -Total download + to h5 321.72128224372864 -Total download + to h5 341.1219844818115 -Total download + to h5 326.44264340400696 -Total download + to h5 334.81666803359985 -Total download + to h5 355.6277987957001 -Total download + to h5 338.33704137802124 -Total download + to h5 301.1920621395111 -Total download + to h5 342.88831424713135 -Total download + to h5 344.39749097824097 -Total download + to h5 340.92007756233215 -Total download + to h5 348.28559255599976 -Total download + to h5 343.95181822776794 -Total download + to h5 364.6757650375366 -Total download + to h5 350.17813777923584 -Total download + to h5 362.6128306388855 -Total download + to h5 371.2220368385315 -Total download + to h5 166.0706946849823 -Total download + to h5 166.5716691017151 -Total download + to h5 158.91274094581604 -Total download + to h5 198.515522480011 -Total download + to h5 231.30122113227844 -Total download + to h5 236.78256011009216 -Total download + to h5 244.56594371795654 -Total download + to h5 259.43544125556946 -Total download + to h5 270.53493428230286 -Total download + to h5 271.3877046108246 -Retrying downloading 7b7o 1 -Retrying downloading 4yoc 1 -Retrying downloading 7c2j 1 -Retrying downloading 6zfk 1 -Retrying downloading 2d5m 1 -Retrying downloading 3ok5 1 -Retrying downloading 2i0w 1 -Retrying downloading 3b8g 1 -Retrying downloading 2x31 1 -Retrying downloading 6qjy 1 -Retrying downloading 8pay 1 -Retrying downloading 4ci0 1 -Retrying downloading 5aei 1 -Retrying downloading 7bgr 1 -Retrying downloading 7yoh 1 -Retrying downloading 7rnb 1 -Retrying downloading 8tl0 1 -Retrying downloading 1nfx 1 -Retrying downloading 5uib 1 -Retrying downloading 6lbd 1 -Retrying downloading 5hxq 1 -Retrying downloading 3dgc 1 -Retrying downloading 5uvs 1 -Retrying downloading 3vxc 1 -Retrying downloading 5x1y 1 -Retrying downloading 4iyj 1 -Retrying downloading 1olg 1 -Retrying downloading 7mbj 1 -Retrying downloading 6wf4 1 -Retrying downloading 3fkz 1 -Retrying downloading 4px3 1 -Retrying downloading 7r2p 1 -Retrying downloading 6c7m 1 -Retrying downloading 8cpa 1 -Retrying downloading 1p9e 1 -Retrying downloading 1bvl 1 -Retrying downloading 2mu4 1 -Retrying downloading 7m07 1 -Retrying downloading 1eg2 1 -Retrying downloading 2zwl 1 -Retrying downloading 5w3c 1 -Retrying downloading 4jbr 1 -Retrying downloading 8t8p 1 -Retrying downloading 6fmb 1 -Retrying downloading 5cv7 1 -Retrying downloading 7tyz 1 -Retrying downloading 6cze 1 -Retrying downloading 5kpm 1 -Retrying downloading 5e0i 1 -Retrying downloading 6xpr 1 -Retrying downloading 7t22 1 -Retrying downloading 1odg 1 -Retrying downloading 3c03 1 -Retrying downloading 5vfo 1 -Retrying downloading 4edl 1 -Retrying downloading 5ihj 1 -Retrying downloading 6ppq 1 -Retrying downloading 2r1f 1 -Retrying downloading 6a4o 1 -Retrying downloading 1qwr 1 -Retrying downloading 5un9 1 -Retrying downloading 5xdt 1 -Retrying downloading 4nr8 1 -Retrying downloading 3ob0 1 -Retrying downloading 1l10 1 -Retrying downloading 6eht 1 -Retrying downloading 1j9z 1 -Retrying downloading 8hzw 1 -Retrying downloading 6gsg 1 -Retrying downloading 3nxd 1 -Retrying downloading 1rwi 1 -Retrying downloading 1ohh 1 -Retrying downloading 2i5t 1 -Retrying downloading 5vkf 1 -Retrying downloading 6edj 1 -Retrying downloading 4l9n 1 -Retrying downloading 7gf5 1 -Retrying downloading 7o8z 1 -Retrying downloading 7lzr 1 -Retrying downloading 1eif 1 -Retrying downloading 4hta 1 -Retrying downloading 5oqq 1 -Retrying downloading 7cch 1 -Retrying downloading 4cj0 1 -Retrying downloading 6th7 1 -Retrying downloading 2j37 1 -Retrying downloading 4klo 1 -Retrying downloading 1hdq 1 -Retrying downloading 6ne6 1 -Retrying downloading 7kzw 1 -Retrying downloading 5kw9 1 -Retrying downloading 2fgg 1 -Retrying downloading 3f57 1 -Retrying downloading 2cdb 1 -Retrying downloading 8ssa 1 -Retrying downloading 2rhp 1 -Retrying downloading 1no8 1 -Retrying downloading 3nhb 1 -Retrying downloading 3ptf 1 -Retrying downloading 5ofa 1 -Retrying downloading 8bw7 1 -Retrying downloading 7foo 1 -Retrying downloading 7kre 1 -Retrying downloading 4k8b 1 -Retrying downloading 4jpz 1 -Retrying downloading 3peb 1 -Retrying downloading 4zu2 1 -Retrying downloading 4hk6 1 -Retrying downloading 2jt6 1 -Retrying downloading 7dvv 1 -Retrying downloading 7sns 1 -Retrying downloading 5aep 1 -Retrying downloading 1qly 1 -Retrying downloading 3mwz 1 -Retrying downloading 7ony 1 -Retrying downloading 2izt 1 -Retrying downloading 4pbe 1 -Retrying downloading 4txh 1 -Retrying downloading 5gat 1 -Retrying downloading 8e89 1 -Retrying downloading 8sn5 1 -Retrying downloading 2bwr 1 -Retrying downloading 5xhf 1 -Retrying downloading 3wme 1 -Retrying downloading 6zse 1 -Retrying downloading 4dpz 1 -Retrying downloading 6iqf 1 -Retrying downloading 1mhp 1 -Retrying downloading 3fr1 1 -Retrying downloading 5lgs 1 -Retrying downloading 2zch 1 -Retrying downloading 3a1p 1 -Retrying downloading 7g21 1 -Retrying downloading 5gj6 1 -Retrying downloading 2wyl 1 -Retrying downloading 4gey 1 -Retrying downloading 7zpl 1 -Retrying downloading 4rve 1 -Retrying downloading 7wlt 1 -Retrying downloading 5odt 1 -Retrying downloading 8a82 1 -Retrying downloading 3igz 1 -Retrying downloading 4dgi 1 -Retrying downloading 7xom 1 -Retrying downloading 7d8s 1 -Retrying downloading 7s33 1 -Retrying downloading 1vbb 1 -Retrying downloading 4ebb 1 -Retrying downloading 4pg2 1 -Retrying downloading 1r9h 1 -Retrying downloading 5eh9 1 -Retrying downloading 6g1k 1 -Retrying downloading 1ca0 1 -Retrying downloading 3kx2 1 -Retrying downloading 3gf7 1 -Retrying downloading 4gvw 1 -Retrying downloading 6xay 1 -Retrying downloading 5vap 1 -Retrying downloading 5tdb 1 -Retrying downloading 4cpt 1 -Retrying downloading 1o30 1 -Retrying downloading 4bfc 1 -Retrying downloading 5xma 1 -Retrying downloading 6v25 1 -Retrying downloading 6dkd 1 -Retrying downloading 7xta 1 -Retrying downloading 4h6d 1 -Retrying downloading 5ckv 1 -Retrying downloading 6nsu 1 -Retrying downloading 4gwx 1 -Retrying downloading 4wg4 1 -Retrying downloading 1vg1 1 -Retrying downloading 6bry 1 -Retrying downloading 5voe 1 -Retrying downloading 6qlg 1 -Retrying downloading 3x3t 1 -Retrying downloading 1qnp 1 -Retrying downloading 1bi0 1 -Retrying downloading 5qbe 1 -Retrying downloading 1e32 1 -Retrying downloading 5d1j 1 -Retrying downloading 5j72 1 -Retrying downloading 8snd 1 -Retrying downloading 2dl8 1 -Retrying downloading 1ttf 1 -Retrying downloading 3i71 1 -Retrying downloading 6wlc 1 -Retrying downloading 1qxr 1 -Retrying downloading 4y8j 1 -Retrying downloading 1eo4 1 -Retrying downloading 4bo9 1 -Retrying downloading 4cgl 1 -Retrying downloading 2dic 1 -Retrying downloading 1t6s 1 -Retrying downloading 6f76 1 -Retrying downloading 8j10 1 -Retrying downloading 5hq0 1 -Retrying downloading 7b5p 1 -Retrying downloading 5yb3 1 -Retrying downloading 6ion 1 -Retrying downloading 8bj0 1 -Retrying downloading 3pia 1 -Retrying downloading 1ejv 1 -Retrying downloading 2p74 1 -Retrying downloading 4cag 1 -Retrying downloading 7aiu 1 -Retrying downloading 4jz5 1 -Retrying downloading 5e05 1 -Retrying downloading 7qi9 1 -Retrying downloading 2cfh 1 -Retrying downloading 8xjm 1 -Retrying downloading 3rmj 1 -Retrying downloading 3oza 1 -Retrying downloading 5wkb 1 -Retrying downloading 4qvg 1 -Retrying downloading 6srh 1 -Retrying downloading 3syc 1 -Retrying downloading 1t1y 1 -Retrying downloading 1xoa 1 -Retrying downloading 6sc9 1 -Retrying downloading 2q8x 1 -Retrying downloading 6m7v 1 -Retrying downloading 2kbc 1 -Retrying downloading 4lup 1 -Retrying downloading 5wr9 1 -Retrying downloading 5fxn 1 -Retrying downloading 7fyd 1 -Retrying downloading 3shl 1 -Retrying downloading 1n8v 1 -Retrying downloading 5rub 1 -Retrying downloading 4fxc 1 -Retrying downloading 4xri 1 -Retrying downloading 5dhu 1 -Retrying downloading 3ouj 1 -Retrying downloading 6y9p 1 -Retrying downloading 1ooa 1 -Retrying downloading 7nk2 1 -Retrying downloading 1x8d 1 -Retrying downloading 6cka 1 -Retrying downloading 3m7f 1 -Retrying downloading 5j5u 1 -Retrying downloading 3dcx 1 -Retrying downloading 7jlk 1 -Retrying downloading 8eo2 1 -Retrying downloading 5p6s 1 -Retrying downloading 4ats 1 -Retrying downloading 5qqr 1 -Retrying downloading 7pg2 1 -Retrying downloading 6e3n 1 -Retrying downloading 7wc7 1 -Retrying downloading 4qig 1 -Retrying downloading 3lrf 1 -Retrying downloading 2n5i 1 -Retrying downloading 4rw5 1 -Retrying downloading 6z20 1 -Retrying downloading 4d13 1 -Retrying downloading 3hmv 1 -Retrying downloading 1pms 1 -Retrying downloading 8orh 1 -Retrying downloading 2jsi 1 -Retrying downloading 7yw4 1 -Retrying downloading 1fyg 1 -Retrying downloading 8fcq 1 -Retrying downloading 1pvh 1 -Retrying downloading 2q0s 1 -Retrying downloading 4ea8 1 -Retrying downloading 8ew6 1 -Retrying downloading 2vp0 1 -Total processing time: 346.71013283729553 -Retrying downloading 7zq6 1 -Retrying downloading 8poa 1 -Total processing time: 369.59704780578613 -Retrying downloading 8qfj 1 -Retrying downloading 1pa1 1 -Retrying downloading 2jt0 1 -Total processing time: 359.96675848960876 -Retrying downloading 8wy4 1 -Retrying downloading 4bf3 1 -Total processing time: 266.3094446659088 -Retrying downloading 6w22 1 -Retrying downloading 6wpy 1 -Retrying downloading 3ou3 1 -Total processing time: 271.74218010902405 -Retrying downloading 3qf4 1 -Retrying downloading 1a2q 1 -Retrying downloading 3n8y 1 -Retrying downloading 7ze3 1 -Retrying downloading 8qya 1 -Total processing time: 282.9660761356354 -Retrying downloading 5j4n 1 -Retrying downloading 6d07 1 -Total processing time: 370.1439735889435 -Retrying downloading 4igx 1 -Retrying downloading 2z7e 1 -Total processing time: 310.05412793159485 -Retrying downloading 3hhb 1 -Total processing time: 344.7753596305847 -Retrying downloading 8y14 1 -Retrying downloading 7r2d 1 -Total processing time: 353.0572922229767 -Retrying downloading 4r2e 1 -Total processing time: 314.093829870224 -Retrying downloading 4zso 1 -Total processing time: 286.6716146469116 -Retrying downloading 5spw 1 -Total processing time: 328.1118412017822 -Retrying downloading 1vae 1 -Total processing time: 325.7487599849701 -Total processing time: 313.4460985660553 -Total processing time: 289.7571666240692 -Retrying downloading 5t3h 1 -Retrying downloading 2lp0 1 -Total processing time: 291.2079064846039 -Total processing time: 346.9305217266083 -Retrying downloading 3lah 1 -Total processing time: 326.2505202293396 -Retrying downloading 6dog 1 -Retrying downloading 5ohx 1 -Retrying downloading 4dp5 1 -Total processing time: 317.4639892578125 -Total processing time: 234.16546940803528 -Retrying downloading 8cgp 1 -Total processing time: 298.2814748287201 -Retrying downloading 2gpv 1 -Retrying downloading 6wg3 1 -Total processing time: 305.3218982219696 -Retrying downloading 3iw1 1 -Retrying downloading 1enj 1 -Retrying downloading 5spq 1 -Retrying downloading 3odf 1 -Total processing time: 315.7534170150757 -Retrying downloading 5fxy 1 -Retrying downloading 4r2m 1 -Total processing time: 364.6004567146301 -Retrying downloading 7zx8 1 -Retrying downloading 5h1b 1 -Total processing time: 356.6597936153412 -Retrying downloading 5ips 1 -Retrying downloading 3fs9 1 -Retrying downloading 6hwt 1 -Retrying downloading 3v8r 1 -Retrying downloading 6dy6 1 -Total processing time: 325.96577739715576 -Retrying downloading 7xol 1 -Retrying downloading 1b7o 1 -Total processing time: 334.20031213760376 -Retrying downloading 3vdg 1 -Retrying downloading 3bh9 1 -Retrying downloading 5il2 1 -Total processing time: 355.3049404621124 -Retrying downloading 4c1a 1 -Retrying downloading 7s5e 1 -Total processing time: 275.03883385658264 -Retrying downloading 3c7u 1 -Retrying downloading 3h21 1 -Retrying downloading 1gyg 1 -Retrying downloading 6tl3 1 -Total processing time: 300.5322165489197 -Retrying downloading 5lsv 1 -Total processing time: 290.2743318080902 -Retrying downloading 2yek 1 -Retrying downloading 5yvq 1 -Total processing time: 276.8765585422516 -Retrying downloading 6ukl 1 -Retrying downloading 6zmp 1 -Total processing time: 256.99757504463196 -Retrying downloading 6a54 1 -Retrying downloading 1p5x 1 -Retrying downloading 4clr 1 -Retrying downloading 1ett 1 -Total processing time: 303.1496708393097 -Retrying downloading 4pwg 1 -Total processing time: 275.38979387283325 -Retrying downloading 4g70 1 -Retrying downloading 3aqp 1 -Retrying downloading 3d5q 1 -Retrying downloading 6bf7 1 -Retrying downloading 1rl5 1 -Retrying downloading 3iw0 1 -Total processing time: 209.91374468803406 -Retrying downloading 5j12 1 -Total processing time: 312.29970955848694 -Retrying downloading 4juc 1 -Total processing time: 334.6832413673401 -Retrying downloading 5czt 1 -Total processing time: 302.3830654621124 -Retrying downloading 1nkv 1 -Retrying downloading 1p3o 1 -Retrying downloading 4bn7 1 -Retrying downloading 7e65 1 -Total processing time: 326.44264340400696 -Retrying downloading 2xq1 1 -Retrying downloading 3noy 1 -Total processing time: 326.9099600315094 -Retrying downloading 3ebe 1 -Retrying downloading 6stp 1 -Total processing time: 349.140154838562 -Retrying downloading 5e97 1 -Retrying downloading 5zod 1 -Retrying downloading 6nmt 1 -Total processing time: 336.50076842308044 -Retrying downloading 5io1 1 -Total processing time: 305.25981402397156 -Retrying downloading 3wtc 1 -Total processing time: 339.91995644569397 -Retrying downloading 7upt 1 -Total processing time: 359.0408983230591 -Retrying downloading 7gfz 1 -Retrying downloading 7eaw 1 -Retrying downloading 8edu 1 -Total processing time: 286.8968381881714 -Retrying downloading 7h22 1 -Retrying downloading 6yrc 1 -Total processing time: 323.95018577575684 -Retrying downloading 6cx9 1 -Retrying downloading 7s8l 1 -Total processing time: 198.515522480011 -Total processing time: 255.52291750907898 -Retrying downloading 8ia8 1 -Total processing time: 313.170045375824 -Retrying downloading 3q65 1 -Retrying downloading 7ts2 1 -Total processing time: 335.58699464797974 -Retrying downloading 8xi7 1 -Retrying downloading 3nso 1 -Retrying downloading 1a7h 1 -Total processing time: 275.5248284339905 -Retrying downloading 6gqd 1 -Retrying downloading 5p9v 1 -Retrying downloading 9ewq 1 -Total processing time: 309.23478388786316 -Total processing time: 303.84771609306335 -Total processing time: 292.07412600517273 -Retrying downloading 2zr0 1 -Total processing time: 297.8291528224945 -Total processing time: 284.4203767776489 -Total processing time: 294.23969769477844 -Retrying downloading 5vrd 1 -Retrying downloading 1gp6 1 -Retrying downloading 2iqf 1 -Retrying downloading 4i0p 1 -Total processing time: 342.2562437057495 -Retrying downloading 3nzj 1 -Total processing time: 287.1353099346161 -Retrying downloading 6q31 1 -Retrying downloading 6ne5 1 -Total processing time: 299.25596284866333 -Retrying downloading 3qz1 1 -Total processing time: 340.96432280540466 -Retrying downloading 1r7m 1 -Retrying downloading 6w8a 1 -Total processing time: 362.6128306388855 -Retrying downloading 2z07 1 -Total processing time: 316.4661138057709 -Retrying downloading 5z9o 1 -Total processing time: 260.33493971824646 -Retrying downloading 5hkm 1 -Retrying downloading 6yjy 1 -Retrying downloading 3fbq 1 -Retrying downloading 6yh3 1 -Total processing time: 262.4690718650818 -Total processing time: 345.5493993759155 -Retrying downloading 7zph 1 -Retrying downloading 8ct9 1 -Total processing time: 254.2955505847931 -Retrying downloading 6enl 1 -Total processing time: 292.1790814399719 -Retrying downloading 5o5u 1 -Total processing time: 284.06529927253723 -Retrying downloading 7gjb 1 -Total processing time: 331.0918519496918 -Retrying downloading 6p3t 1 -Total processing time: 350.17813777923584 -Retrying downloading 2nxy 1 -Retrying downloading 2h9h 1 -Retrying downloading 1iux 1 -Total processing time: 335.39249420166016 -Total processing time: 323.69327688217163 -Total processing time: 293.5512726306915 -Total processing time: 196.77590370178223 -Total processing time: 250.91994857788086 -Total processing time: 343.6420433521271 -Total processing time: 353.9921691417694 -Retrying downloading 2w73 1 -Total processing time: 311.4109025001526 -Retrying downloading 7oc7 1 -Total processing time: 259.43544125556946 -Total processing time: 321.14261198043823 -Retrying downloading 1g8h 1 -Retrying downloading 2zl1 1 -Total processing time: 347.65894317626953 -Retrying downloading 4ea3 1 -Retrying downloading 6tat 1 -Retrying downloading 1xui 1 -Total processing time: 288.3323218822479 -Retrying downloading 6rtj 1 -Retrying downloading 5bvn 1 -Total processing time: 339.6776955127716 -Retrying downloading 3u9q 1 -Retrying downloading 4wrc 1 -Retrying downloading 6kzs 1 -Retrying downloading 3gf3 1 -Retrying downloading 4lzp 1 -Total processing time: 248.97310876846313 -Retrying downloading 1p93 1 -Retrying downloading 2cs2 1 -Total processing time: 231.30122113227844 -Retrying downloading 4yi0 1 -Retrying downloading 6qts 1 -Retrying downloading 8czu 1 -Total processing time: 247.04669070243835 -Retrying downloading 6fgw 1 -Retrying downloading 5sma 1 -Total processing time: 351.79816031455994 -Retrying downloading 8qbt 1 -Retrying downloading 8cso 1 -Total processing time: 256.4463484287262 -Retrying downloading 3up5 1 -Retrying downloading 7a2l 1 -Total processing time: 347.59255194664 -Retrying downloading 6r02 1 -Retrying downloading 6l8k 1 -Retrying downloading 1sgz 1 -Total processing time: 321.72128224372864 -Retrying downloading 6kpp 1 -Retrying downloading 7ryq 1 -Total processing time: 343.95181822776794 -Retrying downloading 6eej 1 -Retrying downloading 5vd6 1 -Retrying downloading 1m1r 1 -Retrying downloading 8jby 1 -Total processing time: 215.8878471851349 -Retrying downloading 6kfc 1 -Retrying downloading 2er7 1 -Retrying downloading 6lv4 1 -Retrying downloading 5xko 1 -Retrying downloading 3dos 1 -Retrying downloading 7bqo 1 -Total processing time: 313.6565775871277 -Retrying downloading 8hzt 1 -Total processing time: 291.915301322937 -Total processing time: 275.9345817565918 -Retrying downloading 4cmy 1 -Retrying downloading 6yjv 1 -Total processing time: 347.0188205242157 -Retrying downloading 3m64 1 -Total processing time: 348.28559255599976 -Retrying downloading 2hm1 1 -Total processing time: 315.88746190071106 -Total processing time: 293.780912399292 -Retrying downloading 7sz2 1 -Retrying downloading 2r1m 1 -Total processing time: 321.55198884010315 -Retrying downloading 1wc0 1 -Total processing time: 304.49925923347473 -Retrying downloading 8crs 1 -Retrying downloading 3oex 1 -Retrying downloading 7br5 1 -Total processing time: 321.5709912776947 -Total processing time: 223.63168382644653 -Retrying downloading 8dl2 1 -Total processing time: 270.53493428230286 -Total processing time: 290.92255544662476 -Retrying downloading 4h47 1 -Retrying downloading 4i6a 1 -Retrying downloading 5n80 1 -Retrying downloading 3lrm 1 -Total processing time: 311.4237720966339 -Retrying downloading 1b5f 1 -Retrying downloading 7ffw 1 -Retrying downloading 6you 1 -Retrying downloading 5sa5 1 -Total processing time: 301.1920621395111 -Retrying downloading 5ehv 1 -Retrying downloading 4own 1 -Retrying downloading 2yob 1 -Total processing time: 292.59982109069824 -Retrying downloading 3aoa 1 -Retrying downloading 5e7b 1 -Retrying downloading 1xlw 1 -Total processing time: 338.53901743888855 -Retrying downloading 2oce 1 -Total processing time: 283.40497970581055 -Retrying downloading 7mcy 1 -Retrying downloading 2fgi 1 -Retrying downloading 5z01 1 -Retrying downloading 8c6s 1 -Total processing time: 293.7116858959198 -Total processing time: 342.88831424713135 -Retrying downloading 5cc0 1 -Total processing time: 303.2792537212372 -Retrying downloading 5e03 1 -Total processing time: 296.03205943107605 -Retrying downloading 2qft 1 -Total processing time: 166.0706946849823 -Total processing time: 295.03443121910095 -Retrying downloading 6lvz 1 -Total processing time: 166.5716691017151 -Total processing time: 360.38008761405945 -Retrying downloading 7o4j 1 -Retrying downloading 3o3z 1 -Retrying downloading 5qft 1 -Retrying downloading 7shm 1 -Retrying downloading 2xzz 1 -Retrying downloading 6db3 1 -Retrying downloading 1ogm 1 -Total processing time: 348.41044092178345 -Retrying downloading 4n3w 1 -Total processing time: 158.91274094581604 -Retrying downloading 6rlq 1 -Total processing time: 319.52433681488037 -Retrying downloading 4p7h 1 -Total processing time: 320.86791729927063 -Retrying downloading 3zpf 1 -Total processing time: 296.2894721031189 -Total processing time: 379.5478949546814 -Retrying downloading 5af0 1 -Retrying downloading 7ohz 1 -Total processing time: 345.48388671875 -Retrying downloading 6dku 1 -Retrying downloading 5pwt 1 -Total processing time: 339.0101788043976 -Retrying downloading 5rw7 1 -Retrying downloading 3qnr 1 -Retrying downloading 5rfw 1 -Total processing time: 326.8904058933258 -Total processing time: 282.4053695201874 -Retrying downloading 5inc 1 -Total processing time: 308.12144923210144 -Retrying downloading 2vig 1 -Total processing time: 346.67214345932007 -Retrying downloading 7rbk 1 -Retrying downloading 7m5o 1 -Total processing time: 324.37418961524963 -Retrying downloading 3vzg 1 -Retrying downloading 8eae 1 -Total processing time: 275.01791620254517 -Retrying downloading 1xnz 1 -Retrying downloading 8sq8 1 -Retrying downloading 4gqx 1 -Retrying downloading 4axs 1 -Total processing time: 242.00575232505798 -Retrying downloading 6cas 1 -Retrying downloading 2fss 1 -Retrying downloading 6jwh 1 -Total processing time: 244.56594371795654 -Retrying downloading 2irp 1 -Retrying downloading 1inh 1 -Retrying downloading 3jvw 1 -Total processing time: 308.1357078552246 -Retrying downloading 4g7n 1 -Retrying downloading 4qjp 1 -Retrying downloading 3gn6 1 -Retrying downloading 3pk2 1 -Retrying downloading 4j6i 1 -Total processing time: 345.3444139957428 -Retrying downloading 7gen 1 -Retrying downloading 6n23 1 -Total processing time: 317.64353013038635 -Retrying downloading 1isr 1 -Total processing time: 308.8170499801636 -Retrying downloading 5xqo 1 -Retrying downloading 3zl8 1 -Total processing time: 344.39749097824097 -Retrying downloading 7aub 1 -Total processing time: 285.00448298454285 -Retrying downloading 1z0v 1 -Total processing time: 337.7604615688324 -Retrying downloading 5qng 1 -Retrying downloading 5v3j 1 -Retrying downloading 5acl 1 -Total processing time: 338.33704137802124 -Retrying downloading 7flu 1 -Total processing time: 296.8580541610718 -Retrying downloading 2cog 1 -Total processing time: 379.0898857116699 -Retrying downloading 2fsi 1 -Retrying downloading 4cni 1 -Total processing time: 325.20751309394836 -Retrying downloading 5s1y 1 -Retrying downloading 2lr2 1 -Retrying downloading 5w8m 1 -Retrying downloading 4cjz 1 -Retrying downloading 7bz1 1 -Total processing time: 284.62511134147644 -Total processing time: 311.77219223976135 -Retrying downloading 2qqg 1 -Total processing time: 279.784539937973 -Retrying downloading 6oog 1 -Total processing time: 290.63397097587585 -Retrying downloading 4xtr 1 -Total processing time: 334.94605708122253 -Total processing time: 339.6519601345062 -Retrying downloading 2ijd 1 -Retrying downloading 1wi3 1 -Retrying downloading 4l69 1 -Total processing time: 299.9331064224243 -Total processing time: 265.53390526771545 -Retrying downloading 7nqb 1 -Retrying downloading 1l52 1 -Retrying downloading 7prr 1 -Retrying downloading 3dve 1 -Total processing time: 358.3839359283447 -Total processing time: 268.57615303993225 -Retrying downloading 3fdd 1 -Total processing time: 261.43696641921997 -Retrying downloading 6g3a 1 -Retrying downloading 7ep9 1 -Retrying downloading 4fba 1 -Retrying downloading 6h40 1 -Retrying downloading 8qxl 1 -Total processing time: 337.7343773841858 -Retrying downloading 6vlw 1 -Retrying downloading 4mkh 1 -Retrying downloading 2v7l 1 -Retrying downloading 1rqr 1 -Retrying downloading 4moj 1 -Total processing time: 298.0343985557556 -Total processing time: 334.96778535842896 -Total processing time: 305.39479064941406 -Total processing time: 330.7923834323883 -Total processing time: 282.05709528923035 -Retrying downloading 2jni 1 -Total processing time: 334.1163811683655 -Retrying downloading 3hu9 1 -Retrying downloading 2mjz 1 -Total processing time: 292.55367064476013 -Retrying downloading 2x4x 1 -Retrying downloading 4jcg 1 -Total processing time: 295.1144585609436 -Retrying downloading 1ww1 1 -Total processing time: 357.00191283226013 -Retrying downloading 7w4t 1 -Retrying downloading 4g8o 1 -Total processing time: 271.3877046108246 -Retrying downloading 5i5r 1 -Total processing time: 303.0214283466339 -Retrying downloading 4j2o 1 -Total processing time: 256.29515171051025 -Total processing time: 284.97889852523804 -Retrying downloading 1g0s 1 -Total processing time: 323.38559222221375 -Retrying downloading 6w0y 1 -Total processing time: 341.9607181549072 -Retrying downloading 6x3b 1 -Retrying downloading 2dbh 1 -Total processing time: 310.52193331718445 -Retrying downloading 5xoo 1 -Retrying downloading 2pud 1 -Retrying downloading 3osr 1 -Total processing time: 208.94483613967896 -Retrying downloading 2d23 1 -Retrying downloading 2fa9 1 -Retrying downloading 1zz7 1 -Retrying downloading 1uvf 1 -Total processing time: 310.5111219882965 -Retrying downloading 2qdm 1 -Retrying downloading 2p88 1 -Retrying downloading 4iv8 1 -Retrying downloading 4afn 1 -Retrying downloading 8iry 1 -Total processing time: 317.3453640937805 -Retrying downloading 6v01 1 -Total processing time: 321.4482522010803 -Retrying downloading 3u3p 1 -Retrying downloading 6eax 1 -Retrying downloading 1y8z 1 -Retrying downloading 3t3n 1 -Retrying downloading 6l4k 1 -Retrying downloading 7yy5 1 -Total processing time: 197.3304488658905 -Retrying downloading 1wn2 1 -Retrying downloading 6ng7 1 -Retrying downloading 1k4c 1 -Retrying downloading 3oht 1 -Retrying downloading 3ri0 1 -Total processing time: 260.0996940135956 -Retrying downloading 3hvq 1 -Retrying downloading 6nw3 1 -Total processing time: 314.21231150627136 -Total processing time: 331.74141359329224 -Retrying downloading 6s4e 1 -Retrying downloading 5oty 1 -Total processing time: 262.37908935546875 -Retrying downloading 1i20 1 -Retrying downloading 7kia 1 -Total processing time: 319.6289482116699 -Retrying downloading 5e7y 1 -Retrying downloading 1tmj 1 -Retrying downloading 4xmu 1 -Retrying downloading 2yye 1 -Total processing time: 352.44618344306946 -Retrying downloading 5xgu 1 -Total processing time: 341.1219844818115 -Retrying downloading 1sdt 1 -Total processing time: 326.69378328323364 -Retrying downloading 3og2 1 -Total processing time: 371.90297770500183 -Retrying downloading 1czf 1 -Retrying downloading 7aip 1 -Total processing time: 286.590833902359 -Retrying downloading 6r5w 1 -Retrying downloading 8iui 1 -Retrying downloading 5wxv 1 -Retrying downloading 2o8t 1 -Retrying downloading 4cz9 1 -Total processing time: 371.2220368385315 -Retrying downloading 133l 1 -Retrying downloading 3ao5 1 -Total processing time: 306.29833579063416 -Retrying downloading 5fae 1 -Total processing time: 249.77260971069336 -Retrying downloading 1hym 1 -Retrying downloading 4f0b 1 -Retrying downloading 6r1c 1 -Total processing time: 325.5990433692932 -Retrying downloading 2hw1 1 -Retrying downloading 2ouk 1 -Retrying downloading 2rie 1 -Retrying downloading 8uqy 1 -Retrying downloading 4n8v 1 -Retrying downloading 6swo 1 -Total processing time: 207.29337358474731 -Total processing time: 273.9354598522186 -Retrying downloading 2uzl 1 -Total processing time: 229.60449647903442 -Retrying downloading 5hv1 1 -Retrying downloading 8oun 1 -Retrying downloading 5yn6 1 -Retrying downloading 3eea 1 -Retrying downloading 6kxu 1 -Total processing time: 318.79333901405334 -Total processing time: 197.15452480316162 -Retrying downloading 4e1q 1 -Retrying downloading 8h50 1 -Total processing time: 306.09631872177124 -Retrying downloading 4gu4 1 -Total processing time: 345.32963514328003 -Retrying downloading 1qo1 1 -Retrying downloading 1qj3 1 -Total processing time: 303.5882179737091 -Retrying downloading 6eny 1 -Retrying downloading 7og1 1 -Retrying downloading 2wau 1 -Retrying downloading 7w58 1 -Retrying downloading 2jo4 1 -Total processing time: 364.6757650375366 -Retrying downloading 1h17 1 -Total processing time: 318.1446363925934 -Retrying downloading 4da8 1 -Total processing time: 214.3267319202423 -Retrying downloading 7knu 1 -Total processing time: 355.6277987957001 -Total processing time: 282.4938371181488 -Retrying downloading 5ira 1 -Total processing time: 253.74963927268982 -Retrying downloading 5qdc 1 -Total processing time: 334.81666803359985 -Total processing time: 236.78256011009216 -Total processing time: 323.73480439186096 -Retrying downloading 1fi2 1 -Retrying downloading 3pmh 1 -Total processing time: 315.13100695610046 -Total processing time: 319.2199237346649 -Retrying downloading 8h6a 1 -Retrying downloading 3ife 1 -Total processing time: 330.9580948352814 -Retrying downloading 1s3q 1 -Retrying downloading 2i4o 1 -Retrying downloading 5kt9 1 -Total processing time: 340.92007756233215 -Retrying downloading 7oj6 1 -Retrying downloading 1vz3 1 -Retrying downloading 2y1z 1 -Retrying downloading 8r5u 1 -Total processing time: 228.97281432151794 -Retrying downloading 5znh 1 -Total processing time: 264.88845896720886 -Retrying downloading 3lge 1 -2024-08-12 20:23:48,790 - tornado.application - ERROR - Exception in callback functools.partial(>, exception=TimeoutError()>) -Traceback (most recent call last): - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/asyncio/tasks.py", line 458, in wait_for - fut.result() -asyncio.exceptions.CancelledError - -The above exception was the direct cause of the following exception: - -Traceback (most recent call last): - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/tornado/ioloop.py", line 750, in _run_callback - ret = callback() - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/tornado/ioloop.py", line 774, in _discard_future_result - future.result() - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/deploy/spec.py", line 448, in _close - await self._correct_state() - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/deploy/spec.py", line 359, in _correct_state_internal - await asyncio.gather(*tasks) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/nanny.py", line 623, in close - await self.kill(timeout=timeout, reason=reason) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/nanny.py", line 400, in kill - await self.process.kill(reason=reason, timeout=timeout) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/nanny.py", line 879, in kill - await process.join(max(0, deadline - time())) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/process.py", line 330, in join - await wait_for(asyncio.shield(self._exit_future), timeout) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/site-packages/distributed/utils.py", line 1961, in wait_for - return await asyncio.wait_for(fut, timeout) - File "/net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_env/lib/python3.10/asyncio/tasks.py", line 460, in wait_for - raise exceptions.TimeoutError() from exc -asyncio.exceptions.TimeoutError -End time: Mon Aug 12 20:23:55 CEST 2024 -Computation time: 01:13:36 diff --git a/experiments/data/swissprot_all_192.txt b/experiments/data/swissprot_all_192.txt deleted file mode 100644 index a358ba2..0000000 --- a/experiments/data/swissprot_all_192.txt +++ /dev/null @@ -1,556 +0,0 @@ -IP -172.25.0.101 -Now please run command: -eval "$(conda shell.bash hook)" -to set conda env - Miniconda3/23.9.0-0 loaded. -Start time: Tue Aug 13 02:00:20 CEST 2024 -http://127.0.0.1:8989/status -2024-08-13 02:00:36.427130 -Downloading ids -Foldcomp downloading db: afdb_swissprot_v4 to /net/storage/pr3/plgrid/plggsano/tomaszlab/deepfri/dev_data/repo/AFDB/part_afdb_swissprot_v4/20240813_0200 -0/543 -1/543 -2/543 -3/543 -4/543 -5/543 -6/543 -7/543 -8/543 -9/543 -10/543 -11/543 -12/543 -13/543 -14/543 -15/543 -16/543 -17/543 -18/543 -19/543 -20/543 -21/543 -22/543 -23/543 -24/543 -25/543 -26/543 -27/543 -28/543 -29/543 -30/543 -31/543 -32/543 -33/543 -34/543 -35/543 -36/543 -37/543 -38/543 -39/543 -40/543 -41/543 -42/543 -43/543 -44/543 -45/543 -46/543 -47/543 -48/543 -49/543 -50/543 -51/543 -52/543 -53/543 -54/543 -55/543 -56/543 -57/543 -58/543 -59/543 -60/543 -61/543 -62/543 -63/543 -64/543 -65/543 -66/543 -67/543 -68/543 -69/543 -70/543 -71/543 -72/543 -73/543 -74/543 -75/543 -76/543 -77/543 -78/543 -79/543 -80/543 -81/543 -82/543 -83/543 -84/543 -85/543 -86/543 -87/543 -88/543 -89/543 -90/543 -91/543 -92/543 -93/543 -94/543 -95/543 -96/543 -97/543 -98/543 -99/543 -100/543 -101/543 -102/543 -103/543 -104/543 -105/543 -106/543 -107/543 -108/543 -109/543 -110/543 -111/543 -112/543 -113/543 -114/543 -115/543 -116/543 -117/543 -118/543 -119/543 -120/543 -121/543 -122/543 -123/543 -124/543 -125/543 -126/543 -127/543 -128/543 -129/543 -130/543 -131/543 -132/543 -133/543 -134/543 -135/543 -136/543 -137/543 -138/543 -139/543 -140/543 -141/543 -142/543 -143/543 -144/543 -145/543 -146/543 -147/543 -148/543 -149/543 -150/543 -151/543 -152/543 -153/543 -154/543 -155/543 -156/543 -157/543 -158/543 -159/543 -160/543 -161/543 -162/543 -163/543 -164/543 -165/543 -166/543 -167/543 -168/543 -169/543 -170/543 -171/543 -172/543 -173/543 -174/543 -175/543 -176/543 -177/543 -178/543 -179/543 -180/543 -181/543 -182/543 -183/543 -184/543 -185/543 -186/543 -187/543 -188/543 -189/543 -190/543 -191/543 -192/543 -193/543 -194/543 -195/543 -196/543 -197/543 -198/543 -199/543 -200/543 -201/543 -202/543 -203/543 -204/543 -205/543 -206/543 -207/543 -208/543 -209/543 -210/543 -211/543 -212/543 -213/543 -214/543 -215/543 -216/543 -217/543 -218/543 -219/543 -220/543 -221/543 -222/543 -223/543 -224/543 -225/543 -226/543 -227/543 -228/543 -229/543 -230/543 -231/543 -232/543 -233/543 -234/543 -235/543 -236/543 -237/543 -238/543 -239/543 -240/543 -241/543 -242/543 -243/543 -244/543 -245/543 -246/543 -247/543 -248/543 -249/543 -250/543 -251/543 -252/543 -253/543 -254/543 -255/543 -256/543 -257/543 -258/543 -259/543 -260/543 -261/543 -262/543 -263/543 -264/543 -265/543 -266/543 -267/543 -268/543 -269/543 -270/543 -271/543 -272/543 -273/543 -274/543 -275/543 -276/543 -277/543 -278/543 -279/543 -280/543 -281/543 -282/543 -283/543 -284/543 -285/543 -286/543 -287/543 -288/543 -289/543 -290/543 -291/543 -292/543 -293/543 -294/543 -295/543 -296/543 -297/543 -298/543 -299/543 -300/543 -301/543 -302/543 -303/543 -304/543 -305/543 -306/543 -307/543 -308/543 -309/543 -310/543 -311/543 -312/543 -313/543 -314/543 -315/543 -316/543 -317/543 -318/543 -319/543 -320/543 -321/543 -322/543 -323/543 -324/543 -325/543 -326/543 -327/543 -328/543 -329/543 -330/543 -331/543 -332/543 -333/543 -334/543 -335/543 -336/543 -337/543 -338/543 -339/543 -340/543 -341/543 -342/543 -343/543 -344/543 -345/543 -346/543 -347/543 -348/543 -349/543 -350/543 -351/543 -352/543 -353/543 -354/543 -355/543 -356/543 -357/543 -358/543 -359/543 -360/543 -361/543 -362/543 -363/543 -364/543 -365/543 -366/543 -367/543 -368/543 -369/543 -370/543 -371/543 -372/543 -373/543 -374/543 -375/543 -376/543 -377/543 -378/543 -379/543 -380/543 -381/543 -382/543 -383/543 -384/543 -385/543 -386/543 -387/543 -388/543 -389/543 -390/543 -391/543 -392/543 -393/543 -394/543 -395/543 -396/543 -397/543 -398/543 -399/543 -400/543 -401/543 -402/543 -403/543 -404/543 -405/543 -406/543 -407/543 -408/543 -409/543 -410/543 -411/543 -412/543 -413/543 -414/543 -415/543 -416/543 -417/543 -418/543 -419/543 -420/543 -421/543 -422/543 -423/543 -424/543 -425/543 -426/543 -427/543 -428/543 -429/543 -430/543 -431/543 -432/543 -433/543 -434/543 -435/543 -436/543 -437/543 -438/543 -439/543 -440/543 -441/543 -442/543 -443/543 -444/543 -445/543 -446/543 -447/543 -448/543 -449/543 -450/543 -451/543 -452/543 -453/543 -454/543 -455/543 -456/543 -457/543 -458/543 -459/543 -460/543 -461/543 -462/543 -463/543 -464/543 -465/543 -466/543 -467/543 -468/543 -469/543 -470/543 -471/543 -472/543 -473/543 -474/543 -475/543 -476/543 -477/543 -478/543 -479/543 -480/543 -481/543 -482/543 -483/543 -484/543 -485/543 -486/543 -487/543 -488/543 -489/543 -490/543 -491/543 -492/543 -493/543 -494/543 -495/543 -496/543 -497/543 -498/543 -499/543 -500/543 -501/543 -502/543 -503/543 -504/543 -505/543 -506/543 -507/543 -508/543 -509/543 -510/543 -511/543 -512/543 -513/543 -514/543 -515/543 -516/543 -517/543 -518/543 -519/543 -520/543 -521/543 -522/543 -523/543 -524/543 -525/543 -526/543 -527/543 -528/543 -529/543 -530/543 -531/543 -532/543 -533/543 -534/543 -535/543 -536/543 -537/543 -538/543 -539/543 -540/543 -541/543 -542/543 -End time: Tue Aug 13 02:02:16 CEST 2024 -Computation time: 00:01:56 diff --git a/experiments/distograms.py b/experiments/distograms.py deleted file mode 100644 index 1f2fd75..0000000 --- a/experiments/distograms.py +++ /dev/null @@ -1,83 +0,0 @@ -import pickle - -from typing import Optional - -import matplotlib.pyplot as plt -import numpy as np -from tqdm import tqdm - - -def plot_first_histogram(histograms_path: str): - # Load the histograms from the pickle file - with open(histograms_path, "rb") as f: - histograms_dict = pickle.load(f) - - # Get the first key in the dictionary - first_key = next(iter(histograms_dict)) - - # Extract the histogram data for the first distogram - hist_data = histograms_dict[first_key] - hist = hist_data["hist"] - bin_edges = hist_data["bin_edges"] - - # Plot the histogram - plt.figure(figsize=(10, 6)) - plt.bar(bin_edges[:-1], hist, width=np.diff(bin_edges), align="edge") - plt.title(f"Histogram of Distogram: {first_key}") - plt.xlabel("Distance") - plt.ylabel("Frequency") - plt.savefig("first_distogram_histogram.png") - plt.show() - - -def visualize_distograms_3d(histograms_path, num_distograms: Optional[int] = 5): - # Load the histograms from the pickle file - with open(histograms_path, "rb") as f: - histograms_dict = pickle.load(f) - - # Select a subset of distograms and sort them by their length - keys = sorted( - histograms_dict.keys(), - key=lambda x: len(histograms_dict[x]["hist"]), - reverse=True, - ) - keys = keys[:num_distograms] if num_distograms is not None else keys - - # Create the 3D plot - fig = plt.figure(figsize=(12, 8)) - ax = fig.add_subplot(111, projection="3d") - - # Plot each histogram - for i, key in tqdm(enumerate(keys)): - hist_data = histograms_dict[key] - hist = hist_data["hist"] - bin_edges = hist_data["bin_edges"] - bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 - - # Create the 3D bar plot - dx = dy = (bin_edges[1] - bin_edges[0]) * 0.8 # Adjust bar width - xpos, ypos = np.meshgrid(bin_centers, [i]) - xpos = xpos.flatten() - ypos = ypos.flatten() - zpos = np.zeros_like(xpos) - dz = hist - - ax.bar3d(xpos, ypos, zpos, dx, dy, dz, shade=True, alpha=0.8) - - # Customize the plot - ax.set_xlabel("Distance") - # ax.set_ylabel('Distogram Index') - ax.set_zlabel("Frequency") - ax.set_title(f"3D Visualization of {len(keys)} Distograms") - - # Set y-axis ticks to distogram indices - # ax.set_yticks(range(len(keys))) - # ax.set_yticklabels(range(1, len(keys) + 1)) - - plt.savefig("3d_distogram_visualization.png") - plt.show() - - -if __name__ == "__main__": - # plot_first_histogram('../distograms_hist_exp/histograms.pkl') - visualize_distograms_3d("../distograms_hist_exp/histograms.pkl", 2000) diff --git a/experiments/distograms/total.py b/experiments/distograms/total.py deleted file mode 100644 index 590ef6e..0000000 --- a/experiments/distograms/total.py +++ /dev/null @@ -1,6 +0,0 @@ -from experiments.plot_from_htmls import plot_from_htmls - -if __name__ == "__main__": - html_files_dir = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/distograms/reports" # replace with actual path - task_names = ["process_pdbs", "collect_parallel"] - plot_from_htmls(html_files_dir, task_names) diff --git a/experiments/distograms_pdb/total.py b/experiments/distograms_pdb/total.py deleted file mode 100644 index 4ad7daa..0000000 --- a/experiments/distograms_pdb/total.py +++ /dev/null @@ -1,6 +0,0 @@ -from experiments.plot_from_htmls import plot_from_htmls - -if __name__ == "__main__": - html_files_dir = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/distograms_pdb/reports" # replace with actual path - task_names = ["process_pdbs", "collect_parallel"] - plot_from_htmls(html_files_dir, task_names) diff --git a/experiments/download/extract_time_data.py b/experiments/download/extract_time_data.py deleted file mode 100644 index 5476f0a..0000000 --- a/experiments/download/extract_time_data.py +++ /dev/null @@ -1,116 +0,0 @@ -import csv -import json -from collections import defaultdict -from dask.distributed import Client, LocalCluster -import dask.bag as db - - -def analyze_single_task(data, task_name): - task_times = [] - total_time = 0 - count = 0 - - # Get the first (and presumably only) key in the data dictionary - top_level_key = next(iter(data)) - roots = data[top_level_key]["roots"] - - for reference in roots["references"]: - if "attributes" in reference and "data" in reference["attributes"]: - names = reference["attributes"]["data"].get("name", []) - durations = reference["attributes"]["data"].get("duration", []) - - for name, duration in zip(names, durations): - if name == task_name: - task_times.append(duration) - total_time += duration - count += 1 - - average_time = total_time / count if count > 0 else 0 - return { - "name": task_name, - "task_times": task_times, - "total_time": total_time, - "count": count, - "average_time": average_time, - } - - -def analyze_task_times(json_data, task_names): - # Load JSON data - if isinstance(json_data, str): - with open(json_data, "r") as f: - data = json.load(f) - elif isinstance(json_data, dict): - data = json_data - else: - raise ValueError("json_data must be either a file path or a dictionary") - - # Set up Dask local cluster and client - with LocalCluster() as cluster, Client(cluster) as client: - # Create a bag of task names - task_bag = db.from_sequence(task_names) - - # Map the analyze_single_task function to each task name - results = task_bag.map(lambda name: analyze_single_task(data, name)).compute() - - # Convert results to a dictionary - return {result["name"]: result for result in results} - - -def print_analysis(results): - print("Tasks analyzed:") - for name, result in results.items(): - print(f"\n{name}:") - print("Execution Times (ms):") - for time in result["task_times"]: - print(f" {time:.2f}") - - print("\nSummary:") - print(f" Total execution time: {result['total_time']:.2f} ms") - print(f" Number of tasks: {result['count']}") - if result["count"] > 0: - print(f" Average execution time: {result['average_time']:.2f} ms") - - -# Add function to write results to csv -def write_to_csv(results): - with open("results.csv", "w", newline="") as csvfile: - fieldnames = [ - "name", - "average_time_ms", - "average_time_s", - "average_time_min", - "total_time_ms", - "total_time_s", - "total_time_min", - ] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for name, result in results.items(): - writer.writerow( - { - "name": name, - "average_time_ms": result["average_time"], - "average_time_s": result["average_time"] / 1000, - "average_time_min": result["average_time"] / 60000, - "total_time_ms": result["total_time"], - "total_time_s": result["total_time"] / 1000, - "total_time_min": result["total_time"] / 60000, - } - ) - - -# Example usage -if __name__ == "__main__": - json_file = "pretty.json" - task_names = [ - "retrieve_binary_cifs_to_pdbs", - "aggregate_results", - "create_zip_archive", - "create_pdb_zip_archive", - "compress_and_save_h5", - ] - - results = analyze_task_times(json_file, task_names) - print_analysis(results) - write_to_csv(results) # Added line to write results to csv diff --git a/experiments/download/process_html.py b/experiments/download/process_html.py deleted file mode 100644 index 3d4189f..0000000 --- a/experiments/download/process_html.py +++ /dev/null @@ -1,53 +0,0 @@ -import json -from bs4 import BeautifulSoup -import subprocess - -DEFAULT_FILE_PATH = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download/report_pdb_8_10.html" - - -def parse_html_to_json(html_file_path: str, parser="html.parser"): - with open(html_file_path, "r") as file: - html_content = file.read() - soup = BeautifulSoup(html_content, parser) - return soup.find("script", type="application/json") - - -def convert_json_to_schema(json_script: BeautifulSoup): - if json_script: - json_data = json.loads(json_script.string) - return json_data - - -def generate_schema(json_data: dict): - gen_schema_proc = subprocess.Popen( - ["genson"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True - ) - json_str = json.dumps(json_data) - schema_str, _ = gen_schema_proc.communicate(json_str) - return schema_str - - -def organise_json(json_data: dict): - jq_proc = subprocess.Popen( - ["jq", "."], stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True - ) - organized_json_str, _ = jq_proc.communicate(json.dumps(json_data)) - return organized_json_str - - -def save_to_file(data: str, filename="pretty.json"): - with open(filename, "w") as f: - f.write(data) - - -def main_json(): - html_file_path = DEFAULT_FILE_PATH - json_script = parse_html_to_json(html_file_path) - json_data = convert_json_to_schema(json_script) - if json_data is not None: - organized_json_str = organise_json(json_data) - save_to_file(organized_json_str, filename="pretty.json") - - -if __name__ == "__main__": - main_json() diff --git a/experiments/download/total.py b/experiments/download/total.py deleted file mode 100644 index b7b7f55..0000000 --- a/experiments/download/total.py +++ /dev/null @@ -1,13 +0,0 @@ -from experiments.plot_from_htmls import plot_from_htmls - -if __name__ == "__main__": - html_files_dir = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download/reports" # replace with actual path - task_names = [ - "retrieve_binary_cif", - "binary_cif_to_pdbs", - "aggregate_results", - # "create_pdb_zip_archive", - "compress_and_save_h5", - "retrieve_pdb_chunk_to_h5", - ] - plot_from_htmls(html_files_dir=html_files_dir, task_names=task_names) diff --git a/experiments/download_afdb/total.py b/experiments/download_afdb/total.py deleted file mode 100644 index 1020a85..0000000 --- a/experiments/download_afdb/total.py +++ /dev/null @@ -1,6 +0,0 @@ -from experiments.plot_from_htmls import plot_from_htmls - -if __name__ == "__main__": - html_files_dir = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download_afdb/reports" # replace with actual path - task_names = ["alphafold_chunk_to_h5"] - plot_from_htmls(html_files_dir=html_files_dir, task_names=task_names) diff --git a/experiments/download_pdb_nb/total.py b/experiments/download_pdb_nb/total.py deleted file mode 100644 index f89a1b6..0000000 --- a/experiments/download_pdb_nb/total.py +++ /dev/null @@ -1,13 +0,0 @@ -from experiments.plot_from_htmls import plot_from_htmls - -if __name__ == "__main__": - html_files_dir = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments/download_pdb_nb/reports" # replace with actual path - task_names = [ - "retrieve_cif", - "cif_to_pdbs", - "aggregate_results", - # "create_pdb_zip_archive", - "compress_and_save_h5", - "retrieve_pdb_chunk_to_h5", - ] - plot_from_htmls(html_files_dir=html_files_dir, task_names=task_names) diff --git a/experiments/download_to_compress_experiment.py b/experiments/download_to_compress_experiment.py deleted file mode 100644 index 28dd517..0000000 --- a/experiments/download_to_compress_experiment.py +++ /dev/null @@ -1,25 +0,0 @@ -import csv -import re - - -def main(input_string): - download_times = re.findall(r"Download time:\s*([0-9.]+)", input_string) - compress_times = re.findall(r"Compress \+ save time:\s*([0-9.]+)", input_string) - - with open("times.csv", "w", newline="") as csvfile: - fieldnames = ["download_time", "compress_time"] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - - writer.writeheader() - for i in range(len(download_times)): - writer.writerow( - {"download_time": download_times[i], "compress_time": compress_times[i]} - ) - - -input = """ - -""" - -if __name__ == "__main__": - main(input) diff --git a/experiments/pdb_metadata/plot.py b/experiments/pdb_metadata/plot.py deleted file mode 100644 index 6590e2e..0000000 --- a/experiments/pdb_metadata/plot.py +++ /dev/null @@ -1,93 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np - -# Data -categories = { - "Experimental Method": { - "X-RAY DIFFRACTION": 187265, - "ELECTRON MICROSCOPY": 21723, - "SOLUTION NMR": 14229, - "Other": 561 + 238, # Sum of the remaining methods - }, - "Refinement Resolution (Å)": { - # "< 1.0": 1057, - "< 1.5": 19400 + 1057, - "1.5 - 2.0": 64375, - "2.0 - 2.5": 57899, - "2.5 - 3.0": 35447, - "3.0 - 3.5": 17694, - "3.5 - 4.0": 7169, - "> 4.0": 2585 + 3775, - # "> 4.5": 3775 - }, - "Release Date": { - # "earlier": 190 + 122 + 53, - "earlier": 190 + 122 + 53 + 2506, - "1995 - 1999": 8088, - "2000 - 2004": 17726, - "2005 - 2009": 33064, - "2010 - 2014": 43311, - "2015 - 2019": 53737, - "2020 - 2024": 64993, - }, - "Polymer Entity Type": { - "Protein": 219181, - "DNA": 11244, - "RNA": 7970, - "Other": 277 + 8, - }, -} - - -def fix_labels(mylabels, tooclose=0.1, sepfactor=2): - vecs = np.zeros((len(mylabels), len(mylabels), 2)) - dists = np.zeros((len(mylabels), len(mylabels))) - for i in range(0, len(mylabels) - 1): - for j in range(i + 1, len(mylabels)): - a = np.array(mylabels[i].get_position()) - b = np.array(mylabels[j].get_position()) - dists[i, j] = np.linalg.norm(a - b) - vecs[i, j, :] = a - b - if dists[i, j] < tooclose: - mylabels[i].set_x(a[0] + sepfactor * vecs[i, j, 0]) - mylabels[i].set_y(a[1] + sepfactor * vecs[i, j, 1]) - mylabels[j].set_x(b[0] - sepfactor * vecs[i, j, 0]) - mylabels[j].set_y(b[1] - sepfactor * vecs[i, j, 1]) - - -# Create subplots -fig, axs = plt.subplots(2, 2, figsize=(20, 20)) - -# Flatten the axs array for easier iteration -axs = axs.flatten() - -# Colors for the pie charts -color_maps = [plt.cm.Set3, plt.cm.Set1, plt.cm.Set2, plt.cm.Pastel1] - -# Create visualizations for each category -for i, (category, data) in enumerate(categories.items()): - ax = axs[i] - labels = list(data.keys()) - sizes = list(data.values()) - - colors = color_maps[i](np.linspace(0, 1, len(labels))) - - ind = np.arange(len(labels)) # the x locations for the groups - ax.bar(ind, sizes, color=colors) - ax.set_xticks(ind) - ax.set_xticklabels(labels, fontsize=24, rotation=20) # Adjust rotation here - ax.set_title(category, fontsize=24) - - # Set both ticks and labels to be fontsize 14 - for tick in ax.xaxis.get_major_ticks(): - tick.label.set_fontsize(20) - for tick in ax.yaxis.get_major_ticks(): - tick.label.set_fontsize(14) - -# plt.suptitle('RSCB PDB structures metadata', fontsize=24) -plt.tight_layout() - -# Save the figure as SVG -plt.savefig("pdb_structures_metadata.pdf", dpi=1200, bbox_inches="tight") - -plt.show() diff --git a/experiments/plot_all.py b/experiments/plot_all.py deleted file mode 100644 index 5339516..0000000 --- a/experiments/plot_all.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import subprocess -import sys -import concurrent.futures - - -def run_script(script_path): - print(f"Running: {script_path}") - try: - subprocess.run( - [sys.executable, script_path], check=True, capture_output=True, text=True - ) - print(f"Successfully completed: {script_path}") - return f"Success: {script_path}" - except subprocess.CalledProcessError as e: - error_message = ( - f"Error running {script_path}: {e}\nSTDOUT: {e.stdout}\nSTDERR: {e.stderr}" - ) - print(error_message) - return error_message - - -def run_total_scripts(root_dir, max_workers=None): - script_paths = [] - for dirpath, dirnames, filenames in os.walk(root_dir): - if "total.py" in filenames: - script_paths.append(os.path.join(dirpath, "total.py")) - - print(f"Found {len(script_paths)} 'total.py' scripts to run.") - - with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor: - futures = [executor.submit(run_script, path) for path in script_paths] - for future in concurrent.futures.as_completed(futures): - print(future.result()) - print("-" * 50) - - -if __name__ == "__main__": - - root_directory = "/Users/youngdashu/sano/deepFRI2-toolbox-dev/experiments" - if not os.path.isdir(root_directory): - print(f"Error: {root_directory} is not a valid directory") - sys.exit(1) - - max_workers = 8 - - run_total_scripts(root_directory, max_workers) diff --git a/experiments/plot_from_htmls.py b/experiments/plot_from_htmls.py deleted file mode 100644 index 2233a98..0000000 --- a/experiments/plot_from_htmls.py +++ /dev/null @@ -1,485 +0,0 @@ -import csv -import json -import os -import re -from typing import List - -import dask.bag as db -import dask.distributed -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from bs4 import BeautifulSoup -from dask.distributed import get_client - -BASE_CORE_COUNT = 190 - - -def parse_html_to_json(html_file_path: str, parser="html.parser"): - with open(html_file_path, "r") as file: - html_content = file.read() - soup = BeautifulSoup(html_content, parser) - - # Extract duration - duration_text = soup.find(string=re.compile(r"Duration:")) - duration_minutes, duration_seconds = parse_duration(duration_text) - - print(duration_minutes, duration_seconds) - - return ( - soup.find("script", type="application/json"), - duration_minutes, - duration_seconds, - ) - - -def parse_duration(text): - # First, try to match the format with decimal seconds - simple_match = re.search(r"Duration:\s*(\d+(?:\.\d+)?)\s*s", text) - if simple_match: - total_seconds = float(simple_match.group(1)) - total_minutes = total_seconds / 60 - return round(total_minutes, 2), round(total_seconds, 2) - - # If simple format doesn't match, try the complex format - complex_match = re.search(r"Duration:\s*((?:\d+\s*\w+\s*)+)", text) - if complex_match: - duration_string = complex_match.group(1) - time_parts = re.findall(r"(\d+(?:\.\d+)?)\s*(\w+)", duration_string) - - total_seconds = 0 - for value, unit in time_parts: - value = float(value) - if unit.startswith("s"): - total_seconds += value - elif unit.startswith("m"): - total_seconds += value * 60 - elif unit.startswith("h"): - total_seconds += value * 3600 - elif unit.startswith("d"): - total_seconds += value * 86400 - - total_minutes = total_seconds / 60 - return round(total_minutes, 2), round(total_seconds, 2) - - return None, None - - -def convert_json_to_schema(json_script: BeautifulSoup): - if json_script: - json_data = json.loads(json_script.string) - return json_data - return None - - -def analyze_single_task(data, task_name): - task_times = [] - total_time = 0 - count = 0 - - top_level_key = next(iter(data)) - roots = data[top_level_key]["roots"] - - for reference in roots["references"]: - if "attributes" in reference and "data" in reference["attributes"]: - names = reference["attributes"]["data"].get("name", []) - durations = reference["attributes"]["data"].get("duration", []) - - for name, duration in zip(names, durations): - if name == task_name: - task_times.append(duration) - total_time += duration - count += 1 - - average_time = total_time / count if count > 0 else 0 - return { - "name": task_name, - "task_times": task_times, - "total_time": total_time, - "count": count, - "average_time": average_time, - } - - -def analyze_task_times(json_data, task_names): - # with get_client() as client: - # task_bag = db.from_sequence(task_names) - results = map(lambda name: analyze_single_task(json_data, name), task_names) - return {result["name"]: result for result in results} - - -def write_to_csv(results, output_file, duration_mins, duration_secs): - with open(output_file, "w", newline="") as csvfile: - fieldnames = [ - "name", - "average_time_ms", - "average_time_s", - "average_time_min", - "total_time_ms", - "total_time_s", - "total_time_min", - "count", - "total_duration_minutes", - "total_duration_seconds", - ] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for name, result in results.items(): - writer.writerow( - { - "name": name, - "average_time_ms": result["average_time"], - "average_time_s": result["average_time"] / 1000, - "average_time_min": result["average_time"] / 60000, - "total_time_ms": result["total_time"], - "total_time_s": result["total_time"] / 1000, - "total_time_min": result["total_time"] / 60000, - "count": result["count"], - "total_duration_minutes": duration_mins, - "total_duration_seconds": duration_secs, - } - ) - - -def extract_core_count(filename): - match = re.search(r"(?:_b_|_)(\d+)_", filename) - if match: - return int(match.group(1)) - return None - - -def process_html_file(html_file_path, task_names, output_dir): - json_script, duration_mins, duration_secs = parse_html_to_json(html_file_path) - json_data = convert_json_to_schema(json_script) - - if json_data is not None: - results = analyze_task_times(json_data, task_names) - - base_name = os.path.splitext(os.path.basename(html_file_path))[0] - output_file = os.path.join(output_dir, f"{base_name}_results.csv") - write_to_csv(results, output_file, duration_mins, duration_secs) - print(f"Results for {html_file_path} written to {output_file}") - else: - print(f"Failed to extract JSON data from {html_file_path}") - - -def read_csv_files(directory): - data = [] - for filename in os.listdir(directory): - if filename.endswith(".csv"): - core_count = extract_core_count(filename) - if core_count: - df = pd.read_csv(os.path.join(directory, filename)) - df["cores"] = core_count - data.append(df) - - if len(data) <= 1: - return data[0] - return pd.concat(data, ignore_index=True) - - -def plot_scalability(df, plot_type="average"): - tasks = df["name"].unique() - colors = plt.cm.rainbow(np.linspace(0, 1, len(tasks))) - - plt.figure(figsize=(14, 10)) - - if plot_type in ["average", "total"]: - for task, color in zip(tasks, colors): - task_df = df[df["name"] == task].sort_values("cores") - - if plot_type == "average": - y_values = task_df["average_time_s"] - y_label = "Average Execution Time (seconds)" - else: # total - y_values = task_df["total_time_s"] - y_label = "Total Execution Time (seconds)" - - plt.plot(task_df["cores"], y_values, marker="o", label=task, color=color) - - plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left") - plt.title( - f"Task Scalability ({plot_type.capitalize()} Time) with Total Duration" - ) - - elif plot_type == "duration": - duration_df = ( - df.groupby("cores")["total_duration_minutes"].first().reset_index() - ) - if any(duration_df["total_duration_minutes"] < 1): - duration_df["total_duration_seconds"] = ( - df.groupby("cores")["total_duration_seconds"] - .first() - .reset_index()["total_duration_seconds"] - ) - plt.plot( - duration_df["cores"], - duration_df["total_duration_seconds"], - marker="o", - color="blue", - ) - y_label = "Total Job Duration (seconds)" - - for _, row in duration_df.iterrows(): - plt.annotate( - f"{row['total_duration_seconds']:.1f}s", - (row["cores"], row["total_duration_seconds"]), - textcoords="offset points", - xytext=(0, 10), - ha="center", - fontsize=8, - ) - - else: - plt.plot( - duration_df["cores"], - duration_df["total_duration_minutes"], - marker="o", - color="blue", - ) - y_label = "Total Job Duration (minutes)" - - for _, row in duration_df.iterrows(): - plt.annotate( - f"{row['total_duration_minutes']:.1f}m", - (row["cores"], row["total_duration_minutes"]), - textcoords="offset points", - xytext=(0, 10), - ha="center", - fontsize=8, - ) - - plt.title("Total Job Duration Scalability") - else: - raise ValueError("plot_type must be 'average', 'total', or 'duration'") - - plt.xlabel("Number of Cores (P)") - plt.ylabel(y_label) - plt.xscale("linear") - plt.grid(True, which="both", ls="-", alpha=0.2) - plt.xticks(ticks=df["cores"]) - plt.tight_layout() - plt.savefig(f"scalability_plot_{plot_type}.pdf", dpi=1200, bbox_inches="tight") - plt.show() - - -def plot_speedup(df): - duration_df = df.groupby("cores")["total_duration_seconds"].first().reset_index() - duration_df = duration_df.sort_values("cores") - - T1 = duration_df.loc[ - duration_df["cores"] == BASE_CORE_COUNT, "total_duration_seconds" - ].values[0] - duration_df["speedup"] = T1 / duration_df["total_duration_seconds"] - - plt.figure(figsize=(10, 6)) - plt.plot(duration_df["cores"], duration_df["speedup"], marker="o", color="red") - - plt.xlabel("Number of Cores") - plt.ylabel("Speedup") - plt.title("Speedup vs Number of Cores") - plt.legend() - plt.xticks(ticks=df["cores"]) - plt.grid(True) - plt.savefig("speedup_plot.pdf", dpi=1200, bbox_inches="tight") - plt.show() - - -def plot_efficiency(df): - duration_df = df.groupby("cores")["total_duration_seconds"].first().reset_index() - duration_df = duration_df.sort_values("cores") - - T1 = duration_df.loc[ - duration_df["cores"] == BASE_CORE_COUNT, "total_duration_seconds" - ].values[0] - P1 = BASE_CORE_COUNT - - duration_df["speedup"] = T1 / duration_df["total_duration_seconds"] - duration_df["efficiency"] = (T1 / duration_df["total_duration_seconds"]) / ( - duration_df["cores"] / P1 - ) - - # Set efficiency to 100% for the BASE_CORE_COUNT - duration_df.loc[duration_df["cores"] == BASE_CORE_COUNT, "efficiency"] = 1.0 - - plt.figure(figsize=(10, 6)) - plt.plot( - duration_df["cores"], - duration_df["efficiency"] * 100, - marker="o", - color="green", - markersize=8, - linewidth=2, - ) - - plt.xlabel("Number of Cores", fontsize=12) - plt.ylabel("Efficiency (%)", fontsize=12) - plt.title("Efficiency vs Number of Cores", fontsize=14) - plt.xticks(ticks=df["cores"], fontsize=10) - plt.yticks(fontsize=10) - plt.grid(True) - plt.ylim(0, 105) # Set y-axis limit to 105% for better visibility - - # Add efficiency values as text above each point - for x, y in zip(duration_df["cores"], duration_df["efficiency"]): - plt.text( - x, y * 100 + 2, f"{y * 100:.1f}%", ha="center", va="bottom", fontsize=9 - ) - - plt.tight_layout() - plt.savefig("efficiency_plot.pdf", dpi=1200, bbox_inches="tight") - plt.show() - - -def plot_grid(df, plot_dir="."): - # Define font size variables - SMALL_FONT = 18 - MEDIUM_FONT = 19 - LARGE_FONT = 22 - EXTRA_LARGE_FONT = 29 - - # Define marker (dot) and line size variables - MARKER_SIZE = 8 - LINE_WIDTH = 2 - - # Increase the default font size - plt.rcParams.update({"font.size": MEDIUM_FONT}) - - duration_df = df.groupby("cores")["total_duration_seconds"].first().reset_index() - duration_df = duration_df.sort_values("cores") - - T1 = duration_df.loc[ - duration_df["cores"] == BASE_CORE_COUNT, "total_duration_seconds" - ].values[0] - P1 = BASE_CORE_COUNT - - duration_df["speedup"] = T1 / duration_df["total_duration_seconds"] - duration_df["efficiency"] = (duration_df["speedup"] / duration_df["cores"]) * ( - P1 / 1 - ) - - # Set efficiency to 100% for the BASE_CORE_COUNT - duration_df.loc[duration_df["cores"] == BASE_CORE_COUNT, "efficiency"] = 1.0 - - fig, axs = plt.subplots(2, 2, figsize=(20, 20)) - - # Duration plot - axs[0, 0].plot( - duration_df["cores"], - duration_df["total_duration_seconds"], - marker="o", - color="blue", - markersize=MARKER_SIZE, - linewidth=LINE_WIDTH, - ) - axs[0, 0].set_xlabel("Number of Cores", fontsize=LARGE_FONT) - axs[0, 0].set_ylabel("Total Job Duration (seconds)", fontsize=LARGE_FONT) - axs[0, 0].set_title("Duration vs Number of Cores", fontsize=EXTRA_LARGE_FONT) - axs[0, 0].set_xticks(df["cores"]) - axs[0, 0].grid(True) - axs[0, 0].tick_params(axis="both", which="major", labelsize=MEDIUM_FONT) - - # Speedup plot - axs[0, 1].plot( - duration_df["cores"], - duration_df["speedup"], - marker="o", - color="red", - markersize=MARKER_SIZE, - linewidth=LINE_WIDTH, - ) - axs[0, 1].set_xlabel("Number of Cores", fontsize=LARGE_FONT) - axs[0, 1].set_ylabel("Speedup", fontsize=LARGE_FONT) - axs[0, 1].set_title("Speedup vs Number of Cores", fontsize=EXTRA_LARGE_FONT) - axs[0, 1].set_xticks(df["cores"]) - axs[0, 1].grid(True) - axs[0, 1].tick_params(axis="both", which="major", labelsize=MEDIUM_FONT) - - # Efficiency plot - axs[1, 0].plot( - duration_df["cores"], - duration_df["efficiency"] * 100, - marker="o", - color="green", - markersize=MARKER_SIZE, - linewidth=LINE_WIDTH, - ) - axs[1, 0].set_xlabel("Number of Cores", fontsize=LARGE_FONT) - axs[1, 0].set_ylabel("Efficiency (%)", fontsize=LARGE_FONT) - axs[1, 0].set_title("Efficiency vs Number of Cores", fontsize=EXTRA_LARGE_FONT) - axs[1, 0].set_xticks(df["cores"]) - axs[1, 0].grid(True) - axs[1, 0].tick_params(axis="both", which="major", labelsize=MEDIUM_FONT) - - # Average time plot (all tasks) - tasks = df["name"].unique() - colors = plt.cm.rainbow(np.linspace(0, 1, len(tasks))) - - for task, color in zip(tasks, colors): - task_df = df[df["name"] == task].sort_values("cores") - axs[1, 1].plot( - task_df["cores"], - task_df["average_time_s"], - marker="o", - label=task, - color=color, - markersize=MARKER_SIZE, - linewidth=LINE_WIDTH, - ) - - axs[1, 1].set_xlabel("Number of Cores", fontsize=LARGE_FONT) - axs[1, 1].set_ylabel("Average Execution Time (seconds)", fontsize=LARGE_FONT) - axs[1, 1].set_title( - "Average Time vs Number of Cores (All Tasks)", fontsize=EXTRA_LARGE_FONT - ) - axs[1, 1].set_xticks(df["cores"]) - axs[1, 1].grid(True) - axs[1, 1].legend(loc="upper left", fontsize=SMALL_FONT) - axs[1, 1].tick_params(axis="both", which="major", labelsize=MEDIUM_FONT) - - plt.tight_layout() - plt.savefig(plot_dir + "/grid_plot.pdf", dpi=1200, bbox_inches="tight") - plt.show() - - -def plot_from_htmls( - html_files_dir: str, - task_names: List[str], -): - output_dir = html_files_dir + "/../results" - grid_plot_dir = html_files_dir + "/.." - os.makedirs(output_dir, exist_ok=True) - - # Find all HTML files in the specified directory - html_files = [ - os.path.join(html_files_dir, f) - for f in os.listdir(html_files_dir) - if f.endswith(".html") - ] - - print(html_files) - - def process_file(html_file): - process_html_file(html_file, task_names, output_dir) - - # client = dask.distributed.Client() - - for file in html_files: - process_file(file) - - # Read CSV files and create visualization - df = read_csv_files(output_dir) - - # Plot average time - # plot_scalability(df, plot_type='average') - # - # # Plot total time - # plot_scalability(df, plot_type='total') - # - # # Plot total duration - # plot_scalability(df, plot_type='duration') - # - # # New plots - # plot_speedup(df) - # plot_efficiency(df) - plot_grid(df, grid_plot_dir) diff --git a/experiments/text_storage_compression/exp.py b/experiments/text_storage_compression/exp.py deleted file mode 100644 index 4a03ceb..0000000 --- a/experiments/text_storage_compression/exp.py +++ /dev/null @@ -1,108 +0,0 @@ -import h5py -import numpy as np -import zlib -import time -from pathlib import Path -from typing import List, Tuple - - -# Approach 1: Creating an HDF5 dataset for each protein structure -def compress_and_save_h5_individual( - path_for_batch: Path, results: Tuple[List[str], List[str], List[str]] -): - start_time = time.time() - pdbs_file = path_for_batch / "pdbs_individual.h5" - all_res_pdbs = results[0] - all_contents = results[1] - if len(all_contents) == 0 or len(all_res_pdbs) == 0: - print("No files to save") - return None - if len(all_res_pdbs) != len(all_contents): - print("Wrong length of names and pdb contents") - return None - with h5py.File(pdbs_file, "w") as hf: - for pdb_name, pdb_content in zip(all_res_pdbs, all_contents): - hf.create_dataset( - pdb_name, data=pdb_content.encode("utf-8"), compression="gzip" - ) - end_time = time.time() - total_time = end_time - start_time - print(f"Compress time (individual): {total_time}") - return str(pdbs_file) - - -def compress_and_save_h5_individual_lzf( - path_for_batch: Path, results: Tuple[List[str], List[str], List[str]] -): - start_time = time.time() - pdbs_file = path_for_batch / "pdbs_individual.h5" - all_res_pdbs = results[0] - all_contents = results[1] - if len(all_contents) == 0 or len(all_res_pdbs) == 0: - print("No files to save") - return None - if len(all_res_pdbs) != len(all_contents): - print("Wrong length of names and pdb contents") - return None - with h5py.File(pdbs_file, "w") as hf: - for pdb_name, pdb_content in zip(all_res_pdbs, all_contents): - hf.create_dataset( - pdb_name, data=pdb_content.encode("utf-8"), compression="lzf" - ) - end_time = time.time() - total_time = end_time - start_time - print(f"Compress time (individual): {total_time}") - return str(pdbs_file) - - -# Approach 2: Creating an HDF5 dataset for all protein structures -def compress_and_save_h5_combined( - path_for_batch: Path, results: Tuple[List[str], List[str], List[str]] -): - start_time = time.time() - pdbs_file = path_for_batch / "pdbs_combined.h5" - all_res_pdbs = results[0] - all_contents = results[1] - if len(all_contents) == 0 or len(all_res_pdbs) == 0: - print("No files to save") - return None - if len(all_res_pdbs) != len(all_contents): - print("Wrong length of names and pdb contents") - return None - with h5py.File(pdbs_file, "w") as hf: - combined_content = "|".join(all_contents) - compressed_content = np.frombuffer( - combined_content.encode("utf-8"), dtype=np.uint8 - ) - hf.create_dataset( - ";".join(all_res_pdbs), data=compressed_content, compression="gzip" - ) - end_time = time.time() - total_time = end_time - start_time - print(f"Compress time (combined): {total_time}") - return str(pdbs_file) - - -# Approach 3: Compressing data before storing (existing implementation) -def compress_and_save_h5( - path_for_batch: Path, results: Tuple[List[str], List[str], List[str]] -): - start_time = time.time() - pdbs_file = path_for_batch / "pdbs.h5" - all_res_pdbs = results[0] - all_contents = results[1] - if len(all_contents) == 0 or len(all_res_pdbs) == 0: - print("No files to save") - return None - if len(all_res_pdbs) != len(all_contents): - print("Wrong length of names and pdb contents") - return None - with h5py.File(pdbs_file, "w") as hf: - files_group = hf.create_group("files") - files_together = zlib.compress("|".join(all_contents).encode("utf-8")) - pdbs_content = np.frombuffer(files_together, dtype=np.uint8) - files_group.create_dataset(name=";".join(all_res_pdbs), data=pdbs_content) - end_time = time.time() - total_time = end_time - start_time - print(f"Compress time: {total_time}") - return str(pdbs_file) diff --git a/install.sh b/install.sh index 83be169..0b1700c 100644 --- a/install.sh +++ b/install.sh @@ -1,10 +1,50 @@ +#!/bin/bash +# Installation script for toolbox environment +# Usage: ./install.sh [--cpu] +# --cpu: Install CPU-only PyTorch (for CI or non-GPU systems) +# Default: Install GPU-enabled PyTorch + set -e + GROUP_DIR=$1 +CPU_ONLY=false + +# Parse optional flags +shift +for arg in "$@"; do + case $arg in + --cpu) + CPU_ONLY=true + ;; + esac +done + +if [ -z "$GROUP_DIR" ]; then + echo "Usage: ./install.sh [--cpu]" + exit 1 +fi + CONDA_DIR="$GROUP_DIR/.conda" conda config --add pkgs_dirs "$CONDA_DIR" +# Create environment from base YAML (without PyTorch) conda env create --prefix $ENV_PATH --file "toolbox_env_conda.yml" conda config --set auto_activate_base false source activate $ENV_PATH + +# Install PyTorch based on mode +if [ "$CPU_ONLY" = true ]; then + echo "Installing CPU-only PyTorch..." + conda install -y pytorch cpuonly -c pytorch -c conda-forge +else + echo "Installing GPU-enabled PyTorch..." + conda install -y pytorch-gpu -c conda-forge +fi + +# Install ESM (requires PyTorch to be installed first) +echo "Installing ESM..." +pip install esm + +echo "Installation complete!" diff --git a/tests/test_distograms.py b/tests/test_distograms.py index 3e95fe7..e10ed20 100644 --- a/tests/test_distograms.py +++ b/tests/test_distograms.py @@ -14,6 +14,9 @@ OUTPATH = Path(__file__).parent / "data" / "distograms_generated" EXPPATH = Path(__file__).parent / "data" / "distograms_expected" +# Ensure output directory exists (needed for CI where it doesn't exist) +OUTPATH.mkdir(parents=True, exist_ok=True) + RTOL = 1e-3 # TODO: small precision ATOL = 1e-8 diff --git a/toolbox_env_conda.yml b/toolbox_env_conda.yml index 68d5737..5973380 100644 --- a/toolbox_env_conda.yml +++ b/toolbox_env_conda.yml @@ -16,8 +16,8 @@ dependencies: - genson - bs4 - bio~=1.7.0 + - pytest - pytest-order - - esm - dask>=2024.4.2,<2025.0.0 - dask[dataframe] - distributed>=2024.4.2,<2025.0.0 @@ -26,7 +26,8 @@ dependencies: - biopython - bokeh>=2.4.2,<3.0.0 - pdbfixer - - pytorch-gpu + # PyTorch and ESM installed separately via install.sh (GPU or CPU variant) + # ESM requires PyTorch to be installed first - mamba - transformers - tqdm