diff --git a/mzident/src/file_format.rs b/mzident/src/file_format.rs index 750ef1fc..c58453a2 100644 --- a/mzident/src/file_format.rs +++ b/mzident/src/file_format.rs @@ -109,8 +109,17 @@ impl KnownFileFormat { Self::Fasta => Some(term!(MS:1001348|FASTA format)), Self::BasicCSV(_) => None, Self::DeepNovoFamily(_) => None, - Self::InstaNovo(InstaNovoVersion::V1_0_0) => Some(term!(MS:1003612|InstaNovo)), - Self::InstaNovo(InstaNovoVersion::PlusV1_1_4) => Some(term!(MS:1003613|InstaNovo+)), + Self::InstaNovo( + InstaNovoVersion::V1_0_0 + | InstaNovoVersion::V1_1_0 + | InstaNovoVersion::V1_1_4 + | InstaNovoVersion::V1_2_2, + ) => Some(term!(MS:1003612|InstaNovo)), + Self::InstaNovo( + InstaNovoVersion::PlusV1_1_4 + | InstaNovoVersion::PlusV1_2_2 + | InstaNovoVersion::CombinedV1_2_2, + ) => Some(term!(MS:1003613|InstaNovo+)), Self::MaxQuant(_) => Some(term!(MS:1001583|MaxQuant)), Self::MetaMorpheus(_) => Some(term!(MS:1002826|MetaMorpheus)), Self::MzTab => Some(term!(MS:1002601|mzTab)), @@ -142,8 +151,27 @@ impl TryFrom for KnownFileFormat { fn try_from(value: CVTerm) -> Result { match value.term.accession { curie!(MS:1001348|FASTA format) => Ok(Self::Fasta), - curie!(MS:1003612|InstaNovo) => Ok(Self::InstaNovo(InstaNovoVersion::V1_0_0)), - curie!(MS:1003613|InstaNovo+) => Ok(Self::InstaNovo(InstaNovoVersion::PlusV1_1_4)), + curie!(MS:1003612|InstaNovo) => Ok(Self::InstaNovo(match value.value.as_ref() { + value if value.eq_ignore_ascii_case(InstaNovoVersion::V1_1_0.name()) => { + InstaNovoVersion::V1_1_0 + } + value if value.eq_ignore_ascii_case(InstaNovoVersion::V1_1_4.name()) => { + InstaNovoVersion::V1_1_4 + } + value if value.eq_ignore_ascii_case(InstaNovoVersion::V1_2_2.name()) => { + InstaNovoVersion::V1_2_2 + } + _ => InstaNovoVersion::V1_0_0, + })), + curie!(MS:1003613|InstaNovo+) => Ok(Self::InstaNovo(match value.value.as_ref() { + value if value.eq_ignore_ascii_case(InstaNovoVersion::PlusV1_2_2.name()) => { + InstaNovoVersion::PlusV1_2_2 + } + value if value.eq_ignore_ascii_case(InstaNovoVersion::CombinedV1_2_2.name()) => { + InstaNovoVersion::CombinedV1_2_2 + } + _ => InstaNovoVersion::PlusV1_1_4, + })), curie!(MS:1001583|MaxQuant) => { for v in MaxQuantPSM::VERSIONS { if value.value.eq_ignore_ascii_case(v.version.name()) { diff --git a/mzident/src/formats/instanovo.rs b/mzident/src/formats/instanovo.rs index 7637e751..60bcabf5 100644 --- a/mzident/src/formats/instanovo.rs +++ b/mzident/src/formats/instanovo.rs @@ -31,7 +31,15 @@ static BUILT_IN_MODIFICATIONS: OnceLock = OnceLock::new format_family!( InstaNovo, - SemiAmbiguous, PeptidoformPresent, [&INSTANOVO_V1_0_0, &INSTANOVOPLUS_V1_1_4], b',', None; + SemiAmbiguous, PeptidoformPresent, [ + &INSTANOVO_COMBINED_V1_2_2, + &INSTANOVO_V1_2_2, + &INSTANOVOPLUS_V1_2_2, + &INSTANOVO_V1_1_0, + &INSTANOVO_V1_1_4, + &INSTANOVOPLUS_V1_1_4, + &INSTANOVO_V1_0_0, + ], b',', None; required { scan_number: usize, |location: Location, _| location.parse(NUMBER_ERROR); mz: MassOverCharge, |location: Location, _| location.parse::(NUMBER_ERROR).map(MassOverCharge::new::); @@ -54,21 +62,33 @@ format_family!( score: f64, |location: Location, _| location.parse::(NUMBER_ERROR); } optional { - local_confidence: Vec, |location: Location, _| location - .trim_start_matches("[").trim_end_matches("]") - .array(',') - .map(|l| l.parse::(NUMBER_ERROR)) - .collect::, _>>(); + local_confidence: Vec, |location: Location, _| { + let location = location.trim_start_matches("[").trim_end_matches("]"); + location.or_empty().map_or(Ok(Vec::new()), |location| { + location + .array(',') + .map(|l| l.parse::(NUMBER_ERROR)) + .collect::, _>>() + }) + }; used_model: UsedModel, |location: Location, _| location.parse::(("Invalid InstaNovo line", "The selected model has to be 'diffusion' or 'transformer'.")); } - fn post_process(_source: &CsvLine, mut parsed: Self, _ontologies: &Ontologies) -> Result> { + fn post_process(source: &CsvLine, mut parsed: Self, _ontologies: &Ontologies) -> Result> { + validate_instanovo_schema(source, &parsed)?; + + if parsed.local_confidence.as_ref().is_some_and(Vec::is_empty) { + parsed.local_confidence = None; + } // Only keep the parsed local_confidence is the `UsedModel == Transformer` if let Some(used_model) = parsed.used_model && used_model == UsedModel::Diffusion { parsed.local_confidence = None; } if let Some(local_confidence) = parsed.local_confidence.as_mut() && !parsed.peptide.get_n_term().is_empty() { - *local_confidence = local_confidence[parsed.peptide.get_n_term().len()..].to_vec(); + let offset = parsed.peptide.get_n_term().len(); + if local_confidence.len() >= offset { + *local_confidence = local_confidence[offset..].to_vec(); + } } Ok(parsed) } @@ -87,7 +107,33 @@ pub const INSTANOVO_V1_0_0: InstaNovoFormat = InstaNovoFormat { used_model: OptionalColumn::NotAvailable, }; -/// The only known version of InstaNovoPlus +/// InstaNovo version 1.1.0 +pub const INSTANOVO_V1_1_0: InstaNovoFormat = InstaNovoFormat { + version: InstaNovoVersion::V1_1_0, + scan_number: "scan_number", + mz: "precursor_mz", + z: "precursor_charge", + raw_file: "experiment_name", + peptide: "predictions", + score: "log_probabilities", + local_confidence: OptionalColumn::Required("token_log_probabilities"), + used_model: OptionalColumn::NotAvailable, +}; + +/// InstaNovo version 1.1.4 +pub const INSTANOVO_V1_1_4: InstaNovoFormat = InstaNovoFormat { + version: InstaNovoVersion::V1_1_4, + scan_number: "scan_number", + mz: "precursor_mz", + z: "precursor_charge", + raw_file: "experiment_name", + peptide: "preds", + score: "log_probs", + local_confidence: OptionalColumn::Required("token_log_probs"), + used_model: OptionalColumn::NotAvailable, +}; + +/// The known InstaNovoPlus 1.1.4 output schema pub const INSTANOVOPLUS_V1_1_4: InstaNovoFormat = InstaNovoFormat { version: InstaNovoVersion::PlusV1_1_4, scan_number: "scan_number", @@ -100,6 +146,45 @@ pub const INSTANOVOPLUS_V1_1_4: InstaNovoFormat = InstaNovoFormat { used_model: OptionalColumn::Required("selected_model"), }; +/// InstaNovo version 1.2.2 transformer output +pub const INSTANOVO_V1_2_2: InstaNovoFormat = InstaNovoFormat { + version: InstaNovoVersion::V1_2_2, + scan_number: "scan_number", + mz: "precursor_mz", + z: "precursor_charge", + raw_file: "experiment_name", + peptide: "predictions", + score: "log_probs", + local_confidence: OptionalColumn::Required("token_log_probs"), + used_model: OptionalColumn::NotAvailable, +}; + +/// InstaNovoPlus version 1.2.2 standalone output +pub const INSTANOVOPLUS_V1_2_2: InstaNovoFormat = InstaNovoFormat { + version: InstaNovoVersion::PlusV1_2_2, + scan_number: "scan_number", + mz: "precursor_mz", + z: "precursor_charge", + raw_file: "experiment_name", + peptide: "predictions", + score: "log_probs", + local_confidence: OptionalColumn::Required("token_log_probs"), + used_model: OptionalColumn::NotAvailable, +}; + +/// InstaNovo version 1.2.2 combined transformer and InstaNovoPlus refined output +pub const INSTANOVO_COMBINED_V1_2_2: InstaNovoFormat = InstaNovoFormat { + version: InstaNovoVersion::CombinedV1_2_2, + scan_number: "scan_number", + mz: "precursor_mz", + z: "precursor_charge", + raw_file: "experiment_name", + peptide: "predictions", + score: "log_probs", + local_confidence: OptionalColumn::Required("token_log_probs"), + used_model: OptionalColumn::NotAvailable, +}; + /// All possible InstaNovo versions #[derive( Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default, Serialize, Deserialize, @@ -108,8 +193,18 @@ pub enum InstaNovoVersion { #[default] /// InstaNovo version 1.0.0 V1_0_0, + /// InstaNovo version 1.1.0 + V1_1_0, + /// InstaNovo version 1.1.4 + V1_1_4, /// InstaNovoPlus version 1.1.4 using refinement PlusV1_1_4, + /// InstaNovo version 1.2.2 + V1_2_2, + /// InstaNovoPlus version 1.2.2 standalone predictions + PlusV1_2_2, + /// InstaNovo version 1.2.2 combined transformer and InstaNovoPlus refined predictions + CombinedV1_2_2, } impl std::fmt::Display for InstaNovoVersion { @@ -122,15 +217,99 @@ impl PSMFileFormatVersion for InstaNovoVersion { fn format(self) -> InstaNovoFormat { match self { Self::V1_0_0 => INSTANOVO_V1_0_0, + Self::V1_1_0 => INSTANOVO_V1_1_0, + Self::V1_1_4 => INSTANOVO_V1_1_4, Self::PlusV1_1_4 => INSTANOVOPLUS_V1_1_4, + Self::V1_2_2 => INSTANOVO_V1_2_2, + Self::PlusV1_2_2 => INSTANOVOPLUS_V1_2_2, + Self::CombinedV1_2_2 => INSTANOVO_COMBINED_V1_2_2, } } fn name(self) -> &'static str { match self { Self::V1_0_0 => "v1.0.0", + Self::V1_1_0 => "v1.1.0", + Self::V1_1_4 => "v1.1.4", Self::PlusV1_1_4 => "Plus v1.1.4", + Self::V1_2_2 => "v1.2.2", + Self::PlusV1_2_2 => "Plus v1.2.2", + Self::CombinedV1_2_2 => "Combined v1.2.2", + } + } +} + +fn validate_instanovo_schema( + source: &CsvLine, + parsed: &InstaNovoPSM, +) -> Result<(), BoxedError<'static, BasicKind>> { + match parsed.version { + InstaNovoVersion::V1_1_0 | InstaNovoVersion::V1_1_4 => { + if !has_column(source, "delta_mass_ppm") { + return Err(instanovo_schema_error( + source, + "This InstaNovo version requires the 'delta_mass_ppm' column", + )); + } + } + InstaNovoVersion::V1_2_2 => { + if has_column(source, "instanovoplus_predictions") { + return Err(instanovo_schema_error( + source, + "This is an InstaNovo combined output, not a transformer-only output", + )); + } + if parsed.local_confidence.as_ref().is_some_and(Vec::is_empty) { + return Err(instanovo_schema_error( + source, + "This InstaNovo transformer output requires token log probabilities", + )); + } + } + InstaNovoVersion::PlusV1_2_2 => { + if has_column(source, "instanovoplus_predictions") { + return Err(instanovo_schema_error( + source, + "This is an InstaNovo combined output, not a standalone InstaNovoPlus output", + )); + } + if parsed + .local_confidence + .as_ref() + .is_some_and(|local_confidence| !local_confidence.is_empty()) + { + return Err(instanovo_schema_error( + source, + "This is an InstaNovo transformer output, not a standalone InstaNovoPlus output", + )); + } + } + InstaNovoVersion::CombinedV1_2_2 => { + if !has_column(source, "instanovoplus_predictions") { + return Err(instanovo_schema_error( + source, + "This InstaNovo version requires the 'instanovoplus_predictions' column", + )); + } } + InstaNovoVersion::V1_0_0 | InstaNovoVersion::PlusV1_1_4 => {} } + Ok(()) +} + +fn has_column(source: &CsvLine, column: &str) -> bool { + source.index_column(column).is_ok() +} + +fn instanovo_schema_error( + source: &CsvLine, + message: &'static str, +) -> BoxedError<'static, BasicKind> { + BoxedError::new( + BasicKind::Error, + "Invalid InstaNovo line", + message, + source.full_context().to_owned(), + ) } /// The model that produced the final prediction for an InstaNovoPlus @@ -193,8 +372,13 @@ impl PSMMetaData for InstaNovoPSM { fn search_engine(&self) -> Option { Some(match self.version { - InstaNovoVersion::V1_0_0 => mzcv::term!(MS:1003612|InstaNovo), - InstaNovoVersion::PlusV1_1_4 => mzcv::term!(MS:1003613|InstaNovo+), + InstaNovoVersion::V1_0_0 + | InstaNovoVersion::V1_1_0 + | InstaNovoVersion::V1_1_4 + | InstaNovoVersion::V1_2_2 => mzcv::term!(MS:1003612|InstaNovo), + InstaNovoVersion::PlusV1_1_4 + | InstaNovoVersion::PlusV1_2_2 + | InstaNovoVersion::CombinedV1_2_2 => mzcv::term!(MS:1003613|InstaNovo+), }) } diff --git a/mzident/src/formats/instanovo_tests.rs b/mzident/src/formats/instanovo_tests.rs index 9996c58e..590a19a1 100644 --- a/mzident/src/formats/instanovo_tests.rs +++ b/mzident/src/formats/instanovo_tests.rs @@ -27,7 +27,7 @@ fn instanovo_v1_1_4() { &mzcore::ontology::STATIC_ONTOLOGIES, false, true, - Some(InstaNovoVersion::V1_0_0), + Some(InstaNovoVersion::V1_1_4), ) { Ok(n) => assert_eq!(n, 20), Err(e) => { @@ -37,6 +37,23 @@ fn instanovo_v1_1_4() { } } +#[test] +fn instanovo_v1_1_0() { + match test_format::( + BufReader::new(INSTANOVO_V1_1_0.as_bytes()), + &mzcore::ontology::STATIC_ONTOLOGIES, + false, + true, + Some(InstaNovoVersion::V1_1_0), + ) { + Ok(n) => assert_eq!(n, 1), + Err(e) => { + println!("{e}"); + panic!("Failed PSMs test"); + } + } +} + #[test] fn instanovoplus_v1_1_4() { match test_format::( @@ -54,6 +71,57 @@ fn instanovoplus_v1_1_4() { } } +#[test] +fn instanovo_v1_2_2() { + match test_format::( + BufReader::new(INSTANOVO_V1_2_2.as_bytes()), + &mzcore::ontology::STATIC_ONTOLOGIES, + false, + true, + Some(InstaNovoVersion::V1_2_2), + ) { + Ok(n) => assert_eq!(n, 1), + Err(e) => { + println!("{e}"); + panic!("Failed PSMs test"); + } + } +} + +#[test] +fn instanovoplus_v1_2_2() { + match test_format::( + BufReader::new(INSTANOVOPLUS_V1_2_2.as_bytes()), + &mzcore::ontology::STATIC_ONTOLOGIES, + false, + false, + Some(InstaNovoVersion::PlusV1_2_2), + ) { + Ok(n) => assert_eq!(n, 1), + Err(e) => { + println!("{e}"); + panic!("Failed PSMs test"); + } + } +} + +#[test] +fn instanovo_combined_v1_2_2() { + match test_format::( + BufReader::new(INSTANOVO_COMBINED_V1_2_2.as_bytes()), + &mzcore::ontology::STATIC_ONTOLOGIES, + false, + false, + Some(InstaNovoVersion::CombinedV1_2_2), + ) { + Ok(n) => assert_eq!(n, 1), + Err(e) => { + println!("{e}"); + panic!("Failed PSMs test"); + } + } +} + const INSTANOVO_V1_0_0: &str = r#"scan_number,precursor_mz,precursor_charge,experiment_name,spectrum_id,preds,preds_tokenised,log_probs,token_log_probs 0,1353.116333007813,4,20230408_F1_UM4_Peng0013_SA_EXT00_her_01_tryp,20230408_F1_UM4_Peng0013_SA_EXT00_her_01_tryp:0,LKVKVILEAEPS(+79.97)EEEEEEEEEEEEEEEEEEEEEEEEKEEK,"L, K, V, K, V, I, L, E, A, E, P, S(+79.97), E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, K, E, E, K",-47.14482498168945,"[-1.1898047924041748, -1.2532058954238892, -1.4706779718399048, -1.578391671180725, -1.910727858543396, -0.4288635551929474, -0.10262472927570343, -0.2335159033536911, -0.3816143870353699, -0.1399289071559906, -0.2679944634437561, -0.37487441301345825, -0.28003591299057007, -0.29957395792007446, -0.6062297224998474, -1.0798466205596924, -1.3055310249328613, -1.1969765424728394, -0.8466325402259827, -0.7559331655502319, -0.8520379066467285, -1.1635522842407227, -1.5230286121368408, -1.5223480463027954, -1.3874539136886597, -1.3555835485458374, -1.3308098316192627, -1.461938738822937, -1.292738437652588, -1.7667877674102783, -1.8383617401123047, -1.924727439880371, -1.5695301294326782, -1.4049240350723267, -1.2322568893432617, -1.1730256080627441, -0.09055394679307938, -3.6145036220550537, -1.8250231742858887, -3.1126556396484375]" 1,1353.116333007813,4,20230408_F1_UM4_Peng0013_SA_EXT00_her_01_tryp,20230408_F1_UM4_Peng0013_SA_EXT00_her_01_tryp:1,SSSSTSGS(+79.97)DC(+57.02)DGVHVEPEEEDMES(+79.97)EDEDEDEDLVTSTTSK,"S, S, S, S, T, S, G, S(+79.97), D, C(+57.02), D, G, V, H, V, E, P, E, E, E, D, M, E, S(+79.97), E, D, E, D, E, D, E, D, L, V, T, S, T, T, S, K",-33.68059539794922,"[-0.07847003638744354, -0.5382011532783508, -0.38900521397590637, -0.3034709692001343, -0.14283983409404755, -0.004337664693593979, -0.005992896854877472, -0.443778932094574, -2.0242862701416016, -1.3575999736785889, -1.3120659589767456, -0.8160025477409363, -0.9171149730682373, -0.1492014229297638, -0.28191035985946655, -1.03749680519104, -0.7952876687049866, -0.11219097673892975, -0.6492378115653992, -0.0394880585372448, -0.35166993737220764, -0.031147046014666557, -0.1014118641614914, -0.8919384479522705, -0.5123100876808167, -1.5009464025497437, -0.7995803952217102, -0.8618601560592651, -0.74873948097229, -1.0185350179672241, -1.0649776458740234, -1.9072270393371582, -1.3915926218032837, -1.454406499862671, -0.8135812878608704, -0.7613984942436218, -1.0519371032714844, -2.232295274734497, -2.410634994506836, -2.3764281272888184]" @@ -124,3 +192,23 @@ const INSTANOVOPLUS_V1_1_4: &str = r#"scan_number,precursor_mz,precursor_charge, 186,403.233581542969,3,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G:186,"['C[UNIMOD:4]', 'M[UNIMOD:35]', 'L', 'V', 'R']",C[UNIMOD:4]M[UNIMOD:35]LVR,-0.712028980255127,LGAEALLRPGR,"L, G, A, E, A, L, L, R, P, G, R",-352.6634826660156,"[-0.37654954195022583, -1.5804766416549683, -1.115369439125061, -0.18904243409633636, -0.29284417629241943, -0.8900014758110046, -1.3177595138549805, -1.512885570526123, -0.008747827261686325, -0.2933116555213928, -0.28636452555656433]",C[UNIMOD:4]M[UNIMOD:35]LVR,"['C[UNIMOD:4]', 'M[UNIMOD:35]', 'L', 'V', 'R']",-0.712028980255127,diffusion,False 188,393.084381103516,3,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G:188,"['L', 'C[UNIMOD:4]', 'R']",LC[UNIMOD:4]R,-1.0205368995666504,DRLIC[UNIMOD:4]RGEK,"D, R, L, I, C[UNIMOD:4], R, G, E, K",-364.02386474609375,"[-1.8955063819885254, -2.015449047088623, -1.8443589210510254, -2.2285003662109375, -1.100846529006958, -1.6334882974624634, -1.6147112846374512, -1.5314174890518188, -0.348799467086792]",LC[UNIMOD:4]R,"['L', 'C[UNIMOD:4]', 'R']",-1.0205368995666504,diffusion,False 2453,698.256530761719,2,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G,msconvert_20250515_EX1_UM1_plitt001_SA_EXT00_d5_G:2453,"['M[UNIMOD:35]', 'A', 'K']",M[UNIMOD:35]AK,-0.7507774829864502,[UNIMOD:5]GEEEEDDDSLSK,"[UNIMOD:5], G, E, E, E, E, D, D, D, S, L, S, K",-28.99419403076172,"[-12.311272621154785, -2.684276580810547, -0.6591862440109253, -0.2280793935060501, -0.23282620310783386, -0.817415714263916, -0.9277580976486206, -1.9244579076766968, -1.6701242923736572, -0.6400895714759827, -1.5620484352111816, -1.648794174194336, -0.2003186196088791]",M[UNIMOD:35]AK,"['M[UNIMOD:35]', 'A', 'K']",-0.7507774829864502,diffusion,False"#; + +// Derived from the first row of: +// https://zenodo.org/records/20756892/files/SF_200217_U2OS_TiO2_HCD_OT_rep1.full.mgf.instanovo-1.1.0.transformer.model-instanovo-v1.1.0.denovo.greedy.beams-1.columns-predictions.csv?download=1 +const INSTANOVO_V1_1_0: &str = r#"scan_number,precursor_mz,precursor_charge,experiment_name,spectrum_id,predictions,predictions_tokenised,log_probabilities,token_log_probabilities,delta_mass_ppm +0,419.314971923828,2,SF_200217_U2OS_TiO2_HCD_OT_rep1,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,EEEEEK,"E, E, E, E, E, K",-390.9980163574219,"[-0.6682913303375244, -0.6299808621406555, -1.2502485513687134, -1.481540322303772, -1.9249613285064697, -0.5132914781570435]",52816.51838525617"#; + +// Derived from the first row of: +// https://zenodo.org/records/20756892/files/SF_200217_U2OS_TiO2_HCD_OT_rep1.full.mgf.instanovo-1.2.2.transformer.model-instanovo-v1.2.0.denovo.greedy.beams-1.normalized-columns.csv?download=1 +const INSTANOVO_V1_2_2: &str = r#"experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm +SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-1147.98681640625,"[-0.015801219269633293, -1.1395305395126343, -2.2013168334960938, -1.3749353885650635, -1.4705305099487305, -0.5675679445266724]",no_group,"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K",58846.475981092575"#; + +// Derived from the first row of: +// https://zenodo.org/records/20756892/files/SF_200217_U2OS_TiO2_HCD_OT_rep1.full.mgf.instanovo-1.2.2.instanovoplus.model-instanovoplus-v1.1.0.denovo.no-refinement.normalized-columns.csv?download=1 +const INSTANOVOPLUS_V1_2_2: &str = r#"experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm +SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,MC[UNIMOD:4]IPDQPM[UNIMOD:35]EVDNEDDAPLPPPEAR,-3.6934256553649902,,no_group,"M, C[UNIMOD:4], I, P, D, Q, P, M[UNIMOD:35], E, V, D, N, E, D, D, A, P, L, P, P, P, E, A, R",2282970.310323359"#; + +// Derived from the first row of: +// https://zenodo.org/records/20756892/files/SF_200217_U2OS_TiO2_HCD_OT_rep1.full.mgf.instanovo-1.2.2.combined.model-instanovo-v1.2.0.instanovoplus-v1.1.0.denovo.refined.save-all-predictions.csv?download=1 +const INSTANOVO_COMBINED_V1_2_2: &str = r#"experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,instanovo_predictions,instanovo_prediction_log_probability,instanovo_prediction_token_log_probabilities,instanovo_predictions_beam_0,instanovo_predictions_log_probability_beam_0,instanovo_predictions_token_log_probabilities_beam_0,instanovo_predictions_beam_1,instanovo_predictions_log_probability_beam_1,instanovo_predictions_token_log_probabilities_beam_1,instanovo_predictions_beam_2,instanovo_predictions_log_probability_beam_2,instanovo_predictions_token_log_probabilities_beam_2,instanovo_predictions_beam_3,instanovo_predictions_log_probability_beam_3,instanovo_predictions_token_log_probabilities_beam_3,instanovo_predictions_beam_4,instanovo_predictions_log_probability_beam_4,instanovo_predictions_token_log_probabilities_beam_4,instanovoplus_predictions,instanovoplus_prediction_log_probability,instanovoplus_prediction_token_log_probabilities,instanovoplus_unrefined_predictions,predictions_tokenised,delta_mass_ppm +SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,LIRPLLK,-0.6334811449050903,,no_group,"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']",-10.102036476135254,"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]",LKGDS[UNIMOD:21]PK,-10.102036476135254,"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]",VKGDS[UNIMOD:21]PK,-11.082494735717773,"[-2.8237648010253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]",SKGDS[UNIMOD:21]PK,-11.430251121520996,"[-2.7461280822753906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]",AKGDS[UNIMOD:21]PK,-11.492465019226074,"[-3.1643409729003906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]",PKGDS[UNIMOD:21]PK,-11.968438148498535,"[-2.6694679260253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]","['L', 'I', 'R', 'P', 'L', 'L', 'K']",-0.6334811449050903,,"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']","L, I, R, P, L, L, K",17862.82765389216"#;