Source code for pcpfm.main

"""
This is the main module in the pcpfm. All functions that are intended to be called by 
an end user are located here, although API access to the underlying modules is possible.

Each function in the Main object maps to a single command on the command line. 
"""

import os
import json
import multiprocessing as mp
import argparse
import csv
import zipfile
import gdown
from . import Experiment
from . import EmpCpds
from . import default_parameters
from . import Report

class Main():
    """
    Wrapper around all of the CLI subcommands.

    Every static method (other than process_params) implements one subcommand
    and receives the parameter dictionary built by process_params(). Keeping
    them on one object lets main() dispatch on the subcommand name via
    getattr().
    """

    @staticmethod
    def process_params():
        """
        Parse the command line and merge it with the default parameters.

        Precedence, lowest to highest: default_parameters.PARAMETERS, the JSON
        file given by -p/--parameters, then any command line option with a
        truthy value. Afterwards every string value ending in ".json" (except
        'input') is replaced by the parsed content of that file, which allows
        complex data structures to be supplied for a parameter.

        :return: parameters dictionary
        """
        # NOTE(review): this is the module-level defaults dict itself, so the
        # updates below are visible to every other user of
        # default_parameters.PARAMETERS. Kept as-is in case other modules rely
        # on it -- confirm before switching to a copy.
        params = default_parameters.PARAMETERS

        parser = argparse.ArgumentParser(description='pcpfm, LC-MS end-to-end processing')
        parser.add_argument('subcommand', metavar='subcommand',
                            help='one of the subcommands: _____')
        parser.add_argument('-p', '--parameters')
        parser.add_argument('-m', '--mode', default=None)
        parser.add_argument('--ppm', default=5, type=int)
        parser.add_argument('-s', '--sequence')
        parser.add_argument('-c', '--cores', type=int)
        parser.add_argument('-MS2_dir')
        parser.add_argument('-f', '--filter')
        parser.add_argument('-j', '--project')
        parser.add_argument('-o', '--output')
        parser.add_argument('-i', '--input')
        parser.add_argument('--name_field', default='File Name')
        parser.add_argument('--path_field', default='Filepath')
        parser.add_argument('--asari_command')
        parser.add_argument('-tm', '--table_moniker')
        parser.add_argument('-em', '--empCpd_moniker')
        parser.add_argument('-nm', '--new_moniker')
        parser.add_argument('-cb', '--color_by', default=[])
        parser.add_argument('-bb', '--by_batch')
        parser.add_argument('-mb', '--marker_by', default=[])
        parser.add_argument('-tb', '--text_by', default=[])
        # Plain string-valued options with no default (QAQC selections).
        for option in (
            '--all',
            '--pca',
            '--tsne',
            '--spearman',
            '--kendall',
            '--missing_feature_distribution',
            '--missing_feature_percentiles',
            '--median_correlation_outlier_detection',
            '--missing_feature_outlier_detection',
            '--intensity_analysis',
            '--feature_distribution',
            '--feature_outlier_detection',
        ):
            parser.add_argument(option)
        parser.add_argument('--interactive_plots', default=False)
        parser.add_argument('--save_plots', default=False)
        # Plain string-valued options with no default (processing settings).
        for option in (
            '--khipu_isotopes',
            '--khipu_charges',
            '--khipu_extended_adducts',
            '--khipu_adducts_pos',
            '--khipu_adducts_neg',
            '--khipu_rt_tolerance',
            '--blank_value',
            '--sample_value',
            '--query_field',
            '--blank_intensity_ratio',
            '--drop_name',
            '--drop_field',
            '--drop_value',
            '--drop_others',
            '--qaqc_filter',
            '--conversion_command',
            '--preprocessing_config',
            '--new_csv_path',
            '--TIC_normalization_percentile',
            '--normalize_value',
            '--feature_retention_percentile',
            '--interpolation_ratio',
            '--ms2_dir',
            '--report_config',
            '--sample_for_ratio',
            '--deriv_formula',
            '--msp_files',
            '--skip_list',
            '--add_singletons',
        ):
            parser.add_argument(option)
        parser.add_argument('--extra_asari', default=None)
        for option in ('--targets', '--annot_rt_tolerance', '--annot_mz_tolerance'):
            parser.add_argument(option)

        args = parser.parse_args()
        if args.parameters:
            with open(args.parameters, encoding='utf-8') as param_fh:
                params.update(json.load(param_fh))

        # Only truthy command line values override defaults / the params file.
        for key, value in vars(args).items():
            if value:
                params[key] = value

        # NOTE(review): -c/--cores is stored under 'cores' and is not consulted
        # here; only 'multicores' is capped. Confirm whether --cores should
        # feed 'multicores'.
        params['multicores'] = min(mp.cpu_count(), params['multicores'])

        # Split before the JSON pass so a whitespace separated list of targets
        # stays a list of paths.
        if isinstance(params.get('targets'), str):
            params['targets'] = params['targets'].split()

        for key, value in list(params.items()):
            # 'input' is a path to the experiment (possibly the experiment.json
            # itself); it must stay a path for the normalisation below.
            if key == 'input':
                continue
            if isinstance(value, str) and value.endswith(".json"):
                with open(value, encoding='utf-8') as json_fh:
                    params[key] = json.load(json_fh)

        input_path = params.get('input')
        if isinstance(input_path, str) and not input_path.endswith("experiment.json"):
            params['input'] = os.path.join(os.path.abspath(input_path), "experiment.json")
        return params

    @staticmethod
    def download_extras(params):
        """
        Download and unpack the optional pcpfm extras next to this module.

        The extras are the ThermoRawFileConverter archive and the annotation
        sources archive (per the notice below: JMS-compliant HMDB and LMSD,
        and MoNA .msp files). The license notice is printed first and nothing
        is downloaded unless the user types exactly 'yes'. Each archive is
        fetched with gdown, extracted into this module's directory and then
        deleted.

        :param params: parameter dictionary produced by process_params()
            (not used by this command)
        """
        warning = '''
        Pcpfm extras are not actively maintained by the developers of pcpfm and are redistributed forms of
        third party publically available tools. Any issues encountered with these extras may or may not be
        a problem of pcpfm; however, feel free to raise an issue for us to evaluate.

        These extras include a JMS-compliant version of the HMDB, LMSD, MoNA.msp files, and the ThermoRawFileParser.

        All use of these extras are subject to the terms and conditions outlined by their owners. Notably, the HMDB
        is NOT available for commercial use without a license so please do not use this verison of it for commercial use.

        Additionally, please cite the original publications for these tools if you use them in your project:

        HMDB - https://hmdb.ca/
        Please cite:
            Wishart DS, Tzur D, Knox C, et al., HMDB: the Human Metabolome Database. Nucleic Acids Res. 2007 Jan;35(Database issue):D521-6. 17202168
            Wishart DS, Knox C, Guo AC, et al., HMDB: a knowledgebase for the human metabolome. Nucleic Acids Res. 2009 37(Database issue):D603-610. 18953024
            Wishart DS, Jewison T, Guo AC, Wilson M, Knox C, et al., HMDB 3.0 — The Human Metabolome Database in 2013. Nucleic Acids Res. 2013. Jan 1;41(D1):D801-7. 23161693
            Wishart DS, Feunang YD, Marcu A, Guo AC, Liang K, et al., HMDB 4.0 — The Human Metabolome Database for 2018. Nucleic Acids Res. 2018. Jan 4;46(D1):D608-17. 29140435
            Wishart DS, Guo AC, Oler E, et al., HMDB 5.0: the Human Metabolome Database for 2022. Nucleic Acids Res. 2022. Jan 7;50(D1):D622–31. 34986597

        LMSD - https://www.lipidmaps.org/databases/lmsd/overview
        Please cite:
            LMSD: LIPID MAPS® structure databas, Sud M, Fahy E, Cotter D, Brown A, Dennis EA, Glass CK, Merrill AH Jr, Murphy RC, Raetz CR, Russell DW, Subramaniam S., Nucleic Acids Research, 2007, 35: p. D527-32., DOI: 10.1093/nar/gkl838 , PMID: 17098933
            LIPID MAPS® online tools for lipid research, Fahy E, Sud M, Cotter D & Subramaniam S., Nucleic Acids Research, 2007, 35: p. W606-12., DOI: 10.1093/nar/gkm324 , PMID: 17584797
            LIPID MAPS: update to databases and tools for the lipidomics community, Conroy MJ, Andrews RM, Andrews S, Cockayne L, Dennis, EA, Fahy E, Gaud C, Griffiths WJ, Jukes G, Kolchin M, Mendivelso K, Lopez-Clavijo AF, Ready C, Subramaniam S, O'Donnell, VB, Nucleic Acids Research, 2023, DOI: 10.1093/nar/gkad896 , PMID: 37855672

        MoNA - https://mona.fiehnlab.ucdavis.edu/
        Plase cite:
            https://mona.fiehnlab.ucdavis.edu/

        ThermoRawFileParser -
        Please cite:
            Niels Hulstaert, Jim Shofstahl, Timo Sachsenberg, Mathias Walzer, Harald Barsnes, Lennart Martens, and Yasset Perez-Riverol Journal of Proteome Research 2020 19 (1), 537-542 DOI: 10.1021/acs.jproteome.9b00328

        By downloading these extras you agree to the terms of their licenses. Please type 'yes' to acknowledge.
        '''
        print(warning)
        user_input = input()
        if user_input != "yes":
            return

        def download_from_cloud_storage(src, dst):
            # Fetch the archive, unpack it beside itself, then remove the zip.
            gdown.download(src, output=dst)
            with zipfile.ZipFile(dst, 'r') as zip_ref:
                zip_ref.extractall(os.path.dirname(dst))
            os.remove(dst)

        this_dir = os.path.abspath(os.path.dirname(__file__))
        base_url = 'https://storage.googleapis.com/pcpfm-data/'
        converter_url = base_url + 'ThermoRawFileConverter-20240119T131510Z-001.zip'
        # NOTE(review): base_url already ends with '/', so this URL contains
        # '//'. Left untouched because the stored object name cannot be
        # verified from here -- confirm against the bucket.
        annotat_sources = base_url + '/annotation_sources-20240119T131612Z-001.zip'
        thermo_path = os.path.join(this_dir, "ThermoRawFileConverter.zip")
        annot_path = os.path.join(this_dir, "annotation_sources.zip")
        download_from_cloud_storage(converter_url, thermo_path)
        download_from_cloud_storage(annotat_sources, annot_path)

    @staticmethod
    def preprocess(params):
        """
        Write a copy of the sequence CSV with extra, derived columns.

        For every new field in params['preprocessing_config']['mappings'], each
        candidate value contributes its name when any of its "substrings"
        occurs in any of its "search" columns, otherwise its optional "else"
        value; the contributions are joined with "_". An "InferredPath" column
        is always written: when the row's params['path_field'] value is not an
        existing file, it is set to "<name>.mzML" or "<name>.raw" located next
        to the sequence file (if one exists), otherwise it is left empty.

        params['sequence'] is replaced by its absolute path as a side effect.

        :param params: parameter dictionary produced by process_params();
            reads 'preprocessing_config', 'sequence', 'new_csv_path',
            'path_field' and 'name_field'
        """
        preprocess_config = params['preprocessing_config']
        params["sequence"] = os.path.abspath(params["sequence"])
        sequence_dir = os.path.dirname(params['sequence'])

        # newline='' is what the csv module requires for files it reads/writes.
        with open(params['new_csv_path'], 'w+', encoding='utf-8', newline='') as out_csv_fh:
            with open(params['sequence'], encoding='utf-8', newline='') as sequence_fh:
                writer = None
                for entry in csv.DictReader(sequence_fh):
                    for new_field, value_rules in preprocess_config["mappings"].items():
                        entry[new_field] = []
                        for new_value, rule in value_rules.items():
                            matched = any(
                                substring in entry[field_to_search]
                                for substring in rule["substrings"]
                                for field_to_search in rule["search"]
                            )
                            if matched:
                                entry[new_field].append(new_value)
                            elif "else" in rule:
                                entry[new_field].append(rule["else"])
                        entry[new_field] = "_".join(entry[new_field])

                    # Every row carries the column so the header taken from
                    # the first row is valid for all later rows.
                    entry.setdefault("InferredPath", "")
                    declared_path = entry.get(params["path_field"])
                    if not (declared_path and os.path.exists(declared_path)):
                        acquisition_name = entry[params["name_field"]]
                        for extension in (".mzML", ".raw"):
                            candidate = os.path.join(sequence_dir, acquisition_name + extension)
                            if os.path.exists(candidate):
                                entry["InferredPath"] = candidate
                                break

                    if writer is None:
                        writer = csv.DictWriter(out_csv_fh, fieldnames=list(entry.keys()))
                        writer.writeheader()
                    writer.writerow(entry)

    @staticmethod
    def assemble_study(params):
        """
        Placeholder subcommand; not implemented.

        :param params: parameter dictionary produced by process_params()
        :raises NotImplementedError: always
        """
        raise NotImplementedError

    @staticmethod
    def assemble(params):
        """
        Create the experiment from a sequence file; the first command of an
        analysis.

        The experiment is constructed in <output>/<project> from the sequence
        file ('-s') and then saved. '--filter' (a JSON dictionary) restricts
        which sequence entries are used, '--name_field' and '--path_field'
        select the columns holding the acquisition name and file path, and
        '--skip_list' points to a file of sample names to exclude.

        :param params: parameter dictionary produced by process_params();
            reads 'output', 'project', 'sequence', 'filter', 'name_field',
            'path_field' and 'skip_list'
        """
        experiment_dir = os.path.join(os.path.abspath(params['output']), str(params['project']))
        experiment = Experiment.Experiment.construct_experiment_from_CSV(
            experiment_dir,
            params['sequence'],
            sample_filter=params['filter'],
            name_field=params['name_field'],
            path_field=params['path_field'],
            sample_skip_list_fp=params['skip_list']
        )
        experiment.save()

    @staticmethod
    def convert(params):
        """
        Convert the experiment's .raw files to .mzML with a user command.

        The command comes from the config file or '--conversion_command'. In
        it, write $RAW_PATH where the .raw path belongs and $OUT_PATH where
        the output path belongs.

        - This requires passing -i with the experiment's path.

        :param params: parameter dictionary produced by process_params();
            reads 'input', 'conversion_command' and 'multicores'
        """
        experiment = Experiment.Experiment.load(params['input'])
        experiment.convert_raw_to_mzML(params['conversion_command'],
                                       num_cores=params['multicores'])
        experiment.save()

    @staticmethod
    def asari(params):
        """
        Run asari on the experiment's acquisitions.

        Acquisitions must already be in .mzML format. Extra asari arguments
        can be supplied with '--extra_asari'; they are split on single spaces
        and appended to the configured command without modifying the
        configured value itself.

        - This requires passing -i with the experiment's path.

        :param params: parameter dictionary produced by process_params();
            reads 'input', 'asari_command' and, optionally, 'extra_asari'
        """
        experiment = Experiment.Experiment.load(params['input'])
        asari_command = params['asari_command']
        extra_asari = params.get('extra_asari')
        if extra_asari:
            if isinstance(asari_command, str):
                # A command given as one string stays a string.
                asari_command = asari_command + " " + extra_asari
            else:
                # Build a new list so the shared default is left untouched.
                asari_command = list(asari_command) + extra_asari.split(" ")
        experiment.asari(asari_command)
        experiment.save()

    @staticmethod
    def QAQC(params):
        """
        Compute QAQC results for a feature table and store them on the
        experiment.

        Each result yielded by the feature table's QAQC() is stored in
        experiment.qcqa_results[table_moniker] under its "Type". The
        experiment's parameters are replaced by params['experiment_config']
        before the table is retrieved. '--color_by', '--text_by' and
        '--marker_by' are optional and are passed through to the feature table
        as part of params.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        experiment.parameters = params["experiment_config"]
        moniker = params['table_moniker']
        feature_table = experiment.retrieve_feature_table(moniker, True)
        if moniker not in experiment.qcqa_results:
            experiment.qcqa_results[moniker] = {}
        for qaqc_result in feature_table.QAQC(params):
            experiment.qcqa_results[moniker][qaqc_result["Type"]] = qaqc_result
        experiment.save()

    @staticmethod
    def summarize(params):
        """
        Print the empirical compound lists and feature tables registered with
        the experiment.

        - This requires passing -i with the experiment's path.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        experiment.summarize()

    @staticmethod
    def build_empCpds(params):
        """
        Build empirical compounds from the features of a feature table.

        The adduct set is chosen from the experiment's ionization mode:
        'khipu_adducts_pos' when it is "pos", otherwise 'khipu_adducts_neg'.
        The choice is written back to params['khipu_adducts'].

        - --khipu_isotopes specifies the isotopes to use
        - --khipu_extended_adducts specifies which extended adducts to use
        - --khipu_adducts_neg specifies which adducts to use if mode is neg
        - --khipu_adducts_pos specifies which adducts to use if mode is pos
        - --add_singletons specifies if single features, i.e. just one peak,
          should be included in the empCpds
        - --khipu_rt_tolerance the rtime range for which to build khipus
        - --ppm the mass tolerance for which to build khipus
        - --khipu_charges specifies which charges to consider (absolute Z)

        For details on these parameters, please see Khipu's documentation.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the moniker of the feature table.
        - This requires passing -em with the desired empCpd moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        if experiment.ionization_mode == "pos":
            params['khipu_adducts'] = params['khipu_adducts_pos']
        else:
            params['khipu_adducts'] = params['khipu_adducts_neg']
        EmpCpds.EmpCpds.construct_from_feature_table(
            experiment,
            params['khipu_isotopes'],
            params['khipu_adducts'],
            params['khipu_extended_adducts'],
            params['table_moniker'],
            params['empCpd_moniker'],
            params['add_singletons'],
            params['khipu_rt_tolerance'],
            params['ppm'],
            params['khipu_charges'])
        experiment.save()

    @staticmethod
    def blank_masking(params):
        """
        Apply blank masking to a feature table and save it under a new moniker.

        The feature table's blank_mask() is called with params['blank_value'],
        params['sample_value'], params['query_field'],
        params['blank_intensity_ratio'] (as a float), params['by_batch'] and
        params['batch_blanking_logic'].

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.blank_mask(params['blank_value'],
                                 params['sample_value'],
                                 params['query_field'],
                                 float(params['blank_intensity_ratio']),
                                 params['by_batch'],
                                 params['batch_blanking_logic'])
        feature_table.save(params['new_moniker'])

    @staticmethod
    def drop_outliers(params):
        """
        Drop samples from a feature table using the params['auto_drop'] QAQC
        filter.

        By default this is a |Z| > 2.5 filter on the number of features, with
        the Z-score calculated using the median.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.drop_samples_by_qaqc(params['auto_drop'], False, params=params)
        feature_table.save(params['new_moniker'])

    @staticmethod
    def drop_samples(params):
        """
        Drop samples from a feature table. The first applicable mode is used:

        - --drop_name drops the sample with the given name
        - --filter drops samples using a JSON formatted filter
        - --qaqc_filter drops samples using a JSON filter on QAQC results
        - --drop_field + --drop_value drops all samples with the given value
          for the given sequence file field

        Each mode also receives --drop_others, which reverses the logic of the
        drop. The table is saved under the new moniker even when no mode
        applies.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        drop_others = params['drop_others']
        if params['drop_name']:
            feature_table.drop_sample_by_name(params['drop_name'], drop_others)
        elif params['filter']:
            feature_table.drop_samples_by_filter(params['filter'], drop_others)
        elif params['qaqc_filter']:
            feature_table.drop_samples_by_qaqc(params['qaqc_filter'], drop_others, params=params)
        elif params['drop_field'] and params['drop_value']:
            feature_table.drop_samples_by_field(params['drop_value'],
                                                params['drop_field'],
                                                drop_others)
        feature_table.save(params['new_moniker'])

    @staticmethod
    def finish(params):
        """
        No-op command marking the end of an analysis: the experiment is loaded
        and saved again.

        - This requires passing -i with the experiment's path.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        experiment.save()

    @staticmethod
    def normalize(params):
        """
        TIC-normalize a feature table using the features present in over a
        certain percentile of samples.

        - --TIC_normalization_percentile defines this cutoff; when it is not
          set, no normalization is performed and the table is only re-saved
        - --by_batch designates the field to group into batches; if provided,
          normalization will be done within batches first
        - --normalize_value can be 'mean' or 'median'; this is the value to
          which the TICs will be normalized

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        if params["TIC_normalization_percentile"]:
            feature_table.TIC_normalize(float(params["TIC_normalization_percentile"]),
                                        params["by_batch"],
                                        params["normalize_value"])
        feature_table.save(params['new_moniker'])

    @staticmethod
    def drop_missing_features(params):
        """
        Drop features below a given percentile of inclusion.

        - --feature_retention_percentile defines this cutoff
        - --by_batch designates the field to group into batches; if provided,
          the percentile is calculated per batch first
        - params['feature_drop_logic'] can be "or" or "and" and specifies how
          to handle the various batches. For example, if "or", a feature will
          be dropped if it is below the cutoff in any batch.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.drop_missing_features(params["by_batch"],
                                            float(params["feature_retention_percentile"]),
                                            params["feature_drop_logic"])
        feature_table.save(params['new_moniker'])

    @staticmethod
    def impute(params):
        """
        Replace remaining missing values with a value to aid statistics.

        - --interpolation_ratio specifies what to multiply the value generated
          by the interpolate method by before replacement
        - params['interpolate_method'] is currently limited to only min
        - --by_batch specifies what field to group samples by; interpolation
          is done within each group (probably a bad idea)

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.impute_missing_features(float(params['interpolation_ratio']),
                                              params['by_batch'],
                                              params['interpolate_method'])
        feature_table.save(params['new_moniker'])

    @staticmethod
    def batch_correct(params):
        """
        Use pyCombat to correct for batch effects using the specified batch
        identifier.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the feature table's moniker.
        - This requires passing -nm with the new feature table's moniker.
        - This requires passing --by_batch with the field specifying the batch
          on which to correct.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.batch_correct(params['by_batch'])
        feature_table.save(params['new_moniker'])

    @staticmethod
    def delete(params):
        """
        Delete a feature table or an empCpd list by moniker.

        When a table moniker is given it takes precedence and the empCpd
        moniker is ignored.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the table's moniker to delete or
        - This requires passing -em with the empCpd's moniker to delete.

        Note: you *cannot* delete the feature tables generated by asari using
        this method.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        if params["table_moniker"]:
            experiment.delete_feature_table(params['table_moniker'])
        elif params['empCpd_moniker']:
            experiment.delete_empCpds(params['empCpd_moniker'])

    @staticmethod
    def log_transform(params):
        """
        Log transform a feature table, by default log2.

        - params['log_transform_mode'] can be log10 or log2

        - This requires passing -i with the experiment's path.
        - This requires passing -tm with the table's moniker to transform.
        - This requires passing -nm with the new feature table's moniker.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        feature_table = experiment.retrieve_feature_table(params['table_moniker'], True)
        feature_table.log_transform(params['log_transform_mode'])
        feature_table.save(params["new_moniker"])

    @staticmethod
    def l4_annotate(params):
        """
        Generate MS1 (level 4) annotations on an empCpd list.

        - **--targets**: specifies what compounds to annotate, must be a
          JMS-compliant JSON file
        - **--annot_rt_tolerance**: the rtime cutoff, in sec, for the search

        Nothing is annotated when 'empCpd_moniker' is absent from params.

        - This requires passing -i with the experiment's path.
        - This requires passing -em with the empCpd's moniker to annotate.
        - This requires passing -nm with the new moniker for the empCpd list.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        if 'empCpd_moniker' in params:
            empCpd = experiment.retrieve_empCpds(params['empCpd_moniker'], True)
            empCpd.l4_annotate(params['targets'], float(params['annot_rt_tolerance']))
            empCpd.save(params['new_moniker'])

    @staticmethod
    def l2_annotate(params):
        """
        Generate MS2 (level 2) annotations on an empCpd list. Requires that
        MS2 spectra first be mapped.

        - **--msp_files**: path to the MSP file(s) to use for annotation. A
          single path is wrapped in a list and a ".json" path is read as JSON.
          When 'msp_files' is absent, params['msp_files_pos'] or
          params['msp_files_neg'] is used according to the experiment's
          ionization mode.
        - **--annot_mz_tolerance**: PPM cutoff for the precursor ion search.
        - params['ms2_similarity_metric']: name of the method used for
          comparing MS2 spectra.
        - params['ms2_min_peaks']: minimum number of matching peaks required
          for an MS2 match.

        Nothing is annotated when 'empCpd_moniker' is absent from params.

        - This requires passing -i with the experiment's path.
        - This requires passing -em with the empCpd's moniker to annotate.
        - This requires passing -nm with the new moniker for the empCpd list.

        :param params: parameter dictionary produced by process_params()
        :raises ValueError: if no 'msp_files' entry is given and the
            experiment's ionization mode is neither "pos" nor "neg"
        """
        experiment = Experiment.Experiment.load(params['input'])
        if 'empCpd_moniker' not in params:
            return

        if 'msp_files' in params:
            msp_file = params['msp_files']
            if isinstance(msp_file, str):
                if msp_file.endswith(".json"):
                    with open(msp_file, encoding='utf-8') as msp_fh:
                        msp_file = json.load(msp_fh)
                else:
                    msp_file = [msp_file]
        elif experiment.ionization_mode == "pos":
            msp_file = params['msp_files_pos']
        elif experiment.ionization_mode == "neg":
            msp_file = params['msp_files_neg']
        else:
            raise ValueError(
                "cannot choose MSP files for ionization mode: "
                + repr(experiment.ionization_mode)
            )

        empCpd = experiment.retrieve_empCpds(params['empCpd_moniker'], True)
        empCpd.l2_annotate(
            msp_file,
            # Command line values arrive as strings; cast like the other
            # annotate commands do.
            float(params["annot_mz_tolerance"]),
            params["ms2_similarity_metric"],
            params["ms2_min_peaks"],
        )
        empCpd.save(params["new_moniker"])

    @staticmethod
    def l1b_annotate(params):
        """
        Generate level 1b annotations on an empCpd list using csv file(s) with
        compound names, retention times and m/z values.

        - **--targets**: a list of csv filepaths with mz, retention times and
          compound names
        - **--annot_mz_tolerance**: the ppm cutoff for the precursor ion search
        - **--annot_rt_tolerance**: the rtime cutoff, in sec, for the precursor
          ion search

        Nothing is annotated when 'empCpd_moniker' is absent from params.

        - This requires passing -i with the experiment's path.
        - This requires passing -em with the empCpd's moniker to annotate.
        - This requires passing -nm with the new moniker for the empCpd list.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        if 'empCpd_moniker' in params:
            empCpd = experiment.retrieve_empCpds(params['empCpd_moniker'], True)
            empCpd.l1b_annotate(params['targets'],
                                float(params['annot_rt_tolerance']),
                                float(params['annot_mz_tolerance']))
            empCpd.save(params['new_moniker'])

    @staticmethod
    def l1a_annotate(params):
        """
        Generate level 1a annotations on an empCpd list using csv file(s) with
        compound names, retention times and m/z values.

        - **--targets**: a list of csv filepaths with mz, retention times and
          compound names
        - **--annot_mz_tolerance**: the ppm cutoff for the precursor ion search
        - **--annot_rt_tolerance**: the rtime cutoff, in sec, for the precursor
          ion search

        Nothing is annotated when 'empCpd_moniker' is absent from params.

        - This requires passing -i with the experiment's path.
        - This requires passing -em with the empCpd's moniker to annotate.
        - This requires passing -nm with the new moniker for the empCpd list.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        if 'empCpd_moniker' in params:
            empCpd = experiment.retrieve_empCpds(params['empCpd_moniker'], True)
            empCpd.l1a_annotate(params['targets'],
                                float(params['annot_rt_tolerance']),
                                float(params['annot_mz_tolerance']))
            empCpd.save(params['new_moniker'])

    @staticmethod
    def report(params):
        """
        Generate a pdf report using a JSON template.

        - **--report_config** will override the default template

        - This requires passing -i with the experiment's path.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        Report.Report(experiment, params)

    @staticmethod
    def map_ms2(params):
        """
        Map MS2 spectra to the empirical compounds based on rt and mz
        similarity. Once mapped, they can be annotated using MS2 similarity.

        - --annot_mz_tolerance is the ppm cutoff for the precursor ion search
        - --annot_rt_tolerance is the rtime cutoff, in sec, for the precursor
          ion search
        - --ms2_dir adds MS2 spectra from an additional location (from
          AcquireX for example) to those found in the experiment

        - This requires passing -i with the experiment's path.
        - This requires passing -em with the empCpd's moniker to annotate.
        - This requires passing -nm with the new moniker for the empCpd list.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        empCpds = experiment.retrieve_empCpds(params['empCpd_moniker'], True)
        empCpds.map_ms2(float(params['annot_rt_tolerance']),
                        float(params['annot_mz_tolerance']),
                        ms2_files=params['ms2_dir'])
        empCpds.save(params["new_moniker"])

    @staticmethod
    def generate_output(params):
        """
        Generate the three table output for downstream analysis: a feature
        table, an annotation table and the sample metadata.

        - This requires passing -i with the experiment's path.
        - This requires passing -tm for the table moniker to include.
        - This requires passing -em for the empCpd moniker to include.

        :param params: parameter dictionary produced by process_params()
        """
        experiment = Experiment.Experiment.load(params['input'])
        experiment.generate_output(params['empCpd_moniker'], params['table_moniker'])
def main():
    """
    Entry point of the pipeline: parse the parameters and run the subcommand.

    The subcommand must name a public method of Main other than the
    process_params helper. An unknown subcommand prints the list of valid
    ones. Any exception raised by the subcommand is caught here and reported
    together with the subcommand's docstring instead of propagating.
    """
    params = Main.process_params()
    subcommand = params['subcommand']
    # Only public callables are subcommands; dunder attributes and the
    # argument parser itself must not be dispatched to.
    valid_subcommands = [
        name for name in dir(Main)
        if not name.startswith('_')
        and name != 'process_params'
        and callable(getattr(Main, name))
    ]
    if subcommand not in valid_subcommands:
        print(subcommand + " is not a valid subcommand")
        print("valid commands include:")
        for method in valid_subcommands:
            print("\t", method)
    else:
        function = getattr(Main, subcommand)
        try:
            function(params)
        except Exception as e:
            # Top-level boundary: report the failure with usage help.
            print("Error executing: " + subcommand)
            print(function.__doc__)
            print(e)
def CLI():
    """
    Console entry point invoked when 'pcpfm' is run in the terminal.

    Delegates directly to main().
    """
    main()
# Run the pipeline when this module is executed directly as a script.
if __name__ == '__main__':
    main()