Source code for bipype

#!/usr/bin/python
import os
import argparse
import shutil
from metatranscriptomics_bipype import metatranscriptomics
from refseq_bipype import sample, prepare_taxonomy_stats
from settings_bipype import *


def parse_arguments(args=None):
    """Build the bipype command-line parser and parse ``args``.

    Options may also be read from a file: an argument written as
    ``@my_conf_file`` is replaced by the arguments listed in that file
    (``fromfile_prefix_chars='@'``).

    Args:
        args: List of argument strings to parse. When ``None`` (default),
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace holding one attribute per option defined below.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``-h``.
    """
    parser = argparse.ArgumentParser(
        fromfile_prefix_chars='@',
        description='bipype stands for BioInformatics-PYthon-PipE',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        epilog='All commands may be presented in a configuration file, '
               'fed to bipype with @ prefix as "bipype @my_conf_file", '
               'my_conf_file should contain all desired commands and their '
               'options (if applicable) one per line')

    # --- general: performance and I/O ------------------------------------
    general = parser.add_argument_group(
        'general', 'performance and I/O related options')
    general.add_argument(
        "-t", "--threads", help='number of threads to be used',
        type=int, default=8)
    general.add_argument(
        "-m", "--mode", help='available modes: test, run',
        choices=['test', 'run'], type=str, default='test')
    general.add_argument(
        "--out_dir", "-o", type=str, metavar='OUTPUT_DIRECTORY',
        default='in_situ',
        help='Directory for output files, '
             'default value is not usable for metatranscriptomics')
    general.add_argument(
        "--input", "-i", nargs='*', default=None)
    # The default is given as an int directly; argparse would have converted
    # the former string default '9999' through type=int to the same value.
    general.add_argument(
        "--ins_len", default=9999,
        help='insert length - be advised - you better use it for single run',
        type=int)
    general.add_argument(
        "-postfix", type=str,
        help='alphanumerical postfix of processed file', default='')
    general.add_argument(
        "-e", help="use existing files", action='store_true')

    # --- taxonomy statistics ---------------------------------------------
    taxonomy_stats = parser.add_argument_group(
        'taxonomy_stats options',
        'options related to process of preparing taxonomy results')
    taxonomy_stats.add_argument(
        "-ot", "--output_type", nargs='*', default=['ITS', '16S', 'txt'],
        help='Choice of files searched for an analysis, coded: '
             '16S and ITS on usearches - file .usearch_ITS, .usearch_16S')

    # --- input cleaning --------------------------------------------------
    dataclean = parser.add_argument_group(
        'input cleaning', 'methods of cleaning input from noise')
    dataclean.add_argument(
        "-ic", "--initial_cleaning", choices=['usearch', 'fastx'], type=str,
        default='', help='Choice of initial cleaning method')
    dataclean.add_argument(
        "--cutadapt", type=str, default='', nargs=2,
        help='-cutadapt ADAPTER_FILE search_options, '
             'location of file with adapters to be used by cutadapt '
             '(possible "use_filenames" to determine adapters from hardcode), '
             'and list of usearches to be run on created files - '
             'possible options are 16S, ITS, both. '
             'Please note, that mapping options -16S, -ITS are completely '
             'irrelevant if you use cutadapt. '
             'Other note - this is !!!IMPORTANT!!! to present location of file '
             'with adapters as first option of this argument')

    # --- mappings ----------------------------------------------------------
    # Several options below (and in the contig group) share
    # dest='to_calculate', so each one that is given appends its value to the
    # same list.
    # NOTE(review): because the dest is shared, argparse appears to take the
    # parse-time default of `to_calculate` from the first action registered
    # for it (-ITS, i.e. None); default='f' / default='rap_prot' further down
    # then only show up in --help. Left unchanged - confirm against the
    # consumers of opts.to_calculate before touching them.
    mappings = parser.add_argument_group(
        'mapping', 'mappings to be done during the run')
    mappings.add_argument(
        "-ITS", help='usearch ITS database', dest='to_calculate',
        action='append_const', const='ITS')
    mappings.add_argument(
        "-16S", help='usearch 16S database', dest='to_calculate',
        action='append_const', const='16S')
    mappings.add_argument(
        "-refseq",
        help='map samples on refseq: p - plant, f - fungi, b - both',
        nargs='?', choices=['f', 'p', 'b'], dest='to_calculate',
        action='append', const='f', default='f')
    mappings.add_argument(
        "-MEGAN", help='generate .rma files using MEGAN',
        dest='to_calculate', action='append_const', const='MEGAN')

    # --- contig reconstruction -------------------------------------------
    contigs = parser.add_argument_group(
        'contig reconstruction',
        'methods of contig reconstruction to be used during the run')
    contigs.add_argument(
        "-assembler", default=None, type=str,
        help='choose assembler: MH for Megahit, MV for MetaVelvet',
        choices=['MH', 'MV', None])
    contigs.add_argument(
        "-MV", default=None,
        help="MetaVelvet's options: "
             "[initial k-mer size - default = 31, "
             "[final k-mer size, [step - default=2 (odd numbers)]]]",
        nargs='*')
    contigs.add_argument(
        "-reconstruct", default=None, const="rec_",
        help='reconstruct relating to database, options: database_loc, prefix',
        nargs='?')
    contigs.add_argument(
        "-humann", help='mapping rapsearch using humann',
        dest='to_calculate', action='append_const', const='humann')
    contigs.add_argument(
        "-rapsearch", help='RAPsearch on protein database', nargs='?',
        choices=['rap_KEGG', 'rap_prot', 'rap_KO'], dest='to_calculate',
        action='append', const='rap_prot', default='rap_prot')

    # --- database locations (defaults come from settings_bipype) ---------
    db_loc = parser.add_argument_group(
        'databases', 'locations of databases to be used for searches')
    db_loc.add_argument(
        "--db_16S", type=str, default=PATH_X16S_DB,
        help='location of 16S database used in usearch (bowtie indexed)')
    db_loc.add_argument(
        "--db_ITS", type=str, default=PATH_ITS_DB,
        help='location of ITS database used in usearch (bowtie indexed)')
    db_loc.add_argument(
        "--db_refseq_fungi", type=str, default=[PREF_PATH_REF_FUNGI],
        nargs='*',
        help='location of refseq database to use for fungi analysis. '
             'Up to two paths allowed. If there are multiple databases with '
             'filenames like <path_which_you_specified><second_part_of_name>, '
             'all will be loaded.')
    db_loc.add_argument(
        "--db_refseq_plant", type=str,
        default=[PREF_PATH_REF_PLANT_1, PREF_PATH_REF_PLANT_2], nargs='*',
        help='location of refseq database to use for plants analysis. '
             'Up to two paths allowed. If there are multiple databases with '
             'filenames like <path_which_you_specified><second_part_of_name>, '
             'all will be loaded.')
    db_loc.add_argument(
        "--db_NCBI_taxonomy", type=str, default=PATH_NCBI_TAXA_DB,
        help='location of cPickled NCBI_tax_id, '
             'NCBI tax names and NCBI tax ids')
    db_loc.add_argument(
        "--db_reconstruct", type=str, default=PATH_RECONSTRUCT_DB,
        help='location of database for reconstruction')
    db_loc.add_argument(
        "--db_taxonomy_16S", type=str, default=PATH_16S_DATABASE,
        help='location of 16S database used in taxonomy classification '
             '(FASTA with specially formatted headers)')
    db_loc.add_argument(
        "--db_taxonomy_ITS", type=str, default=PATH_ITS_DATABASE,
        help='location of ITS database used in taxonomy classification '
             '(FASTA with specially formatted headers)')

    # --- metatranscriptomics ---------------------------------------------
    metatr = parser.add_argument_group(
        'metatranscriptomics', 'parameters for metatranscriptomic pipe')
    metatr.add_argument(
        "--metatr_config", type=str,
        help='configuration file which is necessary to launch '
             'metatranscriptomic part of bipype')
    metatr.add_argument(
        "-mot", "--metatr_output_type", type=str,
        choices=['old', 'new', 'both'], default='both',
        help='determines type of the one of the outputs from '
             'metatranscriptomic part of bipype')

    return parser.parse_args(args)
if __name__ == '__main__':
    # NOTE(review): this block reached review collapsed onto a single line,
    # so the nesting below is reconstructed - in particular that sample() and
    # prepare_taxonomy_stats() run in the non-metatranscriptomics branch
    # whether or not --input was given. Confirm against the upstream file.
    opts = parse_arguments()
    if opts.metatr_config:
        # A metatranscriptomics config file selects that pipeline instead of
        # the sample/taxonomy one.
        metatranscriptomics(opts)
    else:
        if opts.input:
            # Copy every requested input into the current working directory.
            work_dir = os.getcwd()
            for input_file in opts.input:
                staged = os.path.join(work_dir, os.path.basename(input_file))
                # An input that already sits in the working directory would
                # make shutil.copy fail with a "same file" error; it needs no
                # copy, so skip it.
                if os.path.realpath(input_file) == os.path.realpath(staged):
                    continue
                shutil.copy(input_file, work_dir)
        sample(opts)
        prepare_taxonomy_stats(opts)