diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b89fbab --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ +*.egg-info/ +dist/ +build/ +.eggs/ diff --git a/pyvcfaz-0.6.8-py27_0/bin/vcf_filter.py b/pyvcfaz-0.6.8-py27_0/bin/vcf_filter.py deleted file mode 100755 index 2816588..0000000 --- a/pyvcfaz-0.6.8-py27_0/bin/vcf_filter.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/opt/anaconda1anaconda2anaconda3/bin/python -import sys -import argparse -import pkg_resources - -import vcf -from vcf.parser import _Filter - -def create_filt_parser(name): - parser = argparse.ArgumentParser(description='Parser for %s' % name, - add_help=False - ) - parser.add_argument('rest', nargs=argparse.REMAINDER, help=argparse.SUPPRESS) - - return parser - -def create_core_parser(): - # we have to use custom formatted usage, because of the - # multi-stage argument parsing (otherwise the filter arguments - # are grouped together with the other optionals) - parser = argparse.ArgumentParser(description='Filter a VCF file', - add_help=False, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - usage="""%(prog)s [-h] [--no-short-circuit] [--no-filtered] - [--output OUTPUT] [--local-script LOCAL_SCRIPT] - input filter [filter_args] [filter [filter_args]] ... - """ - ) - parser.add_argument('-h', '--help', action='store_true', - help='Show this help message and exit.') - parser.add_argument('input', metavar='input', type=argparse.FileType('rb'), nargs='?', default=None, - help='File to process (use - for STDIN)') -# parser.add_argument('filters', metavar='filter', type=str, nargs='*', default=None, -# help='Filters to use') - parser.add_argument('--no-short-circuit', action='store_true', - help='Do not stop filter processing on a site if any filter is triggered') - parser.add_argument('--output', action='store', default=sys.stdout, - help='Filename to output [STDOUT]') - parser.add_argument('--no-filtered', action='store_true', - help='Output only sites passing the filters') - parser.add_argument('--local-script', action='store', default=None, - help='Python file in current working directory with the filter classes') - parser.add_argument('rest', nargs=argparse.REMAINDER, help=argparse.SUPPRESS) - - return parser - -# argument parsing strategy -# loading a script given at the command line poses a difficulty -# for using the argparse in a simple way -- the command line arguments -# are not completely known the first time command line is parsed -# requirements: -# - display all filters with options grouped by the filters in help screen -# - check if only arguments for currently used filters are given -# - to increase legibility when using more filters, arguments should -# follow the filter name -# - it is good to specify the filters explicitly by name, -# because the order of filtering can matter -# solution -# - change the command syntax to -# vcf_filter.py --core-options input filter1 --filter1-args filter2 filter3 -# - parse the core program options with parse_known_args -# - use add_argument_group for filters (subparsers won't work, they require -# the second command in argv[1]) -# - create all-filters parser when displaying the help -# - parse the arguments incrementally on argparse.REMAINDER of the previous - - # TODO: allow filter specification by short name - # TODO: flag that writes filter output into INFO column - # TODO: argument use implies filter use - # TODO: parallelize - # TODO: prevent plugins raising an exception from crashing the script - -def main(): - # dynamically build the list of available filters - filters = {} - - # parse command line args - # (mainly because of local_script) - parser = create_core_parser() - (args, unknown_args) = parser.parse_known_args() - - # add filter to dictionary, extend help message - # with help/arguments of each filter - def addfilt(filt): - filters[filt.name] = filt - arg_group = parser.add_argument_group(filt.name, filt.__doc__) - filt.customize_parser(arg_group) - - # look for global extensions - for p in pkg_resources.iter_entry_points('vcf.filters'): - filt = p.load() - addfilt(filt) - - # add all classes from local script, if present - if args.local_script != None: - import inspect - import os - sys.path.insert(0, os.getcwd()) - module_name = args.local_script.replace('.py', '') - mod = __import__(module_name) - classes = inspect.getmembers(mod, inspect.isclass) - for name, cls in classes: - addfilt(cls) - - # go through the filters on the command line - # one by one, trying to consume only the declared arguments - used_filters = [] - while len(args.rest): - filter_name = args.rest.pop(0) - if filter_name not in filters: - sys.exit("%s is not a known filter (%s)" % (filter_name, str(filters.keys()))) - - # create a parser only for arguments of current filter - filt_parser = create_filt_parser(filter_name) - filters[filter_name].customize_parser(filt_parser) - (known_filt_args, unknown_filt_args) = filt_parser.parse_known_args(args.rest) - if len(unknown_filt_args): - sys.exit("%s has no arguments like %s" % (filter_name, unknown_filt_args)) - - used_filters.append((filter_name, known_filt_args)) - args.rest = known_filt_args.rest - - # print help using the 'help' parser, so it includes - # all possible filters and arguments - if args.help or len(used_filters) == 0 or args.input == None: - parser.print_help() - parser.exit() - - inp = vcf.Reader(args.input) - - # build filter chain - chain = [] - for (name, filter_args) in used_filters: - f = filters[name](filter_args) - chain.append(f) - # add a filter record to the output - short_doc = f.__doc__ or '' - short_doc = short_doc.split('\n')[0].lstrip() - inp.filters[f.filter_name()] = _Filter(f.filter_name(), short_doc) - - # output must be created after all the filter records have been added - output = vcf.Writer(args.output, inp) - - # apply filters - short_circuit = not args.no_short_circuit - drop_filtered = args.no_filtered - - for record in inp: - output_record = True - for filt in chain: - result = filt(record) - if result == None: continue - - # save some work by skipping the rest of the code - if drop_filtered: - output_record = False - break - - record.add_filter(filt.filter_name()) - if short_circuit: break - - if output_record: - # use PASS only if other filter names appear in the FILTER column - #FIXME: is this good idea? - if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS' - output.write_record(record) - -if __name__ == '__main__': main() diff --git a/pyvcfaz-0.6.8-py27_0/bin/vcf_melt b/pyvcfaz-0.6.8-py27_0/bin/vcf_melt deleted file mode 100755 index 7d6d585..0000000 --- a/pyvcfaz-0.6.8-py27_0/bin/vcf_melt +++ /dev/null @@ -1,48 +0,0 @@ -#!/opt/anaconda1anaconda2anaconda3/bin/python -""" Melt a VCF file into a tab delimited set of calls, one per line - -VCF files have all the calls from different samples on one line. This -script reads vcf on stdin and writes all calls to stdout in tab delimited -format with one call in one sample per line. This makes it easy to find -a given sample's genotype with, say, grep. -""" - -import sys -import csv -import vcf - -out = csv.writer(sys.stdout, delimiter='\t') -if len(sys.argv) > 1: - inp = file(sys.argv[1]) -else: - inp = sys.stdin -reader = vcf.VCFReader(inp) - -formats = reader.formats.keys() -infos = reader.infos.keys() - -header = ["SAMPLE"] + formats + ['FILTER', 'CHROM', 'POS', 'REF', 'ALT', 'ID'] \ - + ['info.' + x for x in infos] - - -out.writerow(header) - - -def flatten(x): - if type(x) == type([]): - x = ','.join(map(str, x)) - return x - -for record in reader: - info_row = [flatten(record.INFO.get(x, None)) for x in infos] - fixed = [record.CHROM, record.POS, record.REF, record.ALT, record.ID] - - for sample in record.samples: - row = [sample.sample] - # Format fields not present will simply end up "blank" - # in the output - row += [flatten(getattr(sample.data, x, None)) for x in formats] - row += [record.FILTER or '.'] - row += fixed - row += info_row - out.writerow(row) diff --git a/pyvcfaz-0.6.8-py27_0/bin/vcf_sample_filter.py b/pyvcfaz-0.6.8-py27_0/bin/vcf_sample_filter.py deleted file mode 100755 index d80f357..0000000 --- a/pyvcfaz-0.6.8-py27_0/bin/vcf_sample_filter.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/opt/anaconda1anaconda2anaconda3/bin/python - -# Author: Lenna X. Peterson -# github.com/lennax -# arklenna at gmail dot com - -import argparse -import logging - -from vcf import SampleFilter - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("file", help="VCF file to filter") - parser.add_argument("-o", metavar="outfile", - help="File to write out filtered samples") - parser.add_argument("-f", metavar="filters", - help="Comma-separated list of sample indices or names \ - to filter") - parser.add_argument("-i", "--invert", action="store_true", - help="Keep rather than discard the filtered samples") - parser.add_argument("-q", "--quiet", action="store_true", - help="Less output") - - args = parser.parse_args() - - if args.quiet: - log_level = logging.WARNING - else: - log_level = logging.INFO - logging.basicConfig(format='%(message)s', level=log_level) - - sf = SampleFilter(infile=args.file, outfile=args.o, - filters=args.f, invert=args.invert) - if args.f is None: - print "Samples:" - for idx, val in enumerate(sf.samples): - print "{0}: {1}".format(idx, val) diff --git a/pyvcfaz-0.6.8-py27_0/info/files b/pyvcfaz-0.6.8-py27_0/info/files deleted file mode 100644 index 6182c16..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/files +++ /dev/null @@ -1,28 +0,0 @@ -bin/vcf_filter.py -bin/vcf_melt -bin/vcf_sample_filter.py -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/PKG-INFO -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/SOURCES.txt -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/dependency_links.txt -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/entry_points.txt -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/requires.txt -lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/top_level.txt -lib/python2.7/site-packages/vcfaz/__init__.py -lib/python2.7/site-packages/vcfaz/__init__.pyc -lib/python2.7/site-packages/vcfaz/cparse.pyx -lib/python2.7/site-packages/vcfaz/filters.py -lib/python2.7/site-packages/vcfaz/filters.pyc -lib/python2.7/site-packages/vcfaz/model.py -lib/python2.7/site-packages/vcfaz/model.pyc -lib/python2.7/site-packages/vcfaz/parser.py -lib/python2.7/site-packages/vcfaz/parser.pyc -lib/python2.7/site-packages/vcfaz/sample_filter.py -lib/python2.7/site-packages/vcfaz/sample_filter.pyc -lib/python2.7/site-packages/vcfaz/test/__init__.py -lib/python2.7/site-packages/vcfaz/test/__init__.pyc -lib/python2.7/site-packages/vcfaz/test/prof.py -lib/python2.7/site-packages/vcfaz/test/prof.pyc -lib/python2.7/site-packages/vcfaz/test/test_vcf.py -lib/python2.7/site-packages/vcfaz/test/test_vcf.pyc -lib/python2.7/site-packages/vcfaz/utils.py -lib/python2.7/site-packages/vcfaz/utils.pyc diff --git a/pyvcfaz-0.6.8-py27_0/info/has_prefix b/pyvcfaz-0.6.8-py27_0/info/has_prefix deleted file mode 100644 index 06f793c..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/has_prefix +++ /dev/null @@ -1,3 +0,0 @@ -/opt/anaconda1anaconda2anaconda3 text bin/vcf_filter.py -/opt/anaconda1anaconda2anaconda3 text bin/vcf_melt -/opt/anaconda1anaconda2anaconda3 text bin/vcf_sample_filter.py diff --git a/pyvcfaz-0.6.8-py27_0/info/index.json b/pyvcfaz-0.6.8-py27_0/info/index.json deleted file mode 100644 index d6554de..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/index.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "arch": "x86_64", - "build": "py27_0", - "build_number": 0, - "depends": [ - "python 2.7*" - ], - "license": "MIT", - "name": "pyvcfaz", - "platform": "linux", - "subdir": "linux-64", - "version": "0.6.8" -} diff --git a/pyvcfaz-0.6.8-py27_0/info/recipe.json b/pyvcfaz-0.6.8-py27_0/info/recipe.json deleted file mode 100644 index 8471b6d..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/recipe.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "about": { - "home": "https://github.com/jamescasbon/PyVCF", - "license": "BSD", - "summary": "A Variant Call Format reader for Python" - }, - "app": {}, - "build": { - "entry_points": [], - "features": [], - "number": "0", - "pin_depends": "", - "script_env": [], - "string": "", - "track_features": [] - }, - "package": { - "name": "pyvcfaz", - "version": "0.6.8" - }, - "requirements": { - "build": [ - "python", - "setuptools" - ], - "conflicts": [], - "run": [ - "python" - ] - }, - "source": { - "fn": "PyVCF_az-0.6.8.tar.gz", - "git_rev": "", - "patches": [], - "path": "", - "svn_rev": "", - }, - "test": { - "commands": [], - "files": [], - "imports": [ - "vcfaz.test", - "vcfaz" - ], - "requires": [] - } -} diff --git a/pyvcfaz-0.6.8-py27_0/info/recipe/build.sh b/pyvcfaz-0.6.8-py27_0/info/recipe/build.sh deleted file mode 100644 index 8948114..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/pyvcfaz-0.6.8-py27_0/info/recipe/meta.yaml b/pyvcfaz-0.6.8-py27_0/info/recipe/meta.yaml deleted file mode 100644 index f1f2249..0000000 --- a/pyvcfaz-0.6.8-py27_0/info/recipe/meta.yaml +++ /dev/null @@ -1,27 +0,0 @@ -package: - name: pyvcfaz - version: '0.6.8' - -source: - fn: PyVCF_az-0.6.8.tar.gz - -build: - number: 0 - -requirements: - build: - - python - - setuptools - - run: - - python - -test: - imports: - - vcfaz.test - - vcfaz - -about: - home: https://github.com/andreazauli/PyVCF_az - license: MIT - summary: A Variant Call Format reader for Python diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/PKG-INFO b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/PKG-INFO deleted file mode 100644 index f7b1e67..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/PKG-INFO +++ /dev/null @@ -1,27 +0,0 @@ -Metadata-Version: 1.1 -Name: PyVCF_az -Version: 0.6.8 -Summary: Variant Call Format (VCF) parser for Python -Home-page: https://github.com/andreazauli/PyVCF_az -Author: Andrea Zauli by fork of James Casbon and @jdoughertyii -Author-email: andrea.zauli@gmail.com -License: UNKNOWN -Description: UNKNOWN -Keywords: bioinformatics -Platform: UNKNOWN -Classifier: Development Status :: 4 - Beta -Classifier: Intended Audience :: Developers -Classifier: Intended Audience :: Science/Research -Classifier: License :: OSI Approved :: BSD License -Classifier: License :: OSI Approved :: MIT License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Cython -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.6 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Topic :: Scientific/Engineering :: Bio-Informatics diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/SOURCES.txt b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/SOURCES.txt deleted file mode 100644 index 5e73363..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/SOURCES.txt +++ /dev/null @@ -1,23 +0,0 @@ -MANIFEST.in -README.rst -setup.cfg -setup.py -PyVCF_az.egg-info/PKG-INFO -PyVCF_az.egg-info/SOURCES.txt -PyVCF_az.egg-info/dependency_links.txt -PyVCF_az.egg-info/entry_points.txt -PyVCF_az.egg-info/requires.txt -PyVCF_az.egg-info/top_level.txt -scripts/vcf_filter.py -scripts/vcf_melt -scripts/vcf_sample_filter.py -vcfaz/__init__.py -vcfaz/cparse.pyx -vcfaz/filters.py -vcfaz/model.py -vcfaz/parser.py -vcfaz/sample_filter.py -vcfaz/utils.py -vcfaz/test/__init__.py -vcfaz/test/prof.py -vcfaz/test/test_vcf.py diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/dependency_links.txt b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/entry_points.txt b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/entry_points.txt deleted file mode 100644 index ca0b63c..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/entry_points.txt +++ /dev/null @@ -1,8 +0,0 @@ -[vcfaz.filters] -avg-dps = vcfaz.filters:AvgDepthPerSample -dps = vcfaz.filters:DepthPerSample -eb = vcfaz.filters:ErrorBiasFilter -site_quality = vcfaz.filters:SiteQuality -snp-only = vcfaz.filters:SnpOnly -vgq = vcfaz.filters:VariantGenotypeQuality - diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/requires.txt b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/requires.txt deleted file mode 100644 index 49fe098..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -setuptools diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/top_level.txt b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/top_level.txt deleted file mode 100644 index 907d0f9..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/PyVCF_az-0.6.8-py2.7.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -vcfaz diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.py b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.py deleted file mode 100644 index e1aae58..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -""" -A VCFv4.0 and 4.1 parser for Python. - -Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/ -""" - - -from vcf.parser import Reader, Writer -from vcf.parser import VCFReader, VCFWriter -from vcf.filters import Base as Filter -from vcf.parser import RESERVED_INFO, RESERVED_FORMAT -from vcf.sample_filter import SampleFilter - -VERSION = '0.6.8' diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.pyc deleted file mode 100644 index 9de2a3b..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/__init__.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/cparse.pyx b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/cparse.pyx deleted file mode 100644 index 8a71d64..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/cparse.pyx +++ /dev/null @@ -1,84 +0,0 @@ -from model import _Call - -cdef _map(func, iterable, bad='.'): - '''``map``, but make bad values None.''' - return [func(x) if x != bad else None - for x in iterable] - -INTEGER = 'Integer' -FLOAT = 'Float' -NUMERIC = 'Numeric' - -def parse_samples( - list names, list samples, samp_fmt, - list samp_fmt_types, list samp_fmt_nums, site): - - cdef char *name, *fmt, *entry_type, *sample - cdef int i, j - cdef list samp_data = [] - cdef dict sampdict - cdef list sampvals - n_samples = len(samples) - n_formats = len(samp_fmt._fields) - - for i in range(n_samples): - name = names[i] - sample = samples[i] - - # parse the data for this sample - sampdat = [None] * n_formats - - sampvals = sample.split(':') - - for j in range(n_formats): - if j >= len(sampvals): - break - vals = sampvals[j] - - # short circuit the most common - if samp_fmt._fields[j] == 'GT': - sampdat[j] = vals - continue - elif not vals or vals == '.': - sampdat[j] = None - continue - - entry_type = samp_fmt_types[j] - # TODO: entry_num is None for unbounded lists - entry_num = samp_fmt_nums[j] - - # we don't need to split single entries - if entry_num == 1 or ',' not in vals: - - if entry_type == INTEGER: - try: - sampdat[j] = int(vals) - except ValueError: - sampdat[j] = float(vals) - elif entry_type == FLOAT or entry_type == NUMERIC: - sampdat[j] = float(vals) - else: - sampdat[j] = vals - - if entry_num != 1: - sampdat[j] = (sampdat[j]) - - continue - - vals = vals.split(',') - - if entry_type == INTEGER: - try: - sampdat[j] = _map(int, vals) - except ValueError: - sampdat[j] = map(float, vals) - elif entry_type == FLOAT or entry_type == NUMERIC: - sampdat[j] = _map(float, vals) - else: - sampdat[j] = vals - - # create a call object - call = _Call(site, name, samp_fmt(*sampdat)) - samp_data.append(call) - - return samp_data diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/filters.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/filters.pyc deleted file mode 100644 index dc459ba..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/filters.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.pyc deleted file mode 100644 index d76a757..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.pyc deleted file mode 100644 index dbc5efe..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.pyc deleted file mode 100644 index 628b166..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/__init__.py b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/__init__.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/__init__.pyc deleted file mode 100644 index beaa20a..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/__init__.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.py b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.py deleted file mode 100644 index 953d169..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.py +++ /dev/null @@ -1,33 +0,0 @@ -import vcf as vcf -import cProfile -import timeit -import pstats -import sys - -def parse_1kg(): - for line in vcf.Reader(filename='vcf/test/1kg.vcf.gz'): - pass - -if len(sys.argv) == 1: - sys.argv.append(None) - -if sys.argv[1] == 'profile': - cProfile.run('parse_1kg()', '1kg.prof') - p = pstats.Stats('1kg.prof') - p.strip_dirs().sort_stats('time').print_stats() - -elif sys.argv[1] == 'time': - n = 1 - t = timeit.timeit('parse_1kg()', "from __main__ import parse_1kg", number=n) - print t/n - -elif sys.argv[1] == 'stat': - import statprof - statprof.start() - try: - parse_1kg() - finally: - statprof.stop() - statprof.display() -else: - print 'prof.py profile/time' diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.pyc deleted file mode 100644 index 176ab16..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/prof.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.py b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.py deleted file mode 100644 index 20b71ad..0000000 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.py +++ /dev/null @@ -1,1594 +0,0 @@ -from __future__ import print_function -import unittest -try: - unittest.skip -except AttributeError: - import unittest2 as unittest -import doctest -import os -import commands -import cPickle -from StringIO import StringIO -import subprocess -import sys - -try: - import pysam -except ImportError: - pysam = None - -import vcf -from vcf import model, utils - -IS_PYTHON2 = sys.version_info[0] == 2 -IS_NOT_PYPY = 'PyPy' not in sys.version - -suite = doctest.DocTestSuite(vcf) - - -def fh(fname, mode='rt'): - return open(os.path.join(os.path.dirname(__file__), fname), mode) - - -class TestVcfSpecs(unittest.TestCase): - - def test_vcf_4_0(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - self.assertEqual(reader.metadata['fileformat'], 'VCFv4.0') - - # test we can walk the file at least - for r in reader: - - if r.POS == 1230237: - assert r.is_monomorphic - else: - assert not r.is_monomorphic - - if 'AF' in r.INFO: - self.assertEqual(type(r.INFO['AF']), type([])) - - for c in r: - assert c - - # issue 19, in the example ref the GQ is length 1 - if c.called: - self.assertEqual(type(c.data.GQ), type(1)) - if 'HQ' in c.data and c.data.HQ is not None: - self.assertEqual(type(c.data.HQ), type([])) - - - - def test_vcf_4_1(self): - reader = vcf.Reader(fh('example-4.1.vcf')) - self.assertEqual(reader.metadata['fileformat'], 'VCFv4.1') - - # contigs were added in vcf4.1 - self.assertEqual(reader.contigs['20'].length, 62435964) - - # test we can walk the file at least - for r in reader: - for c in r: - assert c - - def test_vcf_4_1_sv(self): - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - - assert 'SVLEN' in reader.infos - assert 'fileDate' in reader.metadata - assert 'DEL' in reader.alts - - # test we can walk the file at least - for r in reader: - print(r) - for a in r.ALT: - print(a) - for c in r: - print(c) - assert c - - def test_vcf_4_1_bnd(self): - reader = vcf.Reader(fh('example-4.1-bnd.vcf')) - - # test we can walk the file at least - for r in reader: - print(r) - for a in r.ALT: - print(a) - if r.ID == "bnd1": - self.assertEqual(len(r.ALT), 1) - self.assertEqual(r.ALT[0].type, "BND") - self.assertEqual(r.ALT[0].chr, "2") - self.assertEqual(r.ALT[0].pos, 3) - self.assertEqual(r.ALT[0].orientation, False) - self.assertEqual(r.ALT[0].remoteOrientation, True) - self.assertEqual(r.ALT[0].connectingSequence, "T") - if r.ID == "bnd4": - self.assertEqual(len(r.ALT), 1) - self.assertEqual(r.ALT[0].type, "BND") - self.assertEqual(r.ALT[0].chr, "1") - self.assertEqual(r.ALT[0].pos, 2) - self.assertEqual(r.ALT[0].orientation, True) - self.assertEqual(r.ALT[0].remoteOrientation, False) - self.assertEqual(r.ALT[0].connectingSequence, "G") - for c in r: - print(c) - assert c - - def test_vcf_4_2(self): - reader = vcf.Reader(fh('example-4.2.vcf')) - self.assertEqual(reader.metadata['fileformat'], 'VCFv4.2') - - # If INFO contains no Source and Version keys, they should be None. - self.assertEqual(reader.infos['DP'].source, None) - self.assertEqual(reader.infos['DP'].version, None) - - # According to spec, INFO Version key is required to be double quoted, - # but at least SAMtools 1.0 does not quote it. So we want to be - # forgiving here. - self.assertEqual(reader.infos['VDB'].source, None) - self.assertEqual(reader.infos['VDB'].version, '3') - - # test we can walk the file at least - for r in reader: - for c in r: - assert c - - def test_contig_idonly(self): - """Test VCF inputs with ##contig inputs containing only IDs. produced by bcftools 1.2+ - """ - reader = vcf.Reader(fh("contig_idonly.vcf")) - for cid, contig in reader.contigs.items(): - if cid == "1": - assert contig.length is None - elif cid == "2": - assert contig.length == 2000 - elif cid == "3": - assert contig.length == 3000 - -class TestGatkOutput(unittest.TestCase): - - filename = 'gatk.vcf' - - samples = ['BLANK', 'NA12878', 'NA12891', 'NA12892', - 'NA19238', 'NA19239', 'NA19240'] - formats = ['AD', 'DP', 'GQ', 'GT', 'PL'] - infos = ['AC', 'AF', 'AN', 'BaseQRankSum', 'DB', 'DP', 'DS', - 'Dels', 'FS', 'HRun', 'HaplotypeScore', 'InbreedingCoeff', - 'MQ', 'MQ0', 'MQRankSum', 'QD', 'ReadPosRankSum'] - - n_calls = 37 - - def setUp(self): - self.reader = vcf.Reader(fh(self.filename)) - - def testSamples(self): - self.assertEqual(self.reader.samples, self.samples) - - def testFormats(self): - self.assertEqual(set(self.reader.formats), set(self.formats)) - - def testInfos(self): - self.assertEqual(set(self.reader.infos), set(self.infos)) - - - def testCalls(self): - n = 0 - - for site in self.reader: - n += 1 - self.assertEqual(len(site.samples), len(self.samples)) - - - # check sample name lookup - for s in self.samples: - assert site.genotype(s) - - # check ordered access - self.assertEqual([x.sample for x in site.samples], self.samples) - - self.assertEqual(n, self.n_calls) - - -class TestFreebayesOutput(TestGatkOutput): - - filename = 'freebayes.vcf' - formats = ['AO', 'DP', 'GL', 'GLE', 'GQ', 'GT', 'QA', 'QR', 'RO'] - infos = ['AB', 'ABP', 'AC', 'AF', 'AN', 'AO', 'BVAR', 'CIGAR', - 'DB', 'DP', 'DPRA', 'EPP', 'EPPR', 'HWE', 'LEN', 'MEANALT', - 'NUMALT', 'RPP', 'MQMR', 'ODDS', 'MQM', 'PAIREDR', 'PAIRED', - 'SAP', 'XRM', 'RO', 'REPEAT', 'XRI', 'XAS', 'XAI', 'SRP', - 'XAM', 'XRS', 'RPPR', 'NS', 'RUN', 'CpG', 'TYPE'] - n_calls = 104 - - - def testParse(self): - reader = vcf.Reader(fh('freebayes.vcf')) - print(reader.samples) - self.assertEqual(len(reader.samples), 7) - n = 0 - for r in reader: - n+=1 - for x in r: - assert x - self.assertEqual(n, self.n_calls) - -class TestSamtoolsOutput(unittest.TestCase): - - def testParse(self): - reader = vcf.Reader(fh('samtools.vcf')) - - self.assertEqual(len(reader.samples), 1) - self.assertEqual(sum(1 for _ in reader), 11) - - -class TestBcfToolsOutput(unittest.TestCase): - def testParse(self): - reader = vcf.Reader(fh('bcftools.vcf')) - self.assertEqual(len(reader.samples), 1) - for r in reader: - for s in r.samples: - s.phased - -class TestIssue214(unittest.TestCase): - """ See https://github.com/jamescasbon/PyVCF/issues/214 """ - - def test_issue_214_is_snp(self): - reader=vcf.Reader(fh('issue-214.vcf')) - r=next(reader) - self.assertTrue(r.is_snp) - - def test_issue_214_var_type(self): - reader=vcf.Reader(fh('issue-214.vcf')) - r=next(reader) - self.assertEqual(r.var_type,'snp') - - # Can the ref even be a spanning deletion? - # Note, this does not trigger issue 214, but I've added it here for completeness - def test_issue_214_ref_is_del_is_snp(self): - reader=vcf.Reader(fh('issue-214.vcf')) - next(reader) - r=next(reader) - self.assertTrue(r.is_snp) - - # Can the ref even be a spanning deletion? - # Note, this does not trigger issue 214, but I've added it here for completeness - def test_issue_214_ref_is_del_var_type(self): - reader=vcf.Reader(fh('issue-214.vcf')) - next(reader) - r=next(reader) - self.assertEqual(r.var_type,'snp') - -class Test1kg(unittest.TestCase): - - def testParse(self): - reader = vcf.Reader(fh('1kg.vcf.gz', 'rb')) - - assert 'FORMAT' in reader._column_headers - - self.assertEqual(len(reader.samples), 629) - for _ in reader: - pass - - def test_issue_49(self): - """docstring for test_issue_49""" - reader = vcf.Reader(fh('issue_49.vcf', 'r')) - - self.assertEqual(len(reader.samples), 0) - for _ in reader: - pass - - -class Test1kgSites(unittest.TestCase): - - def test_reader(self): - """The samples attribute should be the empty list.""" - reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) - - assert 'FORMAT' not in reader._column_headers - - self.assertEqual(reader.samples, []) - for record in reader: - self.assertEqual(record.samples, []) - - def test_writer(self): - """FORMAT should not be written if not present in the template and no - extra tab character should be printed if there are no FORMAT fields.""" - reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) - out = StringIO() - writer = vcf.Writer(out, reader, lineterminator='\n') - - for record in reader: - writer.write_record(record) - out.seek(0) - out_str = out.getvalue() - for line in out_str.split('\n'): - if line.startswith('##'): - continue - if line.startswith('#CHROM'): - assert 'FORMAT' not in line - assert not line.endswith('\t') - - -class TestGoNL(unittest.TestCase): - - def testParse(self): - reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf')) - for _ in reader: - pass - - def test_contig_line(self): - reader = vcf.Reader(fh('gonl.chr20.release4.gtc.vcf')) - self.assertEqual(reader.contigs['1'].length, 249250621) - - -class TestStringAsFlag(unittest.TestCase): - - def test_string_as_flag(self): - """A flag INFO field is declared as string (not allowed by the spec, - but seen in practice).""" - reader = vcf.Reader(fh('string_as_flag.vcf', 'r')) - for _ in reader: - pass - - -class TestInfoOrder(unittest.TestCase): - - def _assert_order(self, definitions, fields): - """ - Elements common to both lists should be in the same order. Elements - only in `fields` should be last and in alphabetical order. - """ - used_definitions = [d for d in definitions if d in fields] - self.assertEqual(used_definitions, fields[:len(used_definitions)]) - self.assertEqual(fields[len(used_definitions):], - sorted(fields[len(used_definitions):])) - - def test_writer(self): - """ - Order of INFO fields should be compatible with the order of their - definition in the header and undefined fields should be last and in - alphabetical order. - """ - reader = vcf.Reader(fh('1kg.sites.vcf', 'r')) - out = StringIO() - writer = vcf.Writer(out, reader, lineterminator='\n') - - for record in reader: - writer.write_record(record) - out.seek(0) - out_str = out.getvalue() - - definitions = [] - for line in out_str.split('\n'): - if line.startswith('##INFO='): - definitions.append(line.split('ID=')[1].split(',')[0]) - if not line or line.startswith('#'): - continue - fields = [f.split('=')[0] for f in line.split('\t')[7].split(';')] - self._assert_order(definitions, fields) - - -class TestInfoTypeCharacter(unittest.TestCase): - def test_parse(self): - reader = vcf.Reader(fh('info-type-character.vcf')) - record = next(reader) - self.assertEqual(record.INFO['FLOAT_1'], 123.456) - self.assertEqual(record.INFO['CHAR_1'], 'Y') - self.assertEqual(record.INFO['FLOAT_N'], [123.456]) - self.assertEqual(record.INFO['CHAR_N'], ['Y']) - - def test_write(self): - reader = vcf.Reader(fh('info-type-character.vcf')) - out = StringIO() - writer = vcf.Writer(out, reader) - - records = list(reader) - - for record in records: - writer.write_record(record) - out.seek(0) - reader2 = vcf.Reader(out) - - for l, r in zip(records, reader2): - self.assertEquals(l.INFO, r.INFO) - - -class TestParseMetaLine(unittest.TestCase): - def test_parse(self): - reader = vcf.Reader(fh('parse-meta-line.vcf')) - f = reader.metadata['MYFIELD'][0] - self.assertEqual(f['ID'], 'SomeField') - self.assertEqual(f['Version'], '3.4-0-g7e26428') - self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"') - self.assertEqual(f['Options'], '"< 4 and > 3"') - next(reader) - - def test_write(self): - reader = vcf.Reader(fh('parse-meta-line.vcf')) - out = StringIO() - writer = vcf.Writer(out, reader) - - records = list(reader) - - for record in records: - writer.write_record(record) - out.seek(0) - reader2 = vcf.Reader(out) - - f = reader2.metadata['MYFIELD'][0] - self.assertEqual(f['ID'], 'SomeField') - self.assertEqual(f['Version'], '3.4-0-g7e26428') - self.assertEqual(f['Date'], '"Wed Oct 07 09:11:47 CEST 2015"') - self.assertEqual(f['Options'], '"< 4 and > 3"') - - for l, r in zip(records, reader2): - self.assertEquals(l.INFO, r.INFO) - - -class TestGatkOutputWriter(unittest.TestCase): - - def testWrite(self): - - reader = vcf.Reader(fh('gatk.vcf')) - out = StringIO() - writer = vcf.Writer(out, reader) - - records = list(reader) - - for record in records: - writer.write_record(record) - out.seek(0) - out_str = out.getvalue() - for line in out_str.split("\n"): - if line.startswith("##contig"): - assert line.startswith('##contig=<'), "Found dictionary in contig line: {0}".format(line) - print (out_str) - reader2 = vcf.Reader(out) - - self.assertEquals(reader.samples, reader2.samples) - self.assertEquals(reader.formats, reader2.formats) - self.assertEquals(reader.infos, reader2.infos) - self.assertEquals(reader.contigs, reader2.contigs) - - for l, r in zip(records, reader2): - self.assertEquals(l.samples, r.samples) - - # test for call data equality, since equality on the sample calls - # may not always mean their data are all equal - for l_call, r_call in zip(l.samples, r.samples): - self.assertEqual(l_call.data, r_call.data) - - -class TestBcfToolsOutputWriter(unittest.TestCase): - - def testWrite(self): - - reader = vcf.Reader(fh('bcftools.vcf')) - out = StringIO() - writer = vcf.Writer(out, reader) - - records = list(reader) - - for record in records: - writer.write_record(record) - out.seek(0) - print (out.getvalue()) - reader2 = vcf.Reader(out) - - self.assertEquals(reader.samples, reader2.samples) - self.assertEquals(reader.formats, reader2.formats) - self.assertEquals(reader.infos, reader2.infos) - - for l, r in zip(records, reader2): - self.assertEquals(l.samples, r.samples) - - # test for call data equality, since equality on the sample calls - # may not always mean their data are all equal - for l_call, r_call in zip(l.samples, r.samples): - self.assertEqual(l_call.data, r_call.data) - - -class TestWriterDictionaryMeta(unittest.TestCase): - - def testWrite(self): - - reader = vcf.Reader(fh('example-4.1-bnd.vcf')) - out = StringIO() - writer = vcf.Writer(out, reader) - - records = list(reader) - - for record in records: - writer.write_record(record) - out.seek(0) - out_str = out.getvalue() - for line in out_str.split("\n"): - if line.startswith("##PEDIGREE"): - self.assertEquals(line, '##PEDIGREE=') - if line.startswith("##SAMPLE"): - assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line) - - -class TestSamplesSpace(unittest.TestCase): - filename = 'samples-space.vcf' - samples = ['NA 00001', 'NA 00002', 'NA 00003'] - def test_samples(self): - self.reader = vcf.Reader(fh(self.filename), strict_whitespace=True) - self.assertEqual(self.reader.samples, self.samples) - - -class TestMetadataWhitespace(unittest.TestCase): - filename = 'metadata-whitespace.vcf' - def test_metadata_whitespace(self): - """ - Test parsing metadata header lines with whitespace. - """ - self.reader = vcf.Reader(fh(self.filename)) - - # Pick one INFO line and assert that we parsed it correctly. - info_indel = self.reader.infos['INDEL'] - assert info_indel.id == 'INDEL' - assert info_indel.num == 0 - assert info_indel.type == 'Flag' - assert info_indel.desc == 'Indicates that the variant is an INDEL.' - - # Test we can walk the file at least. - for r in self.reader: - for c in r: - pass - - -class TestMixedFiltering(unittest.TestCase): - filename = 'mixed-filtering.vcf' - def test_mixed_filtering(self): - """ - Test mix of FILTER values (pass, filtered, no filtering). - """ - reader = vcf.Reader(fh(self.filename)) - self.assertEqual(next(reader).FILTER, []) - self.assertEqual(next(reader).FILTER, ['q10']) - self.assertEqual(next(reader).FILTER, []) - self.assertEqual(next(reader).FILTER, None) - self.assertEqual(next(reader).FILTER, ['q10', 'q50']) - - -class TestRecord(unittest.TestCase): - - def test_num_calls(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - num_calls = (var.num_hom_ref + var.num_hom_alt + \ - var.num_het + var.num_unknown) - self.assertEqual(len(var.samples), num_calls) - - def test_dunder_eq(self): - rec = next(vcf.Reader(fh('example-4.0.vcf'))) - self.assertFalse(rec == None) - self.assertFalse(None == rec) - - def test_call_rate(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - call_rate = var.call_rate - if var.POS == 14370: - self.assertEqual(3.0/3.0, call_rate) - if var.POS == 17330: - self.assertEqual(3.0/3.0, call_rate) - if var.POS == 1110696: - self.assertEqual(3.0/3.0, call_rate) - if var.POS == 1230237: - self.assertEqual(3.0/3.0, call_rate) - elif var.POS == 1234567: - self.assertEqual(2.0/3.0, call_rate) - - def test_aaf(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - aaf = var.aaf - if var.POS == 14370: - self.assertEqual([3.0/6.0], aaf) - if var.POS == 17330: - self.assertEqual([1.0/6.0], aaf) - if var.POS == 1110696: - self.assertEqual([2.0/6.0, 4.0/6.0], aaf) - if var.POS == 1230237: - self.assertEqual([0.0/6.0], aaf) - elif var.POS == 1234567: - self.assertEqual([2.0/4.0, 1.0/4.0], aaf) - reader = vcf.Reader(fh('example-4.1-ploidy.vcf')) - for var in reader: - aaf = var.aaf - if var.POS == 60034: - self.assertEqual([4.0/6.0], aaf) - elif var.POS == 60387: - self.assertEqual([1.0/3.0], aaf) - - def test_pi(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - pi = var.nucl_diversity - if var.POS == 14370: - self.assertEqual(6.0/10.0, pi) - if var.POS == 17330: - self.assertEqual(1.0/3.0, pi) - if var.POS == 1110696: - self.assertEqual(None, pi) - if var.POS == 1230237: - self.assertEqual(0.0/6.0, pi) - elif var.POS == 1234567: - self.assertEqual(None, pi) - - def test_heterozygosity(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - het = var.heterozygosity - if var.POS == 14370: - self.assertEqual(0.5, het) - if var.POS == 17330: - self.assertEqual(1-((1.0/6)**2 + (5.0/6)**2), het) - if var.POS == 1110696: - self.assertEqual(4.0/9.0, het) - if var.POS == 1230237: - self.assertEqual(0.0, het) - elif var.POS == 1234567: - self.assertEqual(5.0/8.0, het) - - def test_is_snp(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for r in reader: - print(r) - for c in r: - print(c) - assert c - for var in reader: - is_snp = var.is_snp - if var.POS == 14370: - self.assertEqual(True, is_snp) - if var.POS == 17330: - self.assertEqual(True, is_snp) - if var.POS == 1110696: - self.assertEqual(True, is_snp) - if var.POS == 1230237: - self.assertEqual(False, is_snp) - elif var.POS == 1234567: - self.assertEqual(False, is_snp) - - - def test_is_snp_for_n_alt(self): - record = model._Record( - '1', - 10, - 'id1', - 'C', - [model._Substitution('N')], - None, - None, - {}, - None, - {}, - None - ) - self.assertTrue(record.is_snp) - - - def test_is_indel(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - is_indel = var.is_indel - if var.POS == 14370: - self.assertEqual(False, is_indel) - if var.POS == 17330: - self.assertEqual(False, is_indel) - if var.POS == 1110696: - self.assertEqual(False, is_indel) - if var.POS == 1230237: - self.assertEqual(True, is_indel) - elif var.POS == 1234567: - self.assertEqual(True, is_indel) - - def test_is_transition(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - is_trans = var.is_transition - if var.POS == 14370: - self.assertEqual(True, is_trans) - if var.POS == 17330: - self.assertEqual(False, is_trans) - if var.POS == 1110696: - self.assertEqual(False, is_trans) - if var.POS == 1230237: - self.assertEqual(False, is_trans) - elif var.POS == 1234567: - self.assertEqual(False, is_trans) - - def test_is_deletion(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - is_del = var.is_deletion - if var.POS == 14370: - self.assertEqual(False, is_del) - if var.POS == 17330: - self.assertEqual(False, is_del) - if var.POS == 1110696: - self.assertEqual(False, is_del) - if var.POS == 1230237: - self.assertEqual(True, is_del) - elif var.POS == 1234567: - self.assertEqual(False, is_del) - - def test_var_type(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - type = var.var_type - if var.POS == 14370: - self.assertEqual("snp", type) - if var.POS == 17330: - self.assertEqual("snp", type) - if var.POS == 1110696: - self.assertEqual("snp", type) - if var.POS == 1230237: - self.assertEqual("indel", type) - elif var.POS == 1234567: - self.assertEqual("indel", type) - # SV tests - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - for var in reader: - type = var.var_type - if var.POS == 2827693: - self.assertEqual("sv", type) - if var.POS == 321682: - self.assertEqual("sv", type) - if var.POS == 14477084: - self.assertEqual("sv", type) - if var.POS == 9425916: - self.assertEqual("sv", type) - elif var.POS == 12665100: - self.assertEqual("sv", type) - elif var.POS == 18665128: - self.assertEqual("sv", type) - - - def test_var_subtype(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - subtype = var.var_subtype - if var.POS == 14370: - self.assertEqual("ts", subtype) - if var.POS == 17330: - self.assertEqual("tv", subtype) - if var.POS == 1110696: - self.assertEqual("unknown", subtype) - if var.POS == 1230237: - self.assertEqual("del", subtype) - elif var.POS == 1234567: - self.assertEqual("unknown", subtype) - # SV tests - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - for var in reader: - subtype = var.var_subtype - if var.POS == 2827693: - self.assertEqual("DEL", subtype) - if var.POS == 321682: - self.assertEqual("DEL", subtype) - if var.POS == 14477084: - self.assertEqual("DEL:ME:ALU", subtype) - if var.POS == 9425916: - self.assertEqual("INS:ME:L1", subtype) - elif var.POS == 12665100: - self.assertEqual("DUP", subtype) - elif var.POS == 18665128: - self.assertEqual("DUP:TANDEM", subtype) - - def test_is_sv(self): - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - for var in reader: - is_sv = var.is_sv - if var.POS == 2827693: - self.assertEqual(True, is_sv) - if var.POS == 321682: - self.assertEqual(True, is_sv) - if var.POS == 14477084: - self.assertEqual(True, is_sv) - if var.POS == 9425916: - self.assertEqual(True, is_sv) - elif var.POS == 12665100: - self.assertEqual(True, is_sv) - elif var.POS == 18665128: - self.assertEqual(True, is_sv) - - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - is_sv = var.is_sv - if var.POS == 14370: - self.assertEqual(False, is_sv) - if var.POS == 17330: - self.assertEqual(False, is_sv) - if var.POS == 1110696: - self.assertEqual(False, is_sv) - if var.POS == 1230237: - self.assertEqual(False, is_sv) - elif var.POS == 1234567: - self.assertEqual(False, is_sv) - - def test_is_sv_precise(self): - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - for var in reader: - is_precise = var.is_sv_precise - if var.POS == 2827693: - self.assertEqual(True, is_precise) - if var.POS == 321682: - self.assertEqual(False, is_precise) - if var.POS == 14477084: - self.assertEqual(False, is_precise) - if var.POS == 9425916: - self.assertEqual(False, is_precise) - elif var.POS == 12665100: - self.assertEqual(False, is_precise) - elif var.POS == 18665128: - self.assertEqual(False, is_precise) - - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - is_precise = var.is_sv_precise - if var.POS == 14370: - self.assertEqual(False, is_precise) - if var.POS == 17330: - self.assertEqual(False, is_precise) - if var.POS == 1110696: - self.assertEqual(False, is_precise) - if var.POS == 1230237: - self.assertEqual(False, is_precise) - elif var.POS == 1234567: - self.assertEqual(False, is_precise) - - def test_sv_end(self): - reader = vcf.Reader(fh('example-4.1-sv.vcf')) - for var in reader: - sv_end = var.sv_end - if var.POS == 2827693: - self.assertEqual(2827680, sv_end) - if var.POS == 321682: - self.assertEqual(321887, sv_end) - if var.POS == 14477084: - self.assertEqual(14477381, sv_end) - if var.POS == 9425916: - self.assertEqual(9425916, sv_end) - elif var.POS == 12665100: - self.assertEqual(12686200, sv_end) - elif var.POS == 18665128: - self.assertEqual(18665204, sv_end) - - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - sv_end = var.sv_end - if var.POS == 14370: - self.assertEqual(None, sv_end) - if var.POS == 17330: - self.assertEqual(None, sv_end) - if var.POS == 1110696: - self.assertEqual(None, sv_end) - if var.POS == 1230237: - self.assertEqual(None, sv_end) - elif var.POS == 1234567: - self.assertEqual(None, sv_end) - - def test_qual(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - qual = var.QUAL - qtype = type(qual) - if var.POS == 14370: - expected = 29 - if var.POS == 17330: - expected = 3.0 - if var.POS == 1110696: - expected = 1e+03 - if var.POS == 1230237: - expected = 47 - elif var.POS == 1234567: - expected = None - self.assertEqual(expected, qual) - self.assertEqual(type(expected), qtype) - - def test_info_multiple_values(self): - reader = vcf.Reader(fh('example-4.1-info-multiple-values.vcf')) - var = next(reader) - # check Float type INFO field with multiple values - expected = [19.3, 47.4, 14.0] - actual = var.INFO['RepeatCopies'] - self.assertEqual(expected, actual) - # check Integer type INFO field with multiple values - expected = [42, 14, 56] - actual = var.INFO['RepeatSize'] - self.assertEqual(expected, actual) - # check String type INFO field with multiple values - expected = ['TCTTATCTTCTTACTTTTCATTCCTTACTCTTACTTACTTAC', 'TTACTCTTACTTAC', 'TTACTCTTACTTACTTACTCTTACTTACTTACTCTTACTTACTTACTCTTATCTTC'] - actual = var.INFO['RepeatConsensus'] - self.assertEqual(expected, actual) - - def test_pickle(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - self.assertEqual(cPickle.loads(cPickle.dumps(var)), var) - - - def assert_has_expected_coordinates( - self, - record, - expected_coordinates, - expected_affected_coordinates - ): - self.assertEqual( - (record.start, record.end), - expected_coordinates - ) - self.assertEqual( - (record.affected_start, record.affected_end), - expected_affected_coordinates - ) - - - def test_coordinates_for_snp(self): - record = model._Record( - '1', - 10, - 'id1', - 'C', - [model._Substitution('A')], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) - - - def test_coordinates_for_insertion(self): - record = model._Record( - '1', - 10, - 'id2', - 'C', - [model._Substitution('CTA')], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (10, 10)) - - - def test_coordinates_for_deletion(self): - record = model._Record( - '1', - 10, - 'id3', - 'CTA', - [model._Substitution('C')], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) - - - def test_coordinates_for_None_alt(self): - record = model._Record( - '1', - 10, - 'id4', - 'C', - [None], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) - - - def test_coordinates_for_multiple_snps(self): - record = model._Record( - '1', - 10, - 'id5', - 'C', - [ - model._Substitution('A'), - model._Substitution('G'), - model._Substitution('T') - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) - - - def test_coordinates_for_insert_and_snp(self): - record = model._Record( - '1', - 10, - 'id6', - 'C', - [ - model._Substitution('GTA'), - model._Substitution('G'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) - record = model._Record( - '1', - 10, - 'id7', - 'C', - [ - model._Substitution('G'), - model._Substitution('GTA'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 10), (9, 10)) - - - def test_coordinates_for_snp_and_deletion(self): - record = model._Record( - '1', - 10, - 'id8', - 'CTA', - [ - model._Substitution('C'), - model._Substitution('CTG'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) - record = model._Record( - '1', - 10, - 'id9', - 'CTA', - [ - model._Substitution('CTG'), - model._Substitution('C'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 12), (10, 12)) - - - def test_coordinates_for_insertion_and_deletion(self): - record = model._Record( - '1', - 10, - 'id10', - 'CT', - [ - model._Substitution('CA'), - model._Substitution('CTT'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 11), (10, 11)) - record = model._Record( - '1', - 10, - 'id11', - 'CT', - [ - model._Substitution('CTT'), - model._Substitution('CA'), - ], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 11), (10, 11)) - - - def test_coordinates_for_breakend(self): - record = model._Record( - '1', - 10, - 'id12', - 'CTA', - [model._Breakend('1', 500, False, True, 'GGTC', True)], - None, - None, - {}, - None, - {}, - None - ) - self.assert_has_expected_coordinates(record, (9, 12), (9, 12)) - - -class TestCall(unittest.TestCase): - - def test_dunder_eq(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - var = next(reader) - example_call = var.samples[0] - self.assertFalse(example_call == None) - self.assertFalse(None == example_call) - - def test_phased(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - phases = [s.phased for s in var.samples] - if var.POS == 14370: - self.assertEqual([True, True, False], phases) - if var.POS == 17330: - self.assertEqual([True, True, False], phases) - if var.POS == 1110696: - self.assertEqual([True, True, False], phases) - if var.POS == 1230237: - self.assertEqual([True, True, False], phases) - elif var.POS == 1234567: - self.assertEqual([False, False, False], phases) - - def test_gt_bases(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - gt_bases = [s.gt_bases for s in var.samples] - if var.POS == 14370: - self.assertEqual(['G|G', 'A|G', 'A/A'], gt_bases) - elif var.POS == 17330: - self.assertEqual(['T|T', 'T|A', 'T/T'], gt_bases) - elif var.POS == 1110696: - self.assertEqual(['G|T', 'T|G', 'T/T'], gt_bases) - elif var.POS == 1230237: - self.assertEqual(['T|T', 'T|T', 'T/T'], gt_bases) - elif var.POS == 1234567: - self.assertEqual([None, 'GTCT/GTACT', 'G/G'], gt_bases) - - def test_gt_types(self): - reader = vcf.Reader(fh('example-4.0.vcf')) - for var in reader: - for s in var: - print(s.data) - gt_types = [s.gt_type for s in var.samples] - if var.POS == 14370: - self.assertEqual([0,1,2], gt_types) - elif var.POS == 17330: - self.assertEqual([0,1,0], gt_types) - elif var.POS == 1110696: - self.assertEqual([1,1,2], gt_types) - elif var.POS == 1230237: - self.assertEqual([0,0,0], gt_types) - elif var.POS == 1234567: - self.assertEqual([None,1,2], gt_types) - - -@unittest.skipUnless(pysam, "test requires installation of PySAM.") -class TestFetch(unittest.TestCase): - - def setUp(self): - self.reader = vcf.Reader(fh('tb.vcf.gz', 'rb')) - - - def assertFetchedExpectedPositions( - self, fetched_variants, expected_positions): - fetched_positions = [var.POS for var in fetched_variants] - self.assertEqual(fetched_positions, expected_positions) - - - def testNoVariantsInRange(self): - fetched_variants = self.reader.fetch('20', 14370, 17329) - self.assertFetchedExpectedPositions(fetched_variants, []) - - - def testNoVariantsForZeroLengthInterval(self): - fetched_variants = self.reader.fetch('20', 14369, 14369) - self.assertFetchedExpectedPositions(fetched_variants, []) - - - def testFetchRange(self): - fetched_variants = self.reader.fetch('20', 14369, 14370) - self.assertFetchedExpectedPositions(fetched_variants, [14370]) - - fetched_variants = self.reader.fetch('20', 14369, 17330) - self.assertFetchedExpectedPositions( - fetched_variants, [14370, 17330]) - - fetched_variants = self.reader.fetch('20', 1110695, 1234567) - self.assertFetchedExpectedPositions( - fetched_variants, [1110696, 1230237, 1234567]) - - - def testFetchesFromStartIfStartOnlySpecified(self): - fetched_variants = self.reader.fetch('20', 1110695) - self.assertFetchedExpectedPositions( - fetched_variants, [1110696, 1230237, 1234567]) - - - def testFetchesAllFromChromIfOnlyChromSpecified(self): - fetched_variants = self.reader.fetch('20') - self.assertFetchedExpectedPositions( - fetched_variants, - [14370, 17330, 1110696, 1230237, 1234567] - ) - - -@unittest.skipUnless(pysam, "test requires installation of PySAM.") -class TestIssue201(unittest.TestCase): - def setUp(self): - # This file contains some non-ASCII characters in a UTF-8 encoding. - # https://github.com/jamescasbon/PyVCF/issues/201 - self.reader = vcf.Reader(fh('issue-201.vcf.gz', 'rb'), - encoding='utf-8') - - def testIterate(self): - for record in self.reader: - # Should not raise decoding errors. - pass - - def testFetch(self): - for record in self.reader.fetch(chrom='17'): - # Should not raise decoding errors. - pass - - -class TestOpenMethods(unittest.TestCase): - - samples = 'NA00001 NA00002 NA00003'.split() - - def fp(self, fname): - return os.path.join(os.path.dirname(__file__), fname) - - - def testOpenFilehandle(self): - r = vcf.Reader(fh('example-4.0.vcf')) - self.assertEqual(self.samples, r.samples) - self.assertEqual('example-4.0.vcf', os.path.split(r.filename)[1]) - - def testOpenFilename(self): - r = vcf.Reader(filename=self.fp('example-4.0.vcf')) - self.assertEqual(self.samples, r.samples) - - def testOpenFilehandleGzipped(self): - r = vcf.Reader(fh('tb.vcf.gz', 'rb')) - self.assertEqual(self.samples, r.samples) - - def testOpenFilenameGzipped(self): - r = vcf.Reader(filename=self.fp('tb.vcf.gz')) - self.assertEqual(self.samples, r.samples) - - -class TestSampleFilter(unittest.TestCase): - @unittest.skipUnless(IS_PYTHON2, "test broken for Python 3") - def testCLIListSamples(self): - proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - self.assertEqual(proc.returncode, 0) - self.assertFalse(err) - expected_out = ['Samples:', '0: NA00001', '1: NA00002', '2: NA00003'] - self.assertEqual(out.splitlines(), expected_out) - - @unittest.skipUnless(IS_PYTHON2, "test broken for Python 3") - def testCLIWithFilter(self): - proc = subprocess.Popen('python scripts/vcf_sample_filter.py vcf/test/example-4.1.vcf -f 1,2 --quiet', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - self.assertEqual(proc.returncode, 0) - self.assertTrue(out) - self.assertFalse(err) - buf = StringIO() - buf.write(out) - buf.seek(0) - #print(buf.getvalue()) - reader = vcf.Reader(buf) - self.assertEqual(reader.samples, ['NA00001']) - rec = next(reader) - self.assertEqual(len(rec.samples), 1) - - @unittest.skipUnless(IS_NOT_PYPY, "test broken for PyPy") - def testSampleFilterModule(self): - # init filter with filename, get list of samples - filt = vcf.SampleFilter('vcf/test/example-4.1.vcf') - self.assertEqual(filt.samples, ['NA00001', 'NA00002', 'NA00003']) - # set filter, check which samples will be kept - filtered = filt.set_filters(filters="0", invert=True) - self.assertEqual(filtered, ['NA00001']) - # write filtered file to StringIO - buf = StringIO() - filt.write(buf) - buf.seek(0) - #print(buf.getvalue()) - # undo monkey patch by destroying instance - del filt - self.assertTrue('sample_filter' not in dir(vcf.Reader)) - # read output - reader = vcf.Reader(buf) - self.assertEqual(reader.samples, ['NA00001']) - rec = next(reader) - self.assertEqual(len(rec.samples), 1) - - -class TestFilter(unittest.TestCase): - - - @unittest.skip("test currently broken") - def testApplyFilter(self): - # FIXME: broken with distribute - s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq') - #print(out) - self.assertEqual(s, 0) - buf = StringIO() - buf.write(out) - buf.seek(0) - - print(buf.getvalue()) - reader = vcf.Reader(buf) - - - # check filter got into output file - assert 'sq30' in reader.filters - - print(reader.filters) - - # check sites were filtered - n = 0 - for r in reader: - if r.QUAL < 30: - assert 'sq30' in r.FILTER - n += 1 - else: - assert 'sq30' not in r.FILTER - self.assertEqual(n, 2) - - - @unittest.skip("test currently broken") - def testApplyMultipleFilters(self): - # FIXME: broken with distribute - s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 ' - '--genotype-quality 50 test/example-4.0.vcf sq mgq') - self.assertEqual(s, 0) - #print(out) - buf = StringIO() - buf.write(out) - buf.seek(0) - reader = vcf.Reader(buf) - - print(reader.filters) - - assert 'mgq50' in reader.filters - assert 'sq30' in reader.filters - - -class TestRegression(unittest.TestCase): - - def test_issue_16(self): - reader = vcf.Reader(fh('issue-16.vcf')) - n = next(reader) - assert n.QUAL == None - - def test_null_mono(self): - # null qualities were written as blank, causing subsequent parse to fail - print(os.path.abspath(os.path.join(os.path.dirname(__file__), 'null_genotype_mono.vcf') )) - p = vcf.Reader(fh('null_genotype_mono.vcf')) - assert p.samples - out = StringIO() - writer = vcf.Writer(out, p) - for record in p: - writer.write_record(record) - out.seek(0) - print(out.getvalue()) - p2 = vcf.Reader(out) - rec = next(p2) - assert rec.samples - - -class TestUtils(unittest.TestCase): - - def test_walk(self): - # easy case: all same sites - reader1 = vcf.Reader(fh('example-4.0.vcf')) - reader2 = vcf.Reader(fh('example-4.0.vcf')) - reader3 = vcf.Reader(fh('example-4.0.vcf')) - - n = 0 - for x in utils.walk_together(reader1, reader2, reader3): - self.assertEqual(len(x), 3) - self.assertEqual(x[0], x[1]) - self.assertEqual(x[1], x[2]) - n+= 1 - self.assertEqual(n, 5) - - # artificial case 2 from the left, 2 from the right, 2 together, 1 from the right, 1 from the left - expected = 'llrrttrl' - reader1 = vcf.Reader(fh('walk_left.vcf')) - reader2 = vcf.Reader(fh('example-4.0.vcf')) - - for ex, recs in zip(expected, utils.walk_together(reader1, reader2)): - if ex == 'l': - assert recs[0] is not None - assert recs[1] is None - if ex == 'r': - assert recs[1] is not None - assert recs[0] is None - if ex == 't': - assert recs[0] is not None - assert recs[1] is not None - - # test files with many chromosomes, set 'vcf_record_sort_key' to define chromosome order - chr_order = map(str, range(1, 30)) + ['X', 'Y', 'M'] - get_key = lambda r: (chr_order.index(r.CHROM.replace('chr','')), r.POS) - reader1 = vcf.Reader(fh('issue-140-file1.vcf')) - reader2 = vcf.Reader(fh('issue-140-file2.vcf')) - reader3 = vcf.Reader(fh('issue-140-file3.vcf')) - expected = "66642577752767662466" # each char is an integer bit flag - like file permissions - for ex, recs in zip(expected, utils.walk_together(reader1, reader2, reader3, vcf_record_sort_key = get_key)): - ex = int(ex) - for i, flag in enumerate([0x4, 0x2, 0x1]): - if ex & flag: - self.assertNotEqual(recs[i], None) - else: - self.assertEqual(recs[i], None) - - def test_trim(self): - tests = [('TAA GAA', 'T G'), - ('TA TA', 'T T'), - ('AGTTTTTA AGTTTA', 'AGTT AG'), - ('TATATATA TATATA', 'TAT T'), - ('TATATA TATATATA', 'T TAT'), - ('ACCCCCCC ACCCCCCCCCC ACCCCCCCCC ACCCCCCCCCCC', 'A ACCC ACC ACCCC')] - for sequences, expected in tests: - self.assertEqual(utils.trim_common_suffix(*sequences.split()), - expected.split()) - - - -class TestGATKMeta(unittest.TestCase): - - def test_meta(self): - # expect no exceptions raised - reader = vcf.Reader(fh('gatk_26_meta.vcf')) - assert 'GATKCommandLine' in reader.metadata - self.assertEqual(reader.metadata['GATKCommandLine'][0]['CommandLineOptions'], '"analysis_type=LeftAlignAndTrimVariants"') - self.assertEqual(reader.metadata['GATKCommandLine'][1]['CommandLineOptions'], '"analysis_type=VariantAnnotator annotation=[HomopolymerRun, VariantType, TandemRepeatAnnotator]"') - - - -class TestUncalledGenotypes(unittest.TestCase): - """Test the handling of uncalled (., ./.) sample genotypes.""" - - def test_read_uncalled(self): - """Test that uncalled genotypes are properly read into - gt_nums, gt_bases, ploidity, and gt_alleles properties - of _Call objects. For uncalled _Call objects: - - - gt_nums should be None - - gt_bases should be None - - ploidity should match the input ploidity - - gt_alleles should be a list of None's with length - matching the ploidity""" - - reader = vcf.Reader(fh('uncalled_genotypes.vcf')) - for var in reader: - gt_bases = [s.gt_bases for s in var.samples] - gt_nums = [s.gt_nums for s in var.samples] - ploidity = [s.ploidity for s in var.samples] - gt_alleles = [s.gt_alleles for s in var.samples] - - if var.POS == 14370: - self.assertEqual(['0|0', None, '1/1'], gt_nums) - self.assertEqual(['G|G', None, 'A/A'], gt_bases) - self.assertEqual([2,2,2], ploidity) - self.assertEqual([['0','0'], [None,None], ['1','1']], gt_alleles) - elif var.POS == 17330: - self.assertEqual([None, '0|1', '0/0'], gt_nums) - self.assertEqual([None, 'T|A', 'T/T'], gt_bases) - self.assertEqual([3,2,2], ploidity) - self.assertEqual([[None,None,None], ['0','1'], ['0','0']], gt_alleles) - elif var.POS == 1234567: - self.assertEqual(['0/1', '0/2', None], gt_nums) - self.assertEqual(['GTC/G', 'GTC/GTCT', None], gt_bases) - self.assertEqual([2,2,1], ploidity) - self.assertEqual([['0','1'], ['0','2'], [None]], gt_alleles) - reader._reader.close() - - - def test_write_uncalled(self): - """Test that uncalled genotypes are written just as - they were read in the input file.""" - - reader = vcf.Reader(fh('uncalled_genotypes.vcf')) - - # Write all reader records to a stream. - out = StringIO() - writer = vcf.Writer(out, reader, lineterminator='\n') - for record in reader: - writer.write_record(record) - reader._reader.close() - - - # Compare the written stream to the input reader line-by-line. - out.seek(0) - out_lines = out.getvalue().split('\n') - in_file = fh('uncalled_genotypes.vcf') - in_lines = [l.rstrip('\n') for l in in_file] - in_file.close() - for (in_line, out_line) in zip(in_lines, out_lines): - self.assertEqual(in_line,out_line) - -class TestStrelka(unittest.TestCase): - - def test_strelka(self): - reader = vcf.Reader(fh('strelka.vcf')) - n = next(reader) - assert n is not None - - -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestVcfSpecs)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutput)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFreebayesOutput)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamtoolsOutput)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutput)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue214)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kg)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Test1kgSites)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGoNL)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStringAsFlag)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoOrder)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestInfoTypeCharacter)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestParseMetaLine)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGatkOutputWriter)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestBcfToolsOutputWriter)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestWriterDictionaryMeta)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSamplesSpace)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMetadataWhitespace)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestMixedFiltering)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRecord)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestCall)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFetch)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestIssue201)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOpenMethods)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestSampleFilter)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestFilter)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestRegression)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUtils)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGATKMeta)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncalledGenotypes)) -suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestStrelka)) diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.pyc deleted file mode 100644 index de73e7f..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/test/test_vcf.pyc and /dev/null differ diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.pyc b/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.pyc deleted file mode 100644 index 179d552..0000000 Binary files a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.pyc and /dev/null differ diff --git a/src/vcfaz/__init__.py b/src/vcfaz/__init__.py index 3ab3762..8a1acd8 100644 --- a/src/vcfaz/__init__.py +++ b/src/vcfaz/__init__.py @@ -1,3 +1,15 @@ -# vcfaz __init__.py +#!/usr/bin/env python +""" +A VCFv4.0 and 4.1 parser for Python. -# This is a package for vcfaz +Online version of PyVCF documentation is available at http://pyvcf.rtfd.org/ +""" + + +from .parser import Reader, Writer +from .parser import VCFReader, VCFWriter +from .filters import Base as Filter +from .parser import RESERVED_INFO, RESERVED_FORMAT +from .sample_filter import SampleFilter + +VERSION = '0.6.8' diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/filters.py b/src/vcfaz/filters.py similarity index 100% rename from pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/filters.py rename to src/vcfaz/filters.py diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.py b/src/vcfaz/model.py similarity index 98% rename from pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.py rename to src/vcfaz/model.py index ef1edb7..5481669 100644 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/model.py +++ b/src/vcfaz/model.py @@ -3,10 +3,7 @@ import sys import re -try: - from collections import Counter -except ImportError: - from counter import Counter +from collections import Counter allele_delimiter = re.compile(r'''[|/]''') # to split a genotype into alleles @@ -238,11 +235,6 @@ def _compute_coordinates_for_sv(self): return (start, end) - # For Python 2 - def __cmp__(self, other): - return cmp((self.CHROM, self.POS), (getattr(other, "CHROM", None), getattr(other, "POS", None))) - - # For Python 3 def __eq__(self, other): """ _Records are equal if they describe the same variant (same position, alleles) """ return (self.CHROM == getattr(other, "CHROM", None) and @@ -250,7 +242,6 @@ def __eq__(self, other): self.REF == getattr(other, "REF", None) and self.ALT == getattr(other, "ALT", None)) - # For Python 3 def __lt__(self, other): return (self.CHROM, self.POS) < (getattr(other, "CHROM", None), getattr(other, "POS", None)) @@ -537,9 +528,8 @@ def is_monomorphic(self): return len(self.ALT) == 1 and self.ALT[0] is None -class _AltRecord(object): +class _AltRecord(object, metaclass=ABCMeta): '''An alternative allele record: either replacement string, SV placeholder, or breakend''' - __metaclass__ = ABCMeta def __init__(self, type, **kwargs): super(_AltRecord, self).__init__(**kwargs) @@ -575,7 +565,7 @@ def __len__(self): return len(self.sequence) def __eq__(self, other): - if isinstance(other, basestring): + if isinstance(other, str): return self.sequence == other elif not isinstance(other, self.__class__): return False diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.py b/src/vcfaz/parser.py similarity index 94% rename from pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.py rename to src/vcfaz/parser.py index a0b10e1..af82111 100644 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/parser.py +++ b/src/vcfaz/parser.py @@ -7,10 +7,7 @@ import re import sys -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict +from collections import OrderedDict try: import pysam @@ -18,12 +15,12 @@ pysam = None try: - import cparse + from . import cparse except ImportError: cparse = None -from model import _Call, _Record, make_calldata_tuple -from model import _Substitution, _Breakend, _SingleBreakend, _SV +from .model import _Call, _Record, make_calldata_tuple +from .model import _Substitution, _Breakend, _SingleBreakend, _SV # Metadata parsers/constants @@ -266,8 +263,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False self.filename = filename if compressed: self._reader = gzip.GzipFile(fileobj=self._reader) - if sys.version > '3': - self._reader = codecs.getreader(encoding)(self._reader) + self._reader = codecs.getreader(encoding)(self._reader) if strict_whitespace: self._separator = '\t' @@ -275,7 +271,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False self._separator = '\t| +' self._row_pattern = re.compile(self._separator) - self._alt_pattern = re.compile('[\[\]]') + self._alt_pattern = re.compile(r'[\[\]]') self.reader = (line.strip() for line in self._reader if line.strip()) @@ -421,7 +417,7 @@ def _parse_info(self, info_str): ##justkeep it and change type into String entry_type == 'String' vals = entry[1].split(',') #commas are reserved characters indicating multiple value - val = self._map(str, val) + val = self._map(str, vals) elif "|" in entry[1]: entry_type == 'String' vals = entry[1].split('|') @@ -433,14 +429,14 @@ def _parse_info(self, info_str): elif entry[1] == '': val = '' else: - ## try to treat as real Flota but it fails switchagain to string + ## try to treat as real Float but if it fails switch to string try: vals = entry[1].split(',') val = self._map(float, vals) except: entry_type == 'String' vals = entry[1].split(',') #commas are reserved characters indicating multiple value - val = self._map(str, val) + val = self._map(str, vals) elif entry_type == 'Flag': val = True #elif entry_type in ('String', 'Character'): @@ -460,7 +456,7 @@ def _parse_info(self, info_str): ## Modify by Zauli (merge multiple effect EFF fileds into a single one) ## merge only if val is a list - if retdict.has_key(ID) and type(val) == type([]): + if ID in retdict and type(val) == type([]): retdict[ID] = set(retdict[ID]) retdict[ID] = retdict[ID].union(val) retdict[ID] = list(retdict[ID]) @@ -510,7 +506,7 @@ def _parse_samples(self, samples, samp_fmt, site): nfields = len(samp_fmt._fields) - for name, sample in itertools.izip(self.samples, samples): + for name, sample in zip(self.samples, samples): # parse the data for this sample sampdat = [None] * nfields @@ -577,7 +573,7 @@ def _parse_alt(self, str): withinMainAssembly = True pos = remoteCoords[1] orientation = (str[0] == '[' or str[0] == ']') - remoteOrientation = (re.search('\[', str) is not None) + remoteOrientation = (re.search(r'\[', str) is not None) if orientation: connectingSequence = items[2] else: @@ -592,7 +588,7 @@ def _parse_alt(self, str): else: return _Substitution(str) - def next(self): + def __next__(self): '''Return the next record in the file.''' line = next(self.reader) row = self._row_pattern.split(line.rstrip()) @@ -643,6 +639,9 @@ def next(self): return record + # backwards compatibility alias + next = __next__ + def fetch(self, chrom, start=None, end=None): """ Fetches records from a tabix-indexed VCF file and returns an iterable of ``_Record`` instances @@ -684,16 +683,6 @@ def fetch(self, chrom, start=None, end=None): if self._prepend_chr and chrom[:3] == 'chr': chrom = chrom[3:] -# # not sure why tabix needs position -1 -# start = start - 1 -# -# if end is None: -# self.reader = self._tabix.fetch(chrom, start, start + 1) -# try: -# return self.next() -# except StopIteration: -# return None - self.reader = self._tabix.fetch(chrom, start, end) return self @@ -702,7 +691,7 @@ class Writer(object): """VCF Writer. On Windows Python 2, open stream with 'wb'.""" # Reverse keys and values in header field count dictionary - counts = dict((v,k) for k,v in field_counts.iteritems()) + counts = dict((v, k) for k, v in field_counts.items()) def __init__(self, stream, template, lineterminator="\n"): self.writer = csv.writer(stream, delimiter="\t", @@ -715,12 +704,12 @@ def __init__(self, stream, template, lineterminator="\n"): # get a maximum key). self.info_order = collections.defaultdict( lambda: len(template.infos), - dict(zip(template.infos.iterkeys(), itertools.count()))) + dict(zip(template.infos.keys(), itertools.count()))) two = '##{key}=\n' four = '##{key}=\n' _num = self._fix_field_count - for (key, vals) in template.metadata.iteritems(): + for (key, vals) in template.metadata.items(): if key in SINGULAR_METADATA: vals = [vals] for val in vals: @@ -730,15 +719,15 @@ def __init__(self, stream, template, lineterminator="\n"): stream.write('##{0}=<{1}>\n'.format(key, values)) else: stream.write('##{0}={1}\n'.format(key, val)) - for line in template.infos.itervalues(): + for line in template.infos.values(): stream.write(four.format(key="INFO", *line, num=_num(line.num))) - for line in template.formats.itervalues(): + for line in template.formats.values(): stream.write(four.format(key="FORMAT", *line, num=_num(line.num))) - for line in template.filters.itervalues(): + for line in template.filters.values(): stream.write(two.format(key="FILTER", *line)) - for line in template.alts.itervalues(): + for line in template.alts.values(): stream.write(two.format(key="ALT", *line)) - for line in template.contigs.itervalues(): + for line in template.contigs.values(): if line.length: stream.write('##contig=\n'.format(*line)) else: @@ -829,11 +818,6 @@ def _map(self, func, iterable, none='.'): for x in iterable] -def __update_readme(): - import sys, vcf - file('README.rst', 'w').write(vcf.__doc__) - - # backwards compatibility VCFReader = Reader VCFWriter = Writer diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.py b/src/vcfaz/sample_filter.py similarity index 97% rename from pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.py rename to src/vcfaz/sample_filter.py index b156b45..7432215 100644 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/sample_filter.py +++ b/src/vcfaz/sample_filter.py @@ -7,7 +7,7 @@ import warnings -from parser import Reader, Writer +from .parser import Reader, Writer class SampleFilter(object): @@ -87,7 +87,7 @@ def filt2idx(item): warnings.warn("Invalid filters, ignoring", RuntimeWarning) if self.invert: - filters = set(xrange(len(self.samples))).difference(filters) + filters = set(range(len(self.samples))).difference(filters) # `sample_filter` setter updates `samples` self.parser.sample_filter = filters diff --git a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.py b/src/vcfaz/utils.py similarity index 98% rename from pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.py rename to src/vcfaz/utils.py index 2881dc2..ad8c2b6 100644 --- a/pyvcfaz-0.6.8-py27_0/lib/python2.7/site-packages/vcfaz/utils.py +++ b/src/vcfaz/utils.py @@ -49,7 +49,7 @@ def walk_together(*readers, **kwargs): for i in min_k_idxs: try: - nexts[i] = readers[i].next() + nexts[i] = next(readers[i]) except StopIteration: nexts[i] = None diff --git a/tests/test_import.py b/tests/test_import.py new file mode 100644 index 0000000..b132352 --- /dev/null +++ b/tests/test_import.py @@ -0,0 +1,75 @@ +"""Smoke tests to verify the vcfaz Python 3 port imports correctly.""" + +import sys +import os + +# Ensure the src/ layout is on the path when running without install +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) + + +def test_import_vcfaz(): + """Test that the vcfaz package can be imported.""" + import vcfaz + assert vcfaz.__file__ is not None + + +def test_import_reader(): + """Test that Reader can be imported from vcfaz.parser.""" + from vcfaz.parser import Reader + assert Reader is not None + + +def test_import_writer(): + """Test that Writer can be imported from vcfaz.parser.""" + from vcfaz.parser import Writer + assert Writer is not None + + +def test_import_vcfreader_alias(): + """Test that VCFReader alias is available.""" + from vcfaz.parser import VCFReader + from vcfaz.parser import Reader + assert VCFReader is Reader + + +def test_import_vcfwriter_alias(): + """Test that VCFWriter alias is available.""" + from vcfaz.parser import VCFWriter + from vcfaz.parser import Writer + assert VCFWriter is Writer + + +def test_import_filter(): + """Test that Filter base class can be imported from vcfaz.""" + import vcfaz + assert vcfaz.Filter is not None + + +def test_import_reserved_constants(): + """Test that RESERVED_INFO and RESERVED_FORMAT are available.""" + import vcfaz + assert isinstance(vcfaz.RESERVED_INFO, dict) + assert isinstance(vcfaz.RESERVED_FORMAT, dict) + assert 'DP' in vcfaz.RESERVED_INFO + assert 'GT' in vcfaz.RESERVED_FORMAT + + +def test_reader_from_stream(): + """Test that Reader can parse a minimal VCF from a StringIO stream.""" + import io + from vcfaz.parser import Reader + + vcf_content = ( + "##fileformat=VCFv4.1\n" + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n" + "chr1\t100\t.\tA\tG\t50\tPASS\t.\n" + ) + stream = io.StringIO(vcf_content) + reader = Reader(fsock=stream) + records = list(reader) + assert len(records) == 1 + record = records[0] + assert record.CHROM == 'chr1' + assert record.POS == 100 + assert record.REF == 'A' + assert str(record.ALT[0]) == 'G'