Skip to content

Commit

Permalink
Merge pull request #8 from dbmi-bgm/utf8_encode
Browse files Browse the repository at this point in the history
Utf8 encode
  • Loading branch information
B3rse authored Mar 17, 2021
2 parents 7360889 + 5765bab commit b451781
Show file tree
Hide file tree
Showing 28 changed files with 2,758 additions and 85 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ For more details, see granite [*documentation*](https://granite-suite.readthedoc
## Availability and requirements
A ready-to-use docker image is available to download.

docker pull b3rse/granite:v0.1.9
docker pull b3rse/granite:v0.1.10

To run locally, install the following libraries:

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Michele Berselli'

# The full version, including alpha/beta/rc tags
release = '0.1.9'
release = '0.1.10'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

A ready-to-use docker image is available to download.

docker pull b3rse/granite:v0.1.9
docker pull b3rse/granite:v0.1.10

To run locally, Python 3.6+ is required together with the following libraries:

Expand Down
2 changes: 1 addition & 1 deletion granite/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "0.1.9"
__version__ = "0.1.10"
2 changes: 1 addition & 1 deletion granite/blackList.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def main(args):
#end if

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Loading big if specified
if is_bigfile: big_dict = load_big(args['bigfile'])
Expand Down
2 changes: 1 addition & 1 deletion granite/cleanVCF.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def main(args):
is_verbose = True if args['verbose'] else False

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])
Expand Down
2 changes: 1 addition & 1 deletion granite/comHet.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ def main(args, test=False):
is_verbose = True if args['verbose'] else False

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])
Expand Down
2 changes: 1 addition & 1 deletion granite/geneList.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def main(args):
#end with

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])
Expand Down
37 changes: 29 additions & 8 deletions granite/lib/pedigree_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#
#################################################################
import sys, os

import json

#################################################################
#
Expand Down Expand Up @@ -118,12 +118,12 @@ def has_parents(self):

#end class Member

def __init__(self, pedigree):
def __init__(self, pedigree, format='json'):
''' initialize Pedigree object,
pedigree information must be provided as json '''
self.members = {} # dictionary of Member objects by name
self.samples = {} # dictionary to map sample to name
self.parse_pedigree(pedigree)
self.parse_pedigree(pedigree, format)
#end def __init__

def add_member(self, member):
Expand Down Expand Up @@ -217,15 +217,36 @@ def get_family(self, sample):
return family
#end def get_family

def parse_pedigree(self, pedigree):
''' read pedigree information to build Pedigree object,
pedigree information must be provided as json '''
def read_json(self, pedigree):
    ''' read pedigree as json,
        pedigree is either the path to a json file
        or a string representing a json,
        return the deserialized json content,
        raise ValueError if pedigree is not an existing file
        and can not be parsed as a json string '''
    # Reading pedigree
    if os.path.isfile(pedigree):
        # Explicit utf-8 so that decoding does not depend on the
        #   platform default locale encoding
        with open(pedigree, encoding='utf-8') as fi:
            pedigree_json = json.load(fi)
        #end with
    else:
        try: pedigree_json = json.loads(pedigree)
        except Exception as err:
            # Chain the original error to keep the parsing details
            #   available in the traceback
            raise ValueError('\nERROR in parsing arguments, pedigree must be either a json file or a string representing a json\n') from err
        #end try
    #end if
    return pedigree_json
#end def read_json

def parse_pedigree(self, pedigree, format):
''' use pedigree information to build Pedigree object,
pedigree information must be provided in specified format '''
if format == 'json':
pedigree_json = self.read_json(pedigree)
else:
raise ValueError('\nERROR in parsing pedigree, specified pedigree format is not supported\n')
#end if
# Creating Member objects
for member in pedigree:
for member in pedigree_json:
self.add_member(member)
#end for
# Adding relations
for member in pedigree:
for member in pedigree_json:
name = member['individual']
if len(member['parents']) > 2:
raise ValueError('\nERROR in pedigree structure, {0} has more than two parents\n'
Expand Down
8 changes: 5 additions & 3 deletions granite/lib/vcf_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#################################################################
#
Expand Down Expand Up @@ -311,14 +312,15 @@ def get_genotype_value(self, ID_genotype, tag_to_get, sep=':'):
def read_vcf(inputfile):
''' read vcf file, gzipped or ungzipped,
return a generator '''
if inputfile.endswith('.gz'):
if inputfile.endswith('.gz') or \
inputfile.endswith('.bgz'):
with gzip.open(inputfile, 'rb') as fz:
for byteline in fz:
yield byteline.decode()
yield byteline.decode('utf-8')
#end for
#end with
else:
with open(inputfile) as fi:
with open(inputfile, encoding='utf-8') as fi:
for line in fi:
yield line
#end for
Expand Down
2 changes: 1 addition & 1 deletion granite/novoCaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ def main(args, test=False):
is_verbose = True if args['verbose'] else False

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])
Expand Down
14 changes: 1 addition & 13 deletions granite/qcVCF.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,20 +298,8 @@ def main(args):
# Get list of sample IDs to use
ID_list = args['samples'] # list of sample IDs

# Loading pedigree
if os.path.isfile(args['pedigree']):
with open(args['pedigree']) as fi:
pedigree = json.load(fi)
#end with
else:
try: pedigree = json.loads(args['pedigree'])
except Exception:
sys.exit('\nERROR in parsing arguments: pedigree must be either a json file or a string representing a json\n')
#end try
#end if

# Creating Pedigree object
pedigree_obj = pedigree_parser.Pedigree(pedigree)
pedigree_obj = pedigree_parser.Pedigree(args['pedigree'])

# Initializing stat_dict
for ID in ID_list:
Expand Down
14 changes: 1 addition & 13 deletions granite/toPED.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,8 @@ def main(args):
encode_gender = {'M': 1, 'F': 2}
familyID = args['family'] if args['family'] else 'FAM'

# Loading pedigree
if os.path.isfile(args['pedigree']):
with open(args['pedigree']) as fi:
pedigree = json.load(fi)
#end with
else:
try: pedigree = json.loads(args['pedigree'])
except Exception:
sys.exit('\nERROR in parsing arguments: pedigree must be either a json file or a string representing a json\n')
#end try
#end if

# Creating Pedigree object
pedigree_obj = pedigree_parser.Pedigree(pedigree)
pedigree_obj = pedigree_parser.Pedigree(args['pedigree'])

# Buffers
fo = open(args['outputfile'], 'w')
Expand Down
19 changes: 1 addition & 18 deletions granite/validateVCF.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,23 +715,6 @@ def main(args):
# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])

# Get pedigree / pedigrees information
pedigree_list = []
for pedigree in args['pedigree']:
# Loading pedigree
if os.path.isfile(pedigree):
with open(pedigree) as fi:
pedigree_list.append(json.load(fi))
#end with
else:
try: pedigree_list.append(json.loads(pedigree))
except Exception:
sys.exit('\nERROR in parsing arguments: {0} must be either a json file or a string representing a json\n'
.format(pedigree))
#end try
#end if
#end for

# Check novoPP
if sample_novo:
try: novotag, _ = vcf_obj.header.check_tag_definition('novoPP')
Expand All @@ -742,7 +725,7 @@ def main(args):

# Creating Pedigree object / objects
pedigree_obj_list = []
for pedigree in pedigree_list:
for pedigree in args['pedigree']:
pedigree_obj_list.append(pedigree_parser.Pedigree(pedigree))
#end for

Expand Down
2 changes: 1 addition & 1 deletion granite/whiteList.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def main(args):
is_verbose = True if args['verbose'] else False

# Buffers
fo = open(args['outputfile'], 'w')
fo = open(args['outputfile'], 'w', encoding='utf-8')

# Creating Vcf object
vcf_obj = vcf_parser.Vcf(args['inputfile'])
Expand Down
Loading

0 comments on commit b451781

Please sign in to comment.