From 7ad615bf3a2e97607f9e0e5c79625bac74db220d Mon Sep 17 00:00:00 2001 From: Warren Kretzschmar Date: Thu, 7 May 2020 14:21:44 +0200 Subject: [PATCH 1/4] Add option to just provide list of cell names --- src/monovar.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/monovar.py b/src/monovar.py index f1c07df..7f29d91 100755 --- a/src/monovar.py +++ b/src/monovar.py @@ -6,6 +6,8 @@ The University of Texas MD Anderson Cancer Center Hamim Zafar and Ken Chen (kchen3@mdanderson.org) +Copyright (c) 2020 Warren W. Kretzschmar + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including @@ -79,6 +81,8 @@ def _pickle_method(m): # Process the inputs argc = len(sys.argv) i = 1 +bam_file_list = None +cell_name_list = None while (i < argc): if (sys.argv[i] == '-n'): n_cells = int(sys.argv[i + 1]) # Number of input bam files @@ -93,7 +97,8 @@ def _pickle_method(m): input_args['-f'] = 'Provided' elif (sys.argv[i] == '-b'): bam_file_list = sys.argv[i + 1] # File containing list of bam files - input_args['-b'] = 'Provided' + elif (sys.argv[i] == '-l'): + cell_name_list = sys.argv[i + 1] # File containing list of cell names elif (sys.argv[i] == '-o'): outfile = sys.argv[i + 1] # Output File input_args['-o'] = 'Provided' @@ -114,10 +119,8 @@ def _pickle_method(m): print "Error: Reference genome file not provided. Use '-f' for reference genome file.\n" exit(3) -try: - b = input_args['-b'] -except KeyError: - print "Error: List of Bam files not provided. Use '-b' for list of Bam files.\n" +if not( '-b' in input_args or '-l' in input_args): + print "Error: List of Bam files or list of read groups not provided. Use '-b' or '-l', respectively.\n" exit(3) try: @@ -131,13 +134,21 @@ def _pickle_method(m): print "CF_flag can have value 0 or 1. Use '-c' with proper value.\n" exit(3) -# Obtain the RG IDs from the bam files bam_id_list = [] -f_bam_list = open(bam_file_list) -for filename in f_bam_list: - filename = filename.replace('\n', '') - bam_id = U.Get_BAM_RG(filename) - bam_id_list.append(bam_id) +if bam_file_list: + # Obtain the RG IDs from the bam files + f_bam_list = open(bam_file_list) + for filename in f_bam_list: + filename = filename.replace('\n', '') + bam_id = U.Get_BAM_RG(filename) + bam_id_list.append(bam_id) +elif cell_name_list: + # Obtain the RG IDs from the rg name list + f_bam_list = open(cell_name_list) + for filename in f_bam_list: + bam_id_list.append(filename.rstrip('\n')) +else: + raise Exception("Neither -b nor -l provided") n_cells = len(bam_id_list) From ac54448e80421125cb28abc48d0d41b78a6528ea Mon Sep 17 00:00:00 2001 From: Warren Kretzschmar Date: Thu, 7 May 2020 14:26:35 +0200 Subject: [PATCH 2/4] Fix args --- src/monovar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/monovar.py b/src/monovar.py index 7f29d91..a480b4b 100755 --- a/src/monovar.py +++ b/src/monovar.py @@ -119,7 +119,7 @@ def _pickle_method(m): print "Error: Reference genome file not provided. Use '-f' for reference genome file.\n" exit(3) -if not( '-b' in input_args or '-l' in input_args): +if not(bam_file_list or cell_name_list): print "Error: List of Bam files or list of read groups not provided. Use '-b' or '-l', respectively.\n" exit(3) From 7c7d8ea676294bfe0f1caaab4aa8ba2622eb7db5 Mon Sep 17 00:00:00 2001 From: Warren Kretzschmar Date: Thu, 7 May 2020 18:33:00 +0200 Subject: [PATCH 3/4] Extend float precision --- src/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.py b/src/utils.py index 2f07a97..567cf4b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -109,7 +109,7 @@ def Create_Factorial_List(self, max_allele_cnt): return factorial_list def Create_nCr_mat(self, max_allele_cnt, factorial_list): - ncr_mat = np.zeros((max_allele_cnt, max_allele_cnt)) + ncr_mat = np.zeros((max_allele_cnt, max_allele_cnt), dtype=np.longdouble) for i in range(max_allele_cnt): for j in range(max_allele_cnt): ncr_mat[j, i] = factorial_list[j] / \ From 2591f6ee184d3630598549383ff75f9ab2b82ab2 Mon Sep 17 00:00:00 2001 From: Warren Kretzschmar Date: Fri, 8 May 2020 09:11:38 +0200 Subject: [PATCH 4/4] Improve numpy calculation speed --- src/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils.py b/src/utils.py index 567cf4b..48e33c0 100644 --- a/src/utils.py +++ b/src/utils.py @@ -110,10 +110,10 @@ def Create_Factorial_List(self, max_allele_cnt): def Create_nCr_mat(self, max_allele_cnt, factorial_list): ncr_mat = np.zeros((max_allele_cnt, max_allele_cnt), dtype=np.longdouble) + factorial_list = np.array(factorial_list) for i in range(max_allele_cnt): - for j in range(max_allele_cnt): - ncr_mat[j, i] = factorial_list[j] / \ - (factorial_list[i] * factorial_list[j - i]) + ncr_mat[:, i] = factorial_list / \ + (factorial_list[i] * np.roll(factorial_list, i)) return ncr_mat def CheckAltAllele(self, single_cell_dict):