Skip to content

Commit

Permalink
Merge pull request #87 from Hoeze/master
Browse files Browse the repository at this point in the history
Resolve int conversion bug when inferring UTR from CDS
  • Loading branch information
Hoeze authored Feb 22, 2021
2 parents e67fab6 + 64c3fd6 commit 1d72daf
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
22 changes: 13 additions & 9 deletions kipoiseq/extractors/gtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,23 +346,27 @@ def get_utr_from_gtf(
"""
if infer_from_cds:
# get start and end of cds for each transcript
- cds = CDSFetcher.get_cds_from_gtf(df=df, on_error_warn=on_error_warn) \
- .groupby('transcript_id') \
+ cds = (
+ CDSFetcher.get_cds_from_gtf(df=df, on_error_warn=on_error_warn)
+ .groupby('transcript_id')
  .agg({'Start': min, 'End': max})
+ )

# join cds start and end to utr df
- utr_df = df.query("Feature == 'transcript'") \
- .set_index('transcript_id') \
- .join(cds, rsuffix="_cds") \
+ utr_df = (
+ df.query("Feature == 'transcript'")
+ .set_index('transcript_id')
+ .join(cds, rsuffix="_cds")
  .dropna(subset=['Start_cds', 'End_cds'], axis=0)
+ )

if feature_type.upper() == "5UTR":
- utr_df['Start'] = np.where(utr_df['Strand'] == '+', int(utr_df['Start']), int(utr_df['End_cds']))
- utr_df['End'] = np.where(utr_df['Strand'] == '+', int(utr_df['Start_cds']), int(utr_df['End']))
+ utr_df['Start'] = np.where(utr_df['Strand'] == '+', utr_df['Start'].astype("int"), utr_df['End_cds'].astype("int"))
+ utr_df['End'] = np.where(utr_df['Strand'] == '+', utr_df['Start_cds'].astype("int"), utr_df['End'].astype("int"))
utr_df['Feature'] = pd.Categorical("5UTR", categories = utr_df['Feature'])
if feature_type.upper() == "3UTR":
- utr_df['Start'] = np.where(utr_df['Strand'] == '+', int(utr_df['End_cds']), int(utr_df['Start']))
- utr_df['End'] = np.where(utr_df['Strand'] == '+', int(utr_df['End']), int(utr_df['Start_cds']))
+ utr_df['Start'] = np.where(utr_df['Strand'] == '+', utr_df['End_cds'].astype("int"), utr_df['Start'].astype("int"))
+ utr_df['End'] = np.where(utr_df['Strand'] == '+', utr_df['End'].astype("int"), utr_df['Start_cds'].astype("int"))
utr_df['Feature'] = pd.Categorical("3UTR", categories = utr_df['Feature'])

utr_df.drop(['Start_cds', 'End_cds'], axis=1, inplace=True)
Expand Down
2 changes: 1 addition & 1 deletion kipoiseq/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def to_scalar(obj):
"""Convert numpy scalar to native scalar
"""
if isinstance(obj, np.generic):
- return np.asscalar(obj)
+ return obj.item()
else:
return obj

Expand Down

0 comments on commit 1d72daf

Please sign in to comment.