Skip to content

Commit

Permalink
Better audio length distribution.
Browse files Browse the repository at this point in the history
  • Loading branch information
patriotyk committed Jan 11, 2024
1 parent 5f55053 commit 6ae4a84
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions narizaka/splitter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,7 @@ def _split(region, threshold=46, deep=4):
energy_threshold=threshold # threshold of detection
):
r.meta = {'start': r.meta.start+region.meta.start, 'end': r.meta.end+region.meta.start}
- if r.duration > 10.0 and deep:
+ if r.duration > 8.0 and deep:
regions = _split(r, threshold+2, deep-1)
if len(regions)> 1:
audio_regions = audio_regions + regions
Expand Down Expand Up @@ -62,18 +62,15 @@ def _split(region, threshold=46, deep=4):
temp_reg = None
start_word = found[2]+1

- # elif gap_dur > 3.5: #FIXME Should find split here
- # print('GAPPP')
- # temp_reg = None


ready_segment = {}
for segment in regions_by_punct:
if not ready_segment:
ready_segment = segment
continue

- if ready_segment['text'].endswith(',') and (segment['end'] - ready_segment['start']) < 10:
+ if ((segment['start'] - ready_segment['end']) < 0.4 and ready_segment['text'][-1] in [',', ':', '-', '»', '\'', '.', '?', '!'] and (segment['end'] - ready_segment['start']) < 11)\
+ or (ready_segment['text'][-1] in [',', ':', '-', '»', '\''] and (segment['end'] - ready_segment['start']) < 20):
ready_segment['end'] = segment['end']
ready_segment['text'] += segment['text']
else:
Expand Down

0 comments on commit 6ae4a84

Please sign in to comment.