Skip to content

Commit

Permalink
More robust alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
patriotyk committed Apr 2, 2024
1 parent bed80e6 commit 5b18c15
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
7 changes: 6 additions & 1 deletion narizaka/textbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,16 @@ def _get_text(self, el):

def more_text(self):
    """Collect normalized paragraph text from ``self.iter``.

    Elements are pulled from ``self.iter`` one at a time; the iterator is
    left positioned right after the last element consumed, so a later call
    continues from there. Elements are presumably XML nodes whose tags are
    in ``{namespace}name`` form -- confirm against the code that builds
    ``self.iter``.

    An element whose tag ends with ``}empty-line`` switches skipping on.
    While skipping, paragraph elements are ignored until a paragraph whose
    ``text`` is ``None`` is seen; that paragraph switches skipping off and
    is itself not added to the result.

    Returns:
        The concatenation of ``self.norm(self._get_text(p)) + ' '`` for
        every accepted paragraph. Collection stops as soon as the result
        is at least ``self.min_text_length`` characters long, or when the
        iterator is exhausted (so the result may be shorter, or empty).
    """
    text = ''
    skip = False
    for i in self.iter:
        tag = i.tag
        if tag.endswith('}p'):
            if not skip:
                text += self.norm(self._get_text(i)) + ' '
                if len(text) >= self.min_text_length:
                    break
            elif i.text is None:
                # A paragraph without direct text ends the skipped region.
                skip = False
        elif tag.endswith('}empty-line'):
            # Start ignoring paragraphs that follow an empty line.
            skip = True
    return text

def __del__(self):
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ retry2
ukrainian_word_stress
ipa_uk@git+https://github.com/patriotyk/ipa-uk.git
num2words@git+https://github.com/patriotyk/num2words.git

0 comments on commit 5b18c15

Please sign in to comment.