You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting described in these comments is inferred from the data flow --
# confirm against the original file.
#
# Purpose: pack consecutive sentences into a single instance for as long as
# the combined length of their encodings stays within self.max_len. When the
# next sentence would not fit, the accumulated instance is emitted through
# get_new_ins(...) and a fresh one is started from that sentence.
#
# Running state of the instance currently being assembled:
#   bpes    -- encoded pieces, seeded with self.cls
#   indexes -- one word index per entry of bpes; index 0 belongs to self.cls
#   spans   -- (start, end, label_id) tuples gathered for this instance
bpes = [self.cls]
indexes = [0]
spans = []
# Finished instances, each one the return value of get_new_ins(...).
ins_lst = []
# Occurrence count of every entity surface string.
new_ent_str = Counter()
for _raw_words, _raw_ents in zip(raw_sents, raw_entss):
# Per-sentence buffers: _bpes collects the encoded pieces of this sentence,
# _indexes the sentence-local word index of each piece.
_indexes = []
_bpes = []
for s, e, t in _raw_ents:
# The end offset e is inclusive (slice is s:e+1); the words of the entity
# are concatenated without a separator to form the counter key.
new_ent_str[''.join(_raw_words[s:e+1])] += 1
for idx, word in enumerate(_raw_words, start=0):
# word2bpes caches the tokenizer output per distinct word so that each word
# is only encoded once.
if word in word2bpes:
__bpes = word2bpes[word]
else:
# A leading space is prepended when self.add_prefix_space is set
# (presumably for tokenizers that treat a leading space as part of the
# word -- TODO confirm). Special tokens are not added here; self.cls is
# inserted manually at the start of each instance instead.
__bpes = self.tokenizer.encode(' '+word if self.add_prefix_space else word,
add_special_tokens=False)
word2bpes[word] = __bpes
# Repeat the word index once per piece so _indexes stays aligned with _bpes.
_indexes.extend([idx]*len(__bpes))
_bpes.extend(__bpes)
# First word index that is still free after what has been packed so far.
next_word_idx = indexes[-1]+1
if len(bpes) + len(_bpes) <= self.max_len:
# The sentence fits: append it and shift its word indices by next_word_idx.
bpes = bpes + _bpes
indexes += [i + next_word_idx for i in _indexes]
# Span offsets are shifted by next_word_idx-1, i.e. unlike indexes they do
# not count the slot taken by self.cls.
# label2idx.get(t) yields None for a label missing from label2idx
# (assuming label2idx is a dict -- TODO confirm).
spans += [(s+next_word_idx-1, e+next_word_idx-1, label2idx.get(t), ) for s, e, t in _raw_ents]
else:
# The sentence would push the instance past self.max_len: emit what has
# been accumulated, then restart the state from the current sentence alone.
new_ins = get_new_ins(bpes, spans, indexes)
ins_lst.append(new_ins)
# Word indices restart at 1 (0 is the self.cls slot); spans keep their
# sentence-local offsets, matching the shift of next_word_idx-1 == 0 above.
indexes = [0] + [i + 1 for i in _indexes]
spans = [(s, e, label2idx.get(t), ) for s, e, t in _raw_ents]
bpes = [self.cls] + _bpes
# Flush the last instance. bpes always contains at least self.cls at this
# point, so the condition is always true.
if bpes:
ins_lst.append(get_new_ins(bpes, spans, indexes))
关于论文代码中的这一部分，我没有理解它想要处理什么。
The text was updated successfully, but these errors were encountered:
# NOTE(review): this snippet is truncated (it stops inside the sentence loop)
# and its indentation was lost when pasted; the nesting described here is
# inferred -- confirm against the original file.
#
# State of the instance being assembled: bpes starts with self.cls, indexes
# starts with 0 (the slot of self.cls), spans starts empty.
bpes = [self.cls]
indexes = [0]
spans = []
# Collects finished instances.
ins_lst = []
# Occurrence count of every entity surface string.
new_ent_str = Counter()
for _raw_words, _raw_ents in zip(raw_sents, raw_entss):
# Per-sentence buffers, reset for every sentence.
_indexes = []
_bpes = []
for s, e, t in _raw_ents:
# The end offset e is inclusive (slice is s:e+1); the words of the entity
# are concatenated without a separator to form the counter key.
new_ent_str[''.join(_raw_words[s:e+1])] += 1
关于论文代码中的这一部分，我没有理解它想要处理什么。
The text was updated successfully, but these errors were encountered: