Skip to content

Commit

Permalink
Adjust magic numbers used for XML manipulation
Browse files Browse the repository at this point in the history
To avoid parsing XML, some magic numbers are used to trim opening and
closing markup. The dump format has slightly changed and these magic
numbers had to be adapted.
  • Loading branch information
benoit74 committed May 7, 2024
1 parent 9c17555 commit 314314a
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/sotoki/utils/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def read_sub():
main_id = get_id_in(main_line, field_index_in_main)

# write main line to dest; removing tag end (/> -> >) and CRLF
dsth.write(main_line[:-4])
dsth.write(main_line[:-3])
dsth.write(b">")

# fetch subs matching this ID (IDs are sorted so it's continuous)
Expand All @@ -248,7 +248,7 @@ def read_sub():
dsth.write(node_start)
# write the sub line removing the 2 heading spaces, node name (<row)
# removing trailing CRLF as well. node already self closed in source
dsth.write(current_sub[1][6:-2])
dsth.write(current_sub[1][4:-1])
current_sub = read_sub()

if has_subs:
Expand Down Expand Up @@ -335,7 +335,7 @@ def split_posts_by_posttypeid(
try:
# rewrite with new name replacing ` <row` and `row>`
fhs[found_id].write(starts[found_id])
fhs[found_id].write(line[6:-5])
fhs[found_id].write(line[4:-5])
fhs[found_id].write(ends[found_id])
except KeyError:
continue
Expand Down Expand Up @@ -404,7 +404,7 @@ def read_csv():
if current_csv[0] == post_id:
# write user line to dest; removing tag end and CRLF
dsth.write(b"<link")
dsth.write(line[6:-4])
dsth.write(line[4:-3])
# CSV title already includes appropriate quoting
dsth.write(b" PostName=")
dsth.write(current_csv[1])
Expand Down

0 comments on commit 314314a

Please sign in to comment.