-
Notifications
You must be signed in to change notification settings - Fork 1
/
collect_ilm_data.py
43 lines (42 loc) · 1.67 KB
/
collect_ilm_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import random
import json
import math
def load_recipes(path):
    """Load recipes from a JSON file as lists of instruction strings.

    The file is expected to hold a JSON array of recipe objects, each with
    an ``'instructions'`` list whose items carry a ``'text'`` field.

    Args:
        path: Path of the JSON file to read.

    Returns:
        A list with one entry per recipe; each entry is the list of that
        recipe's instruction texts, in file order.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        recipes_json = json.load(f)
    return [
        [instruction['text'] for instruction in recipe_json['instructions']]
        for recipe_json in recipes_json
    ]


def split_recipes(recipes, train_size=1000, val_size=100, test_size=100):
    """Slice ``recipes`` into consecutive train / validation / test subsets.

    The subsets are taken from the front of the list in that order, so the
    caller is responsible for shuffling beforehand. If the list is shorter
    than the requested total, the later subsets are simply shorter (or
    empty), as with ordinary slicing.

    Args:
        recipes: Sequence of recipes to split.
        train_size: Number of recipes in the training subset.
        val_size: Number of recipes in the validation subset.
        test_size: Number of recipes in the test subset.

    Returns:
        A ``(train, val, test)`` tuple of slices of ``recipes``.
    """
    val_end = train_size + val_size
    test_end = val_end + test_size
    return (
        recipes[:train_size],
        recipes[train_size:val_end],
        recipes[val_end:test_end],
    )


def write_split(path, recipes):
    """Write recipes to a text file, one instruction per line.

    Every recipe is followed by three extra newline characters, which
    leaves three blank lines between consecutive recipes.
    NOTE(review): presumably this is the document separator expected by the
    downstream ILM tooling -- confirm against the consumer.

    Args:
        path: Destination file; it is created or overwritten.
        recipes: Iterable of recipes, each an iterable of instruction strings.
    """
    with open(path, 'w', encoding='utf-8') as f_out:
        for lines in recipes:
            f_out.writelines(line + '\n' for line in lines)
            f_out.write('\n\n\n')


def main(train_size=1000, val_size=100, test_size=100):
    """Build shuffled train/valid/test text files from the recipe dataset.

    Reads ``data/recipes_with_nutritional_info.json``, shuffles the recipes
    and writes ``data/train.txt``, ``data/valid.txt`` and ``data/test.txt``.

    Args:
        train_size: Number of recipes written to the training file.
        val_size: Number of recipes written to the validation file.
        test_size: Number of recipes written to the test file.
    """
    recipes = load_recipes('data/recipes_with_nutritional_info.json')
    random.shuffle(recipes)
    # Only a fixed-size subset is exported rather than a proportional
    # (e.g. 80/10/10) split of the whole dataset.
    train_recipes, val_recipes, test_recipes = split_recipes(
        recipes, train_size, val_size, test_size
    )
    write_split('data/train.txt', train_recipes)
    write_split('data/valid.txt', val_recipes)
    write_split('data/test.txt', test_recipes)
    print('finished')


if __name__ == '__main__':
    main()