Skip to content

Commit

Permalink
Merge pull request #15 from t4guw/data-formatting
Browse files Browse the repository at this point in the history
Complete data formatting
  • Loading branch information
isabelgiang authored May 25, 2020
2 parents d5d0022 + 8af2886 commit 5434163
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
1 change: 0 additions & 1 deletion src/data/statements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
~~~STATEMENT_START~~~
1
Given an array of integers, return indices of the two numbers such that they add up to a specific target.
You may assume that each input would have exactly one solution, and you may not use the same element twice.
Expand Down
73 changes: 73 additions & 0 deletions src/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python

'''
Parses solutions.txt and statements.txt.
Creates a Problem object for each LeetCode problem; each object can be serialized as JSON.
Produces a JSON file that is intended to be the input of the labeling functions.
5/27/2020
'''

import json
from problem import Problem
from itertools import islice

# Path of the JSON file written by create_output_file().
OUTPUT_FILE = "problems.json"

# Registry of parsed problems, keyed by integer problem number.
PROBLEMS = dict()

def process_solutions(filename):
    """Attach every solution found in *filename* to its problem in PROBLEMS.

    Expected layout of one solution section (repeated once per solution):

        ~~~START_SOLUTION~~~
        <problem_number>
        public int{} ...
        ....
        }
        ~~~END_SOLUTION~~~

    Lines containing the start marker, and blank lines between sections,
    are skipped. The first remaining line is parsed as the problem number
    and everything up to the end marker is stored as one solution.

    Args:
        filename: path of the solutions file to read.

    Raises:
        ValueError: if the line where a problem number is expected is not
            an integer.
        KeyError: if a solution refers to a problem number that is not
            present in PROBLEMS (statements must be processed first).
    """
    with open(filename) as f:
        while line := f.readline():
            # Section headers and blank separator lines carry no data;
            # int() on a blank line would otherwise abort the whole parse.
            if "~~~START_SOLUTION~~~" in line or not line.strip():
                continue
            number = int(line)
            solution = read_section(f, "~~~END_SOLUTION~~~")
            PROBLEMS[number].solutions.append(solution)


def process_statements(filename):
    """Create a Problem in PROBLEMS for every statement found in *filename*.

    Expected layout of one statement section (repeated once per problem):

        ~~~STATEMENT_START~~~
        1
        Given an array of integers....

    Lines containing the start marker, and blank lines where a problem
    number is expected, are skipped, so the file may begin either with the
    marker (as in the example above) or directly with a problem number.
    The text following the number, up to the next start marker, becomes
    the problem statement.

    Args:
        filename: path of the statements file to read.

    Raises:
        ValueError: if the line where a problem number is expected is not
            an integer.
    """
    with open(filename) as f:
        while line := f.readline():
            # A leading marker (or stray blank line) is not a problem
            # number; skipping it mirrors how process_solutions treats
            # its own start marker.
            if "~~~STATEMENT_START~~~" in line or not line.strip():
                continue
            number = int(line)
            statement = read_section(f, "~~~STATEMENT_START~~~")
            PROBLEMS[number] = Problem(number, statement)


def read_section(file, marker=''):
    """Read lines from *file* until a line containing *marker* is reached.

    The marker line itself is consumed but not included in the result, so
    the next read from *file* starts on the line after the marker.

    If the end of the file is reached before any marker line, everything
    read so far is returned (previously the content was discarded and
    None was returned, which lost the final section of a file).

    Args:
        file: an open text file object positioned at the start of the
            section; only its readline() method is used.
        marker: substring identifying the terminating line. An empty
            marker means "no terminator": the rest of the file is read.

    Returns:
        The section's lines concatenated into one string, line endings
        preserved. An empty string if there was nothing to read.
    """
    collected = []
    # readline() (rather than iterating the file) keeps this compatible
    # with callers that also advance the file with readline().
    for line in iter(file.readline, ''):
        # Guard against the empty marker: '' is a substring of every line.
        if marker and marker in line:
            break
        collected.append(line)
    return ''.join(collected)

def create_output_file():
    """Write every problem in PROBLEMS to OUTPUT_FILE as one JSON array.

    Each array element is the decoded form of that problem's serialize()
    output, in PROBLEMS insertion order.

    serialize() already returns JSON text. Passing that text to json.dump
    (as was done before) encodes it a second time as a quoted string, and
    writing one such string per problem with no separator produced a file
    that json.load cannot parse. Decoding first and dumping a single list
    yields a valid JSON document.
    """
    problems = [json.loads(problem.serialize()) for problem in PROBLEMS.values()]
    with open(OUTPUT_FILE, 'w') as outfile:
        json.dump(problems, outfile)

def main():
    """Parse the data files, preview one problem, and write the output file."""
    # Statements go first: they create the entries that solutions attach to.
    process_statements("data/statements.txt")
    process_solutions("data/solutions.txt")

    # Sanity check: print only the first parsed problem, if there is one.
    for number, problem in islice(PROBLEMS.items(), 1):
        print(number, problem.serialize())

    create_output_file()


if __name__ == "__main__":
    main()
19 changes: 19 additions & 0 deletions src/problem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json

# Class for serializing custom objects into JSON.
# See https://docs.python.org/3/library/json.html#json.JSONEncoder.default
class ComplexEncoder(json.JSONEncoder):
    """JSON encoder that can encode Problem instances."""

    def default(self, obj):
        """Return a JSON-encodable stand-in for *obj*.

        Problem instances are encoded as a dict of their attributes.
        Anything else is handed to the base class, which raises
        TypeError("Object of type ... is not JSON serializable").

        The base-class hook must be *called* here; constructing a new
        json.JSONEncoder(self, obj) instead fails with an unrelated
        "takes 1 positional argument" TypeError that hides the real cause.
        """
        if isinstance(obj, Problem):
            return vars(obj)
        return super().default(obj)


# Class that defines the properties of each Problem object
class Problem(object):
    """One LeetCode problem: its number, its statement and its solutions."""

    def __init__(self, number, statement):
        """Store the problem number and statement; solutions start empty."""
        self.number = number
        self.statement = statement
        # Filled in later by appending one entry per solution.
        self.solutions = []

    def serialize(self):
        """Return this problem as a JSON string (an object of its attributes)."""
        return json.dumps(self, cls=ComplexEncoder)

0 comments on commit 5434163

Please sign in to comment.