-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from t4guw/data-formatting
Complete data formatting
- Loading branch information
Showing
4 changed files
with
93 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/env python | ||
|
||
''' | ||
Parses solutions.txt and statements.txt. | ||
Creates problem objects for each LeetCode problem that are serialized as JSON. | ||
Produces a JSON file that is intended to be the input of the labeling functions. | ||
5/27/2020 | ||
''' | ||
|
||
import json | ||
from problem import Problem | ||
from itertools import islice | ||
|
||
# Path of the JSON file written by create_output_file().
OUTPUT_FILE = "problems.json"

# Registry of parsed problems, keyed by problem number.
PROBLEMS = dict()
|
||
''' | ||
Example solution: | ||
~~~START_SOLUTION~~~ | ||
<problem_number> | ||
public int{} ... | ||
.... | ||
} | ||
~~~END_SOLUTION~~~ | ||
''' | ||
def process_solutions(filename):
    """Attach every solution found in *filename* to its problem in PROBLEMS.

    Each section of the file is expected to look like::

        ~~~START_SOLUTION~~~
        <problem_number>
        <solution source ...>
        ~~~END_SOLUTION~~~

    Start-marker lines and blank lines between sections are skipped.

    Raises:
        ValueError: if a line where a problem number is expected is not an
            integer.
        KeyError: if a solution names a problem number missing from PROBLEMS.
    """
    with open(filename) as f:
        # iter(f.readline, "") keeps reading through readline(), so
        # read_section() can consume lines from the same handle mid-loop.
        for line in iter(f.readline, ""):
            # Markers and blank separator lines carry no problem number;
            # feeding them to int() would raise ValueError.
            if "~~~START_SOLUTION~~~" in line or not line.strip():
                continue
            number = int(line)
            solution = read_section(f, "~~~END_SOLUTION~~~")
            PROBLEMS[number].solutions.append(solution)
|
||
|
||
''' | ||
Example statement: | ||
~~~STATEMENT_START~~~ | ||
1 | ||
Given an array of integers.... | ||
''' | ||
def process_statements(filename):
    """Parse *filename* and register one Problem per statement in PROBLEMS.

    Each record is expected to look like::

        ~~~STATEMENT_START~~~
        <problem_number>
        <statement text ...>

    The statement text runs until the next ``~~~STATEMENT_START~~~`` line,
    which read_section() consumes. Only the very first marker (and any blank
    lines) are therefore seen by this loop, and they are skipped.

    Raises:
        ValueError: if a line where a problem number is expected is not an
            integer.
    """
    with open(filename) as f:
        # iter(f.readline, "") keeps reading through readline(), so
        # read_section() can consume lines from the same handle mid-loop.
        for line in iter(f.readline, ""):
            # The leading marker of the file (and blank lines) hold no number;
            # passing them to int() would raise ValueError.
            if "~~~STATEMENT_START~~~" in line or not line.strip():
                continue
            number = int(line)
            statement = read_section(f, "~~~STATEMENT_START~~~")
            PROBLEMS[number] = Problem(number, statement)
|
||
|
||
def read_section(file, marker=''):
    """Read lines from *file* until a line containing *marker* is found.

    Args:
        file: an open text file object; lines are pulled with ``readline()``.
        marker: substring that terminates the section. The line containing
            it is consumed but not included in the result. With the default
            empty string every line matches, so a single line is consumed
            and ``''`` is returned.

    Returns:
        The concatenated lines read before the marker. If end of file is
        reached before any marker, everything read so far is returned, so
        the last section of a file is not lost.
    """
    collected = []
    for line in iter(file.readline, ''):
        if marker in line:
            break
        collected.append(line)
    # Reached on both marker hit and EOF, so the result is always a string.
    return ''.join(collected)
|
||
def create_output_file():
    """Write every problem in PROBLEMS to OUTPUT_FILE as a single JSON array.

    Each array element is the JSON object produced by ``Problem.serialize()``.
    """
    # serialize() already returns JSON text. Dumping that text directly would
    # encode it a second time (as an escaped string literal), and writing the
    # documents back-to-back would not form valid JSON. Decode each one and
    # emit a single array instead.
    records = [json.loads(problem.serialize()) for problem in PROBLEMS.values()]
    # Records are built before opening so a serialization error does not
    # leave a truncated output file behind.
    with open(OUTPUT_FILE, 'w') as outfile:
        json.dump(records, outfile)
|
||
def main():
    """Parse statements and solutions, preview one problem, write the output."""
    process_statements("data/statements.txt")
    process_solutions("data/solutions.txt")

    # Print only the first registered problem as a quick sanity check.
    first_entry = next(iter(PROBLEMS.items()), None)
    if first_entry is not None:
        number, problem = first_entry
        print(number, problem.serialize())

    create_output_file()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import json | ||
|
||
# Class for serializing custom objects into JSON. | ||
# See https://docs.python.org/3/library/json.html#json.JSONEncoder.default | ||
class ComplexEncoder(json.JSONEncoder):
    """JSON encoder that serializes Problem instances as their attribute dict."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Problem instances are encoded via their ``__dict__``. Any other
        unsupported object is delegated to the base implementation, which
        raises TypeError.
        """
        if isinstance(obj, Problem):
            return obj.__dict__
        # Delegate to the base class's default(); calling the JSONEncoder
        # constructor here would fail with an unrelated arity TypeError.
        return super().default(obj)
|
||
# Class that defines the properties of each Problem object | ||
class Problem:
    """A single problem: its number, its statement and collected solutions."""

    def __init__(self, number, statement):
        """Store *number* and *statement* and start with no solutions."""
        self.number = number
        self.statement = statement
        # A fresh list per instance; solutions are appended by the parser.
        self.solutions = list()

    def serialize(self) -> str:
        """Return this problem encoded as a JSON string via ComplexEncoder."""
        encoded = json.dumps(self, cls=ComplexEncoder)
        return encoded