-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Export data as CSV and JSON #9
Changes from all commits
41e3f99
37df726
78e7bc0
29d1211
c277bdd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# CI check: on pull requests that touch YAML files, re-run the export script
# and fail if it changes any tracked file (i.e. the committed exports are
# out of date with respect to the YAML sources).
name: Check YAML Consistency
'on':
  pull_request:
    paths:
      - '**/*.yaml'
jobs:
  check_yaml:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not parse the version as a float
          # (an unquoted 3.10 would become 3.1).
          python-version: '3.12'
      - name: Install dependencies
        run: pip install -r requirements.txt
      - name: Run export and check consistency
        # `git diff --exit-code` returns non-zero when the working tree
        # differs from the index, which fails the job.
        run: |
          python export.py
          git diff --exit-code
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Local pre-commit hook that regenerates the CSV and JSON exports.
repos:
  - repo: local
    hooks:
      - id: export-yaml
        name: Export YAML to CSV and JSON
        language: python
        entry: python export.py
        # export.py takes no command-line arguments, so do not append the
        # staged file names to the entry command.
        pass_filenames: false
        additional_dependencies: ['pyyaml']
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,89 @@ | ||||||||||
|
||||||||||
import yaml | ||||||||||
import csv | ||||||||||
import json | ||||||||||
import csv | ||||||||||
import os | ||||||||||
def read_yaml(file_path):
    """Load a YAML file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (``None`` for an
        empty document).
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
|
||||||||||
def clean_value(value):
    """Flatten a value so it fits into a single CSV cell.

    Args:
        value: Any value read from a YAML document.

    Returns:
        For a ``list``, its items converted with ``str`` and joined by
        ``', '``; any other value is returned unchanged.
    """
    # NOTE(review): dict values (and lists of dicts) get no special handling
    # here. Encoding them as JSON was suggested during review but the question
    # was left open -- confirm before changing the CSV format.
    if isinstance(value, list):
        return ', '.join(str(item) for item in value)
    return value
def export_to_csv(input_dir, output_csv, schema_file):
    """Write one CSV row per YAML file in *input_dir*.

    The header is ``id`` followed by the keys of the schema's ``properties``
    mapping (``stakeholders`` excluded). Each row's ``id`` is the YAML file
    name without its extension; fields missing from a file are left empty.

    Args:
        input_dir: Directory scanned (non-recursively) for ``*.yaml`` files.
        output_csv: Path of the CSV file to create or overwrite.
        schema_file: YAML file with a top-level ``properties`` mapping.

    Raises:
        KeyError: If the schema has no ``properties`` key.
    """
    # Resolve the columns before opening the output so a bad schema does not
    # leave a truncated CSV behind.
    schema = read_yaml(schema_file)
    schema_fields = list(schema['properties'].keys())
    if 'stakeholders' in schema_fields:
        # Haven't decided how to represent stakeholders in the CSV format, so just remove it for now.
        schema_fields.remove('stakeholders')
    fieldnames = ['id'] + schema_fields

    # Make sure the destination directory exists on a fresh checkout.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # newline='' is required by the csv module; UTF-8 keeps the output
    # identical across platforms.
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write header based on the schema
        writer.writeheader()

        # Sorted so the row order is deterministic.
        for yaml_file in sorted(os.listdir(input_dir)):
            if not yaml_file.endswith(".yaml"):
                continue

            yaml_file_path = os.path.join(input_dir, yaml_file)
            with open(yaml_file_path, 'r', encoding='utf-8') as file:
                # An empty YAML document loads as None; treat it as no fields.
                data = yaml.safe_load(file) or {}

            cleaned_data = {key: clean_value(data.get(key)) for key in fieldnames}
            # The id always comes from the file name, never from the document.
            cleaned_data['id'] = os.path.splitext(yaml_file)[0]

            writer.writerow(cleaned_data)

            print(f"Converted {yaml_file} to CSV")
def convert_yaml_to_json(directory_path, key):
    """Collect every YAML file in a directory into one nested mapping.

    Args:
        directory_path: Directory scanned (non-recursively) for ``*.yaml``
            files.
        key: Name of the single top-level key that wraps the result.

    Returns:
        ``{key: {stem: {'id': stem, **document}}}`` where ``stem`` is the file
        name without its extension and ``document`` is the parsed YAML.
    """
    data = {}

    # Sorted so the key order of the result is deterministic.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith(".yaml"):
            continue

        file_path = os.path.join(directory_path, file_name)
        with open(file_path, 'r', encoding='utf-8') as yaml_file:
            # An empty YAML document loads as None, which cannot be unpacked
            # with ** below; treat it as an empty mapping.
            yaml_data = yaml.safe_load(yaml_file) or {}

        entry_id = os.path.splitext(file_name)[0]
        # 'id' goes first so it leads each record in the serialized output.
        data[entry_id] = {'id': entry_id, **yaml_data}

    return {key: data}
|
||||||||||
def export_to_json(directory1, directory2, output_json):
    """Export two YAML directories into a single JSON file.

    The output object has two top-level keys: ``brands`` (built from
    *directory1*) and ``companies`` (built from *directory2*).

    Args:
        directory1: Directory of YAML files exported under ``brands``.
        directory2: Directory of YAML files exported under ``companies``.
        output_json: Path of the JSON file to create or overwrite.
    """
    brands_data = convert_yaml_to_json(directory1, 'brands')
    companies_data = convert_yaml_to_json(directory2, 'companies')

    combined_data = {**brands_data, **companies_data}

    # Make sure the destination directory exists on a fresh checkout.
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_json, 'w', encoding='utf-8') as json_file:
        json.dump(combined_data, json_file, indent=2)

    print("Converted data to JSON")
|
||||||||||
|
||||||||||
if __name__ == "__main__":
    # Source directories of YAML files.
    brands_yaml = 'data/brands'
    companies_yaml = 'data/companies'

    # One CSV per source directory: (input dir, output file, schema file).
    csv_jobs = [
        (brands_yaml, 'output/csv/brands.csv', 'schemas/brand_schema.yaml'),
        (companies_yaml, 'output/csv/companies.csv', 'schemas/company_schema.yaml'),
    ]
    for source_dir, csv_path, schema_path in csv_jobs:
        export_to_csv(source_dir, csv_path, schema_path)

    # A single JSON file combining both directories.
    export_to_json(brands_yaml, companies_yaml, 'output/json/data.json')
|
||||||||||
|
||||||||||
|
||||||||||
|
||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice! Thanks for adding this check