Skip to content
This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

Feature/slim configs rebased #32

Merged
merged 2 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62,354 changes: 13,013 additions & 49,341 deletions sciphi/data/stock_config/textbooks_are_all_you_need_evol/evol_grade_school.yaml

Large diffs are not rendered by default.

87,793 changes: 27,289 additions & 60,504 deletions sciphi/data/stock_config/textbooks_are_all_you_need_evol/evol_seminar_i.yaml

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion sciphi/writers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sciphi.writers.jsonl_writer import JsonlDataWriter
from sciphi.writers.raw_writer import RawDataWriter

__all__ = ["JsonlDataWriter"]
__all__ = ["JsonlDataWriter", "RawDataWriter"]
9 changes: 9 additions & 0 deletions sciphi/writers/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""A module which defines the abstract bata class for data writers."""
import os
import time
from abc import ABC, abstractmethod


Expand All @@ -8,6 +10,13 @@ class DataWriter(ABC):
def __init__(self, output_path, overwrite=True):
    """Initialize the writer.

    Args:
        output_path: Path of the file this writer targets.
        overwrite: Whether an existing file at ``output_path`` may be
            written to directly. It is stored on the instance because
            ``_get_modified_path`` reads ``self.overwrite``; without it,
            a subclass that only calls this initializer would fail with
            ``AttributeError`` when resolving its path.
    """
    self.output_path = output_path
    self.overwrite = overwrite

def _get_modified_path(self):
    """Return the path to write to, avoiding an existing file when required.

    If overwriting is allowed, or nothing exists at ``self.output_path``,
    that path is returned unchanged. Otherwise the current Unix time in
    whole seconds is inserted before the file extension.
    """
    if self.overwrite or not os.path.exists(self.output_path):
        return self.output_path

    stem, suffix = os.path.splitext(self.output_path)
    stamp = int(time.time())
    return f"{stem}_{stamp}{suffix}"

@abstractmethod
def write(self, data):
"""Write data to the specified path."""
Expand Down
9 changes: 0 additions & 9 deletions sciphi/writers/jsonl_writer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""A module which facilitates JSONL data writing."""
import json
import os
import time

from sciphi.writers.base import DataWriter

Expand All @@ -14,13 +12,6 @@ def __init__(self, output_path, overwrite=True):
self.output_path = output_path
self.overwrite = overwrite

def _get_modified_path(self):
    """Choose the output path, sidestepping an existing file if overwriting is off.

    Returns ``self.output_path`` as-is unless overwriting is disabled and
    the file already exists; in that case the current Unix time (whole
    seconds) is appended to the base name, ahead of the extension.
    """
    must_rename = not self.overwrite and os.path.exists(self.output_path)
    if must_rename:
        root, extension = os.path.splitext(self.output_path)
        seconds = int(time.time())
        return f"{root}_{seconds}{extension}"
    return self.output_path

def write(self, data: list[dict]) -> None:
"""
Write the provided data to the specified path.
Expand Down
23 changes: 23 additions & 0 deletions sciphi/writers/raw_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""A module which facilitates raw data writing."""
from sciphi.writers.base import DataWriter


class RawDataWriter(DataWriter):
    """A class to write raw text data to a file."""

    def __init__(self, output_path, overwrite=True):
        """Initialize the writer.

        Args:
            output_path (str): Path of the file to write to.
            overwrite (bool): Read by ``_get_modified_path`` to decide
                whether an already-existing ``output_path`` may be used
                directly.
        """
        super().__init__(output_path)
        self.overwrite = overwrite

    def write(self, data: str) -> None:
        """
        Append the provided text, followed by a newline, to the output file.

        Args:
            data (str): Text to be written.
        """
        path = self._get_modified_path()

        # Pin the encoding so the output does not depend on the platform's
        # locale default (which can fail on non-ASCII text).
        with open(path, "a", encoding="utf-8") as f:
            f.write(data + "\n")