feat: Dagster Data pipeline #798

Closed
wants to merge 18 commits

Changes from 13 commits
48 changes: 48 additions & 0 deletions python/tabby-eval/README.md
@@ -0,0 +1,48 @@
# tabby_data_pipeline

This is a [Dagster](https://dagster.io/) project scaffolded with [`dagster project scaffold`](https://docs.dagster.io/getting-started/create-new-project).

## Getting started

First, install your Dagster code location as a Python package. The `--editable` flag tells pip to install the package in ["editable mode"](https://pip.pypa.io/en/latest/topics/local-project-installs/#editable-installs), so local code changes take effect automatically as you develop.

```bash
pip install -e ".[dev]"
```

Then, start the Dagster UI web server:

```bash
dagster dev
```

Open http://localhost:3000 with your browser to see the project.

You can start writing assets in `tabby_data_pipeline/assets.py`. The assets are automatically loaded into the Dagster code location as you define them.
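For instance, a minimal asset might look like the following (illustrative only; `example_models` is not part of this project):

```python
# Illustrative only -- a toy asset, not part of the scaffolded project.
import pandas as pd
from dagster import asset


@asset
def example_models() -> pd.DataFrame:
    """A placeholder asset that returns a small DataFrame."""
    return pd.DataFrame({"model": ["TabbyML/StarCoder-1B"], "language": ["python"]})
```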

## Development


### Adding new Python dependencies

You can specify new Python dependencies in `setup.py`.

### Unit testing

Tests are in the `tabby_data_pipeline_tests` directory and you can run tests using `pytest`:

```bash
pytest tabby_data_pipeline_tests
```
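A minimal smoke test could look like the following (a sketch; the project's actual tests are not included in this diff, and importing the assets module assumes its dependencies such as `modal` are installed):

```python
# tabby_data_pipeline_tests/test_assets.py -- hypothetical smoke test
from dagster import load_assets_from_modules

from tabby_data_pipeline import assets


def test_assets_load():
    """The assets module should define at least one Dagster asset."""
    assert len(load_assets_from_modules([assets])) > 0
```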

### Schedules and sensors

If you want to enable Dagster [Schedules](https://docs.dagster.io/concepts/partitions-schedules-sensors/schedules) or [Sensors](https://docs.dagster.io/concepts/partitions-schedules-sensors/sensors) for your jobs, the [Dagster Daemon](https://docs.dagster.io/deployment/dagster-daemon) process must be running. This is done automatically when you run `dagster dev`.

Once your Dagster Daemon is running, you can start turning on schedules and sensors for your jobs.
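As an illustration (this project does not define a schedule in the diff; `daily_eval_job` is a hypothetical name targeting the `tabby_eval_result` asset defined later in this PR), a schedule could be declared like so:

```python
# Hypothetical schedule -- not part of this PR.
from dagster import ScheduleDefinition, define_asset_job

daily_eval_job = define_asset_job("daily_eval_job", selection="tabby_eval_result")

daily_eval_schedule = ScheduleDefinition(
    job=daily_eval_job,
    cron_schedule="0 0 * * *",  # run at midnight every day
)
# To activate it, pass schedules=[daily_eval_schedule] to the Definitions object.
```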

## Deploy on Dagster Cloud

The easiest way to deploy your Dagster project is to use Dagster Cloud.

Check out the [Dagster Cloud Documentation](https://docs.dagster.cloud) to learn more.
452 changes: 452 additions & 0 deletions python/tabby-eval/edit_distance_analysis.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions python/tabby-eval/log.txt
Member: This should be in `.gitignore` as well.

@@ -0,0 +1,6 @@
model: TabbyML/StarCoder-1B; language: python; file: line_completion.jsonl
Skipped 0 rows, 10 rows with predictions, 0 rows with errors
model: TabbyML/StarCoder-1B; language: python; file: line_completion_rg1_bm25.jsonl
Skipped 0 rows, 10 rows with predictions, 0 rows with errors
model: TabbyML/StarCoder-1B; language: python; file: line_completion_oracle_bm25.jsonl
Skipped 0 rows, 10 rows with predictions, 0 rows with errors

6 changes: 6 additions & 0 deletions python/tabby-eval/pyproject.toml
@@ -0,0 +1,6 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.dagster]
module_name = "tabby_data_pipeline"
2 changes: 2 additions & 0 deletions python/tabby-eval/setup.cfg
@@ -0,0 +1,2 @@
[metadata]
name = tabby_data_pipeline
17 changes: 17 additions & 0 deletions python/tabby-eval/setup.py
@@ -0,0 +1,17 @@
from setuptools import find_packages, setup

setup(
    name="tabby_data_pipeline",
    packages=find_packages(exclude=["tabby_data_pipeline_tests"]),
    install_requires=[
        "dagster",
        "dagster-cloud",
        "dagstermill",
        "papermill-origami>=0.0.8",
        "pandas",
        "matplotlib",
        "seaborn",
        "scikit-learn",
    ],
    extras_require={"dev": ["dagster-webserver", "pytest"]},
)
8 changes: 8 additions & 0 deletions python/tabby-eval/tabby_data_pipeline.egg-info/PKG-INFO
Member: This directory should be inside `.gitignore`.

@@ -0,0 +1,8 @@
Metadata-Version: 2.1
Name: tabby-data-pipeline
Version: 0.0.0
Requires-Dist: dagster
Requires-Dist: dagster-cloud
Provides-Extra: dev
Requires-Dist: dagster-webserver; extra == "dev"
Requires-Dist: pytest; extra == "dev"
13 changes: 13 additions & 0 deletions python/tabby-eval/tabby_data_pipeline.egg-info/SOURCES.txt
@@ -0,0 +1,13 @@
README.md
pyproject.toml
setup.cfg
setup.py
tabby_data_pipeline/__init__.py
tabby_data_pipeline/analyze.py
tabby_data_pipeline/assets.py
tabby_data_pipeline/predict.py
tabby_data_pipeline.egg-info/PKG-INFO
tabby_data_pipeline.egg-info/SOURCES.txt
tabby_data_pipeline.egg-info/dependency_links.txt
tabby_data_pipeline.egg-info/requires.txt
tabby_data_pipeline.egg-info/top_level.txt
1 change: 1 addition & 0 deletions python/tabby-eval/tabby_data_pipeline.egg-info/dependency_links.txt
@@ -0,0 +1 @@

6 changes: 6 additions & 0 deletions python/tabby-eval/tabby_data_pipeline.egg-info/requires.txt
@@ -0,0 +1,6 @@
dagster
dagster-cloud

[dev]
dagster-webserver
pytest
1 change: 1 addition & 0 deletions python/tabby-eval/tabby_data_pipeline.egg-info/top_level.txt
@@ -0,0 +1 @@
tabby_data_pipeline
18 changes: 18 additions & 0 deletions python/tabby-eval/tabby_data_pipeline/__init__.py
@@ -0,0 +1,18 @@
from dagster import Definitions, load_assets_from_modules
from dagstermill import ConfigurableLocalOutputNotebookIOManager

from . import assets

# Collect every asset defined in the assets module into this code location.
all_assets = load_assets_from_modules([assets])

defs = Definitions(
    assets=all_assets,
    resources={
        # Stores notebooks executed by the dagstermill asset on the local filesystem.
        "output_notebook_io_manager": ConfigurableLocalOutputNotebookIOManager(),
    },
)


82 changes: 82 additions & 0 deletions python/tabby-eval/tabby_data_pipeline/analyze.py
@@ -0,0 +1,82 @@
import json


def get_bracket_lang_statement(completion):
    """Truncate a completion at the first statement terminator (';', '{' or '}')."""
    end_idx = None
    for i in range(len(completion)):
        if completion[i] in [";", "{", "}"]:
            end_idx = i
            break
    # Explicit None check: an index of 0 is a valid match position.
    return completion[:end_idx + 1] if end_idx is not None else completion


def postprocess_code_lines(prompt, target, language):
    try:
        if language in ["java", "csharp", "typescript"]:
            return get_bracket_lang_statement(target)
        elif language == "python":
            return target.split("\n")[0]
    except Exception:
        return target


def analyze(model, language, file):
    line_match = 0
    statement_match = 0

    input_file = f"./data/{model}/{language}/{file}"
    output_file = f"./data/{model}/{language}/result_{file}"

    with open(output_file, 'w') as fout:
        with open(input_file) as fin:
            for line in fin:
                obj = json.loads(line)
                result = {}
                prediction = ""

                # Copy every field that precedes the prediction (or error) into the result;
                # keep the prediction itself separately.
                for k in obj.keys():
                    if k == "prediction":
                        prediction = str(obj[k])
                        break
                    elif k == "error":
                        break
                    else:
                        result[k] = obj[k]

                tabby_eval = {}
                if file == "line_completion.jsonl":
                    tabby_eval["raw_prompt"] = obj["prompt"]
                else:
                    tabby_eval["raw_prompt"] = obj["crossfile_context"]["text"] + obj["prompt"]

                tabby_eval["prediction"] = prediction

                groundtruth = obj["groundtruth"]

                tabby_eval["first_line_prediction"] = prediction.split("\n")[0]
                tabby_eval["first_line_groundtruth"] = groundtruth.split("\n")[0]
                if tabby_eval["first_line_prediction"] == tabby_eval["first_line_groundtruth"]:
                    tabby_eval["first_line_matched"] = True
                    line_match += 1
                else:
                    tabby_eval["first_line_matched"] = False

                tabby_eval["first_statement_prediction"] = postprocess_code_lines(tabby_eval["raw_prompt"], prediction, language)
                tabby_eval["first_statement_groundtruth"] = postprocess_code_lines(tabby_eval["raw_prompt"], groundtruth, language)
                if tabby_eval["first_statement_prediction"] == tabby_eval["first_statement_groundtruth"]:
                    tabby_eval["first_statement_matched"] = True
                    statement_match += 1
                else:
                    tabby_eval["first_statement_matched"] = False

                result["tabby_eval"] = tabby_eval

                json.dump(result, fout)
                fout.write("\n")
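For context, `analyze()` is invoked once per model, language, and prediction file by the `matching` asset in `assets.py`; run on its own it would look something like this (assuming the `./data/<model>/<language>/` layout used above is in place):

```python
# Example standalone invocation (requires the prediction files to exist locally).
if __name__ == "__main__":
    analyze("StarCoder-1B", "python", "line_completion.jsonl")
```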




87 changes: 87 additions & 0 deletions python/tabby-eval/tabby_data_pipeline/assets.py
@@ -0,0 +1,87 @@
import os
import subprocess

import pandas as pd

from dagster import (
    AssetExecutionContext,
    AssetIn,
    MetadataValue,
    MultiPartitionsDefinition,
    StaticPartitionsDefinition,
    asset,
    file_relative_path,
)

from . import analyze, create_csv
Member: There's no need to extract utility functions. You could just define the assets as Python functions and organize them in individual Python files.

Contributor (author): They are individual Python files, but I have to import them and call them in the assets.

Member: They don't have to be; they can be defined as assets directly (see the sketch at the end of this file's diff).

from dagstermill import define_dagstermill_asset


@asset(
    partitions_def=MultiPartitionsDefinition(
        {
            "model_id": StaticPartitionsDefinition([
                'TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B',
                'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B',
                'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B',
            ]),
            "language": StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),
        }
    )
)
def model_predict(context: AssetExecutionContext) -> None:
    """Run predictions for one (model, language) partition via the Modal script."""
    model_id = context.partition_key.keys_by_dimension["model_id"]
    language = context.partition_key.keys_by_dimension["language"]

    my_env = os.environ.copy()
    my_env["MODEL_ID"] = model_id

    context.add_output_metadata(metadata={"model_id": MetadataValue.md(model_id)})

    files = 'line_completion.jsonl, line_completion_rg1_bm25.jsonl, line_completion_oracle_bm25.jsonl'
Member: Where are these files? Should they be added as assets?


    p = subprocess.Popen(
        ["modal", "run", "./modal/predict.py", "--language", language, "--files", files],
        env=my_env,
    )
    p.wait()
    context.add_output_metadata(metadata={'modal run': MetadataValue.md("success!")})
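For reference, a single (model, language) partition of this asset can be addressed programmatically as sketched below (hedged: running it for real requires the Modal CLI and the prediction data to be available):

```python
# Hypothetical snippet: materialize one partition of model_predict from a script or test.
from dagster import MultiPartitionKey, materialize

from tabby_data_pipeline.assets import model_predict

result = materialize(
    [model_predict],
    partition_key=MultiPartitionKey({"model_id": "TabbyML/StarCoder-1B", "language": "python"}),
)
assert result.success
```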


@asset(
    partitions_def=MultiPartitionsDefinition(
        {
            "model_id": StaticPartitionsDefinition([
                'TabbyML/StarCoder-1B', 'TabbyML/StarCoder-3B', 'TabbyML/StarCoder-7B',
                'TabbyML/WizardCoder-1B', 'TabbyML/WizardCoder-3B',
                'TabbyML/CodeLlama-7B', 'TabbyML/CodeLlama-13B',
            ]),
            "language": StaticPartitionsDefinition(["python", "java", "csharp", "typescript"]),
        }
    ),
    deps=[model_predict],
)
def matching(context) -> None:
    """Compute line/statement match results for every prediction file in this partition."""
    model_id = context.partition_key.keys_by_dimension["model_id"]
    language = context.partition_key.keys_by_dimension["language"]

    model = model_id.split("/")[-1]
    for file in ["line_completion.jsonl", "line_completion_rg1_bm25.jsonl", "line_completion_oracle_bm25.jsonl"]:
Member: Shouldn't each file itself be an asset?

        analyze.analyze(model, language, file)

@asset
def tabby_eval_result():
    """Aggregate the per-file match counts into tabby.csv."""
    create_csv.create_csv()


@asset(deps=[tabby_eval_result])
def tabby_dataset():
    return pd.read_csv(file_relative_path(__file__, "tabby.csv"))


tabby_jupyter_notebook = define_dagstermill_asset(
    name="tabby_jupyter",
    notebook_path=file_relative_path(__file__, "tabby_eval.ipynb"),
    ins={"df": AssetIn("tabby_dataset")},
)
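As the Member suggested in the review thread above, the helper modules could be folded directly into asset bodies. A rough sketch of that idea (hypothetical; `tabby_eval_result_inline` and the abbreviated lists are illustrations, not code in this PR):

```python
# Hypothetical alternative: build the summary inside the asset instead of calling create_csv.create_csv().
import json

import pandas as pd
from dagster import asset


@asset
def tabby_eval_result_inline() -> pd.DataFrame:
    """Summarize first-line match counts per model and language directly inside the asset."""
    rows = []
    for model in ["StarCoder-1B", "StarCoder-3B"]:                # abbreviated model list
        for language, records in [("python", 2665), ("java", 2139)]:
            counts = []
            for f in ["line_completion.jsonl", "line_completion_rg1_bm25.jsonl",
                      "line_completion_oracle_bm25.jsonl"]:
                with open(f"./data/{model}/{language}/result_{f}") as fh:
                    counts.append(sum(json.loads(line)["tabby_eval"]["first_line_matched"]
                                      for line in fh))
            rows.append([model, language, records, *counts])
    return pd.DataFrame(rows, columns=["Model", "Dataset", "Records",
                                       "baseline", "bm25", "oracle"])
```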

35 changes: 35 additions & 0 deletions python/tabby-eval/tabby_data_pipeline/create_csv.py
@@ -0,0 +1,35 @@
import json

import pandas as pd

models = ["StarCoder-1B", "StarCoder-3B", "StarCoder-7B", "CodeLlama-7B", "CodeLlama-13B",
          "WizardCoder-1B", "WizardCoder-3B", "DeepseekCoder-1.3B", "DeepseekCoder-6.7B"]
languages = {"csharp": "C#", "java": "Java", "python": "Python", "typescript": "Typescript"}
files = ["line_completion.jsonl", "line_completion_rg1_bm25.jsonl", "line_completion_oracle_bm25.jsonl"]
total_records = {"python": 2665, "java": 2139, "typescript": 3356, "csharp": 1768}

headers = ["Model", "Dataset", "Records", "baseline", "bm25", "oracle"]

stat = []


def get_match(model, language, file):
    """Count predictions whose first line exactly matches the ground truth."""
    count = 0
    with open(f"./data/{model}/{language}/result_{file}") as f:
        for line in f:
            obj = json.loads(line)
            if obj["tabby_eval"]["first_line_matched"]:
                count += 1

    return count


def create_csv():
    for model in models:
        for language in languages.keys():
            x = [model, languages[language], total_records[language]]
            for f in files:
                x.append(get_match(model, language, f))

            stat.append(x)

    df = pd.DataFrame(stat, columns=headers)
    print(df)

    df.to_csv("./tabby_data_pipeline/tabby.csv", index=False)
37 changes: 37 additions & 0 deletions python/tabby-eval/tabby_data_pipeline/tabby.csv
@@ -0,0 +1,37 @@
Model,Dataset,Records,baseline,bm25,oracle
StarCoder-1B,C#,1768,31,100,138
StarCoder-1B,Java,2139,17,81,98
StarCoder-1B,Python,2665,30,237,338
StarCoder-1B,Typescript,3356,144,270,342
StarCoder-3B,C#,1768,51,118,165
StarCoder-3B,Java,2139,39,93,114
StarCoder-3B,Python,2665,130,322,445
StarCoder-3B,Typescript,3356,229,351,422
StarCoder-7B,C#,1768,64,134,170
StarCoder-7B,Java,2139,60,116,138
StarCoder-7B,Python,2665,176,376,494
StarCoder-7B,Typescript,3356,267,391,457
CodeLlama-7B,C#,1768,63,133,181
CodeLlama-7B,Java,2139,82,150,175
CodeLlama-7B,Python,2665,193,394,488
CodeLlama-7B,Typescript,3356,483,689,766
CodeLlama-13B,C#,1768,84,152,197
CodeLlama-13B,Java,2139,99,154,182
CodeLlama-13B,Python,2665,208,411,538
CodeLlama-13B,Typescript,3356,525,733,869
WizardCoder-1B,C#,1768,30,87,138
WizardCoder-1B,Java,2139,25,77,99
WizardCoder-1B,Python,2665,37,218,343
WizardCoder-1B,Typescript,3356,159,264,338
WizardCoder-3B,C#,1768,51,118,165
WizardCoder-3B,Java,2139,39,93,114
WizardCoder-3B,Python,2665,142,334,446
WizardCoder-3B,Typescript,3356,234,348,422
DeepseekCoder-1.3B,C#,1768,58,121,166
DeepseekCoder-1.3B,Java,2139,54,115,136
DeepseekCoder-1.3B,Python,2665,149,332,433
DeepseekCoder-1.3B,Typescript,3356,437,590,708
DeepseekCoder-6.7B,C#,1768,87,166,198
DeepseekCoder-6.7B,Java,2139,83,150,165
DeepseekCoder-6.7B,Python,2665,247,436,571
DeepseekCoder-6.7B,Typescript,3356,555,740,857