Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lots of refactoring #16

Merged
merged 1 commit into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,7 @@ dist
syftbox.egg-info
keys/**
scheduler.lock
jobs.sqlite
jobs.sqlite
notebooks/crypto
netflix_data/*
backup/*
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ repos:
always_run: true
- id: check-added-large-files
always_run: true
exclude: '.*Pyfhel-3\.4\.2-cp311-cp311-macosx_13_0_arm64\.whl|.*syftbox-0.1.0-py3-none-any\.whl'
- id: check-yaml
always_run: true
- id: check-merge-conflict
Expand Down
4 changes: 4 additions & 0 deletions app.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Copy an app from ./apps into a user's apps folder and clear its previous
# run state.
#
# Usage: app.sh <app-name> <user>
#   $1  name of the app directory under ./apps
#   $2  name of the user directory under ./users

# Refuse to run without both arguments: with empty values the paths below
# would collapse to things like ./users//apps//output.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 <app-name> <user>" >&2
    exit 1
fi

# Quote every expansion so names containing spaces or glob characters are
# passed through as a single path.
cp -r "./apps/$1" "./users/$2/apps/$1"

# Remove the copied app's output folder and cached last-run marker.
# NOTE(review): presumably this forces the app to run again from scratch --
# confirm against the app's main.py.
rm -rf "./users/$2/apps/$1/output"
# -f: a missing last_run.json (app never ran) is not an error.
rm -f "./users/$2/apps/$1/cache/last_run.json"
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
netflix_title,netflix_date,tmdb_id,tmdb_title,tmdb_media_type,tmdb_poster_url,homepage,imdb_id,facebook_id,instagram_id,twitter_id,genre_ids,genre_names,imdb_runtime_minutes,imdb_rating
Psych: Season 1: Pilot: Part 1,2024-08-21,1447,Psych,tv,https://image.tmdb.org/t/p/w500/fDI15gTVbtW5Sbv5QenqecRxWKJ.jpg,http://www.usanetwork.com/series/psych,tt0491738,PsychPeacock,PsychPeacock,PsychPeacock,"[35, 18, 9648, 80]","['Comedy', 'Drama', 'Mystery', 'Crime']",44,8.4
Monk: Season 1: Mr. Monk and the Candidate: Part 1,2024-08-12,1695,Monk,tv,https://image.tmdb.org/t/p/w500/3axGMbUecXXOPSeG47v2i9wK5y5.jpg,http://www.usanetwork.com/series/monk,tt0312172,,,,"[35, 80, 18, 9648]","['Comedy', 'Crime', 'Drama', 'Mystery']",44,8.1
3 Body Problem: Season 1: Countdown,2024-03-26,108545,3 Body Problem,tv,https://image.tmdb.org/t/p/w500/ykZ7hlShkdRQaL2aiieXdEMmrLb.jpg,https://www.netflix.com/title/81024821,tt13016388,,3bodyproblem,3body,"[10765, 9648, 18]","['Sci-Fi & Fantasy', 'Mystery', 'Drama']",60,7.5
Fool Me Once: Limited Series: Episode 1,2024-01-29,220801,Fool Me Once,tv,https://image.tmdb.org/t/p/w500/Ertv4WLEyHgi8zN4ldOKgPcGAZ.jpg,https://www.netflix.com/title/81588093,tt5611024,,,,"[18, 80, 9648]","['Drama', 'Crime', 'Mystery']",50,6.8
Exploding Kittens: Pilot,2024-07-19,219532,Exploding Kittens,tv,https://image.tmdb.org/t/p/w500/4WctqRtusYpTLHNkuVjQe4R51DZ.jpg,https://www.netflix.com/title/81459282,tt19734104,,,,"[16, 35]","['Animation', 'Comedy']",25,6.8
45 changes: 45 additions & 0 deletions apps/netflix/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os

import pandas as pd

from syftbox.lib import ClientConfig, SyftVault, TabularDataset


def run():
    """Publish the Netflix/TMDB/IMDB dataset and link its private copy.

    Steps, all relative to the current working directory:

    1. Read the real data from ``./temp/3_imdb.csv`` and the mock data from
       ``./data/NetflixViewingHistory_TMDB_IMDB.mock.csv``.
    2. Refuse to continue if the two files do not have the same set of
       column names.
    3. Write the mock data into the public ``datasets/netflix_tmdb_imdb``
       folder and publish it as a ``TabularDataset``.
    4. Write the real data under ``./output`` and link it to the public
       file with ``SyftVault.link_private``.

    The client configuration is loaded from the path in the
    ``SYFTBOX_CLIENT_CONFIG_PATH`` environment variable (``None`` if unset).

    Any exception is caught and reported on stdout; nothing is re-raised
    and the function returns ``None``.
    """
    try:
        imdb_df = pd.read_csv("./temp/3_imdb.csv")

        dataset_filename = "NetflixViewingHistory_TMDB_IMDB.csv"
        imdb_mock_df = pd.read_csv("./data/NetflixViewingHistory_TMDB_IMDB.mock.csv")

        # Only the set of column names is compared; order and dtypes are not.
        if set(imdb_df.columns) != set(imdb_mock_df.columns):
            raise Exception("Netflix real vs mock schema are different")

        config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
        client_config = ClientConfig.load(config_path)
        manifest = client_config.manifest

        # create public datasets folder
        datasets_path = manifest.create_public_folder("datasets")

        dataset_path = datasets_path / "netflix_tmdb_imdb"
        csv_file = dataset_path / dataset_filename
        os.makedirs(dataset_path, exist_ok=True)

        # write mock data
        imdb_mock_df.to_csv(csv_file)

        dataset = TabularDataset.from_csv(
            csv_file, name="Netflix_TMDB_IMDB", has_private=True
        )
        dataset.publish(manifest, overwrite=True)

        # write private file
        private_path = os.path.abspath(f"./output/{dataset_filename}")
        # The output folder may not exist (e.g. on a fresh install or after
        # it has been cleaned), and to_csv does not create parent folders.
        os.makedirs(os.path.dirname(private_path), exist_ok=True)
        imdb_df.to_csv(private_path)
        print(f"> Writing private {dataset_filename} to {private_path}")

        SyftVault.link_private(csv_file, private_path)

    except Exception as e:
        print("Failed to make dataset with dataset.py", e)
4 changes: 4 additions & 0 deletions syftbox/client/apps/netflix/imdb.py → apps/netflix/imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,7 @@ def run():

print(traceback.print_exc())
print("Failed to run imdb.py", e)


if __name__ == "__main__":
run()
2 changes: 2 additions & 0 deletions syftbox/client/apps/netflix/main.py → apps/netflix/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import os

from dataset import run as make_dataset
from imdb import run as add_imdb_data
from netflix import run as preprocess_netflix
from page import run as make_page
Expand Down Expand Up @@ -72,6 +73,7 @@ def main():
preprocess_netflix()
get_tmdb_data(tmdb_api_key, missing_file)
add_imdb_data()
make_dataset()
make_page()

last_run = {"input_hash": input_hash}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def run():

except Exception as e:
print("Failed to run netflix.py", e)


if __name__ == "__main__":
run()
6 changes: 5 additions & 1 deletion syftbox/client/apps/netflix/page.py → apps/netflix/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,8 @@ def run():
import traceback

print(traceback.print_exc())
print("Failed to run html.py", e)
print("Failed to run page.py", e)


if __name__ == "__main__":
run()
3 changes: 2 additions & 1 deletion syftbox/client/apps/netflix/run.sh → apps/netflix/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ uv venv .venv
uv pip install -r requirements.txt
TMDB_API_KEY=$(cat inputs/TMDB_API_KEY.txt)

uv run main.py --tmdb-api-key=$TMDB_API_KEY --missing-imdb-file=inputs/missing_imdb_id.json $( [ "$1" = "--force" ] && echo '--force' )
uv run python -c "import syftbox; print(syftbox.__version__)"
uv run main.py --tmdb-api-key=$TMDB_API_KEY --missing-imdb-file=inputs/missing_imdb_id.json "$@"
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 2 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
rm -rf dist
uv build
uv build
cp dist/syftbox-0.1.0-py3-none-any.whl ./
2 changes: 2 additions & 0 deletions davo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Start the SyftBox client script through `uv run` for the local "davo" user:
# config file ./users/davo.json, sync folder ./users/davo, --port=8089,
# talking to the server at http://localhost:5001.
uv run syftbox/client/client.py --config_path=./users/davo.json --sync_folder=./users/davo [email protected] --port=8089 --server=http://localhost:5001
1 change: 1 addition & 0 deletions me.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
#!/bin/bash
# Start the SyftBox client script through `uv run` for the local "me" user:
# config file ./users/me.json, sync folder ./users/me, --port=8085,
# talking to the server at http://localhost:5001.

# Exported so the client process inherits it.
# NOTE(review): presumably switches the client into a development mode -- confirm.
export SYFTBOX_DEV="true"
uv run syftbox/client/client.py --config_path=./users/me.json --sync_folder=./users/me [email protected] --port=8085 --server=http://localhost:5001
51 changes: 37 additions & 14 deletions notebooks/01-trade-create.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,8 @@
"metadata": {},
"outputs": [],
"source": [
"csv_file = datasets_path / \"trade_data\" / \"trade_mock.csv\"\n",
"csv_file"
"dataset_path = datasets_path / \"trade_data\"\n",
"dataset_path"
]
},
{
Expand All @@ -188,7 +188,9 @@
"metadata": {},
"outputs": [],
"source": [
"mock_ca_data.to_csv(csv_file)"
"import os\n",
"\n",
"os.makedirs(dataset_path, exist_ok=True)"
]
},
{
Expand All @@ -197,6 +199,27 @@
"id": "19",
"metadata": {},
"outputs": [],
"source": [
"csv_file = dataset_path / \"trade_mock.csv\"\n",
"csv_file"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"metadata": {},
"outputs": [],
"source": [
"mock_ca_data.to_csv(csv_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21",
"metadata": {},
"outputs": [],
"source": [
"dataset = TabularDataset.from_csv(csv_file, name=\"Trade Data\", has_private=True)\n",
"dataset"
Expand All @@ -205,7 +228,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"id": "22",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -215,7 +238,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "21",
"id": "23",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -225,7 +248,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "22",
"id": "24",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -235,14 +258,14 @@
{
"cell_type": "code",
"execution_count": null,
"id": "23",
"id": "25",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "24",
"id": "26",
"metadata": {},
"source": [
"# Link Private Data"
Expand All @@ -251,7 +274,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "25",
"id": "27",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -261,7 +284,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "26",
"id": "28",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -272,7 +295,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "27",
"id": "29",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -282,7 +305,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "28",
"id": "30",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -292,7 +315,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "29",
"id": "31",
"metadata": {},
"outputs": [],
"source": []
Expand All @@ -314,7 +337,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
Loading