diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..ded50c79
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,8 @@
+.git
+data
+default_apps
+dist
+docker
+notebooks
+projects
+tests
diff --git a/MANIFEST.in b/MANIFEST.in
index ea61a195..e77d35e7 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
recursive-include syftbox *.html *.js *.css *.zip
+recursive-include default_apps *.py *.sh *.html *.js *.css *.zip *.png *.txt *.csv
diff --git a/README.md b/README.md
index ac756db8..f891b9c4 100644
--- a/README.md
+++ b/README.md
@@ -7,38 +7,60 @@
|___/
```
+# Quickstart User Installation
+
+## Install uv
+
+```
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+## Create a virtualenv somewhere
+
+```
+uv venv .venv
+```
+
+## Install the wheel
+
+```
+uv pip install http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl --reinstall
+```
+
+## Run the client
+
+```
+uv run syftbox client
+```
+
# Quickstart Client Developer Installation
-### Step 0: Open your terminal to the root of this Github repository
+### Step 0: Open your terminal to the root of this GitHub repository
Begin by opening your terminal and navigating to the root directory of this GitHub repository (so when you run 'ls' it should show folders like "syftbox", "server", "tests", etc.). Then run the commands in steps 1-5:
### Step 1: Install Homebrew
+
```
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
### Step 2: Install uv (using Homebrew, which works better for this than pip)
+
```
brew install uv
```
### Step 3: Install a virtual environment using uv
+
```
uv venv
```
### Step 4: Install syftbox in editable mode
+
```
uv pip install -e .
```
### Step 5: Run the client
+
```
-syftbox client
+uv run syftbox/client/client.py
```
-
# Alternative Options
### Run Client
diff --git a/default_apps/adder/main.py b/default_apps/adder/main.py
index 1d51bbb5..8056523f 100644
--- a/default_apps/adder/main.py
+++ b/default_apps/adder/main.py
@@ -1,18 +1,28 @@
import json
import os
-input_file_path = "../../andrew@openmined.org/app_pipelines/adder/inputs/data.json"
-output_file_path = "../../andrew@openmined.org/app_pipelines/adder/done/data.json"
+from syftbox.lib import ClientConfig
+
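+# Prefer the config path exported via SYFTBOX_CLIENT_CONFIG_PATH;
+# ClientConfig.load receives None when it is unset.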
+config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
+client_config = ClientConfig.load(config_path)
+
+input_folder = f"{client_config.sync_folder}/app_pipelines/adder/inputs"
+output_folder = f"{client_config.sync_folder}/app_pipelines/adder/done"
+os.makedirs(input_folder, exist_ok=True)
+os.makedirs(output_folder, exist_ok=True)
+
+input_file_path = f"{input_folder}/data.json"
+output_file_path = f"{output_folder}/data.json"
if os.path.exists(input_file_path):
- with open(input_file_path, 'r') as f:
+ with open(input_file_path, "r") as f:
data = json.load(f)
- data['datum'] += 1
+ data["datum"] += 1
- with open(output_file_path, 'w') as f:
+ with open(output_file_path, "w") as f:
json.dump(data, f)
os.remove(input_file_path)
else:
- print(f"Input file {input_file_path} does not exist.")
\ No newline at end of file
+ print(f"Input file {input_file_path} does not exist.")
diff --git a/default_apps/manual_pipeline/manual_pipeline_app.py b/default_apps/manual_pipeline/manual_pipeline_app.py
index 44c153ea..da792730 100644
--- a/default_apps/manual_pipeline/manual_pipeline_app.py
+++ b/default_apps/manual_pipeline/manual_pipeline_app.py
@@ -1,4 +1,3 @@
-
@dataclass
class SyftLink(Jsonable):
@classmethod
@@ -271,8 +270,6 @@ def create_datasite_import_path(datasite: str) -> str:
return import_path
-
-
@dataclass
class DatasiteManifest(Jsonable):
datasite: str
@@ -295,9 +292,6 @@ class Dataset:
sync_path: str
-
-
-
def extract_leftmost_email(text: str) -> str:
# Define a regex pattern to match an email address
email_regex = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"
@@ -1230,7 +1224,6 @@ class TaskManifest(Jsonable):
write_back_denied_path: str
-
@dataclass
class PipelineActionRun(PipelineAction):
exit_code: int | None = None
diff --git a/default_apps/netflix/.gitignore b/default_apps/netflix/.gitignore
new file mode 100644
index 00000000..99c82da1
--- /dev/null
+++ b/default_apps/netflix/.gitignore
@@ -0,0 +1,4 @@
+temp/*
+output/*
+inputs/*
+cache/*
\ No newline at end of file
diff --git a/default_apps/netflix/README.md b/default_apps/netflix/README.md
new file mode 100644
index 00000000..84a8bb2b
--- /dev/null
+++ b/default_apps/netflix/README.md
@@ -0,0 +1,57 @@
+# Netflix App
+
+## Download your Netflix data
+
+Go here and request a download of your Netflix data:
+https://www.netflix.com/account/getmyinfo
+
+## Get a TMDB API key
+
+Sign up here:
+https://www.themoviedb.org/signup
+
+Create an API key here:
+https://www.themoviedb.org/settings/api
+
+## Setup
+
+Put the following files in the `inputs` folder:
+
+- NetflixViewingHistory.csv (downloaded from Netflix)
+- TMDB_API_KEY.txt (put the key in this text file)
+- missing_imdb_id.json (optional: put JSON in here to fix titles missing from TMDB)
+
+## Create your Netflix Page
+
+```
+./run.sh
+```
+
+Force it to run again:
+
+```
+./run.sh --force
+```
+
+## Debugging
+
+Check the temp folder for the intermediate files that are generated.
+You can load these dataframes in Pandas to see what's going on.
+main.py runs each step one after the other, so you can look at the code for the
+step where your issue is happening.
+
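+For example, to inspect an intermediate step (a minimal sketch; the file names
+match the pipeline steps in main.py):
+
+```python
+import pandas as pd
+
+# each step writes one file: temp/1_netflix.csv, temp/2_tmdb.csv, temp/3_imdb.csv
+df = pd.read_csv("temp/2_tmdb.csv")
+print(df.head())
+```
+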
+## Missing IMDB file
+
+The missing IMDB file lets you manually tell the system the IMDB ID for a
+particular title.
+
+The format is:
+
+```json
+{
+ "Life: Primates": "tt1533395"
+}
+```
+
+Each key can be a partial or exact match, but don't make it too short: matching
+uses a string-in-string comparison, so a very short key will match other titles.
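+
+The lookup is a simple case-insensitive containment check (a minimal sketch of
+`in_manual_mapping` from tmdb.py; the helper name here is illustrative):
+
+```python
+def find_manual_id(title: str, missing_imdb_id: dict) -> str | None:
+    # Match in both directions: a key can contain the title or be contained in it.
+    title = title.lower()
+    for key, imdb_id in missing_imdb_id.items():
+        key = key.lower()
+        if title in key or key in title:
+            return imdb_id
+    return None
+```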
diff --git a/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv b/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv
new file mode 100644
index 00000000..a73bae35
--- /dev/null
+++ b/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv
@@ -0,0 +1,6 @@
+netflix_title,netflix_date,tmdb_id,tmdb_title,tmdb_media_type,tmdb_poster_url,homepage,imdb_id,facebook_id,instagram_id,twitter_id,genre_ids,genre_names,imdb_runtime_minutes,imdb_rating
+Psych: Season 1: Pilot: Part 1,2024-08-21,1447,Psych,tv,https://image.tmdb.org/t/p/w500/fDI15gTVbtW5Sbv5QenqecRxWKJ.jpg,http://www.usanetwork.com/series/psych,tt0491738,PsychPeacock,PsychPeacock,PsychPeacock,"[35, 18, 9648, 80]","['Comedy', 'Drama', 'Mystery', 'Crime']",44,8.4
+Monk: Season 1: Mr. Monk and the Candidate: Part 1,2024-08-12,1695,Monk,tv,https://image.tmdb.org/t/p/w500/3axGMbUecXXOPSeG47v2i9wK5y5.jpg,http://www.usanetwork.com/series/monk,tt0312172,,,,"[35, 80, 18, 9648]","['Comedy', 'Crime', 'Drama', 'Mystery']",44,8.1
+3 Body Problem: Season 1: Countdown,2024-03-26,108545,3 Body Problem,tv,https://image.tmdb.org/t/p/w500/ykZ7hlShkdRQaL2aiieXdEMmrLb.jpg,https://www.netflix.com/title/81024821,tt13016388,,3bodyproblem,3body,"[10765, 9648, 18]","['Sci-Fi & Fantasy', 'Mystery', 'Drama']",60,7.5
+Fool Me Once: Limited Series: Episode 1,2024-01-29,220801,Fool Me Once,tv,https://image.tmdb.org/t/p/w500/Ertv4WLEyHgi8zN4ldOKgPcGAZ.jpg,https://www.netflix.com/title/81588093,tt5611024,,,,"[18, 80, 9648]","['Drama', 'Crime', 'Mystery']",50,6.8
+Exploding Kittens: Pilot,2024-07-19,219532,Exploding Kittens,tv,https://image.tmdb.org/t/p/w500/4WctqRtusYpTLHNkuVjQe4R51DZ.jpg,https://www.netflix.com/title/81459282,tt19734104,,,,"[16, 35]","['Animation', 'Comedy']",25,6.8
\ No newline at end of file
diff --git a/default_apps/netflix/dataset.py b/default_apps/netflix/dataset.py
new file mode 100644
index 00000000..b953a521
--- /dev/null
+++ b/default_apps/netflix/dataset.py
@@ -0,0 +1,45 @@
+import os
+
+import pandas as pd
+
+from syftbox.lib import ClientConfig, SyftVault, TabularDataset
+
+
+def run():
+ try:
+ imdb_df = pd.read_csv("./temp/3_imdb.csv")
+
+ dataset_filename = "NetflixViewingHistory_TMDB_IMDB.csv"
+ imdb_mock_df = pd.read_csv("./data/NetflixViewingHistory_TMDB_IMDB.mock.csv")
+
+ if set(imdb_df.columns) != set(imdb_mock_df.columns):
+            raise Exception("Netflix real vs mock schemas are different")
+
+ config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
+ client_config = ClientConfig.load(config_path)
+ manifest = client_config.manifest
+
+ # create public datasets folder
+ datasets_path = manifest.create_public_folder("datasets")
+
+ dataset_path = datasets_path / "netflix_tmdb_imdb"
+ csv_file = dataset_path / dataset_filename
+ os.makedirs(dataset_path, exist_ok=True)
+
+ # write mock data
+        imdb_mock_df.to_csv(csv_file, index=False)
+
+ dataset = TabularDataset.from_csv(
+ csv_file, name="Netflix_TMDB_IMDB", has_private=True
+ )
+ dataset.publish(manifest, overwrite=True)
+
+ # write private file
+ private_path = os.path.abspath(f"./output/{dataset_filename}")
+        imdb_df.to_csv(private_path, index=False)
+ print(f"> Writing private {dataset_filename} to {private_path}")
+
+ SyftVault.link_private(csv_file, private_path)
+
+ except Exception as e:
+ print("Failed to make dataset with dataset.py", e)
diff --git a/default_apps/netflix/imdb.py b/default_apps/netflix/imdb.py
new file mode 100644
index 00000000..4617c1ba
--- /dev/null
+++ b/default_apps/netflix/imdb.py
@@ -0,0 +1,83 @@
+import os
+import warnings
+
+import pandas as pd
+from utils import download_file
+
+# Suppress only DtypeWarning
+warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)
+
+
+download_urls = [
+ "https://datasets.imdbws.com/title.basics.tsv.gz",
+ "https://datasets.imdbws.com/title.ratings.tsv.gz",
+]
+
+
+def run():
+ try:
+ temp_folder = "./temp/"
+ output_file = "3_imdb.csv"
+
+ imdb_df = pd.read_csv("./temp/2_tmdb.csv")
+
+ for download_url in download_urls:
+ filename = os.path.basename(download_url)
+ file_path = f"{temp_folder}/{filename}"
+            if not os.path.exists(file_path):
+                print(f"> Downloading {download_url} to {file_path}")
+                download_file(download_url, temp_folder)
+
+ titles = pd.read_csv(
+ temp_folder + "/title.basics.tsv.gz",
+ sep="\t",
+ compression="gzip",
+ )
+
+ title_ratings = pd.read_csv(
+ temp_folder + "/title.ratings.tsv.gz",
+ sep="\t",
+ compression="gzip",
+ )
+
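+        # Right-join so only titles that actually have ratings are kept,
+        # then drop incomplete rows and adult titles.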
+ titles_merged = titles.merge(title_ratings, on="tconst", how="right")
+ titles_cleaned = titles_merged.dropna()
+ titles_cleaned = titles_cleaned[titles_cleaned["isAdult"] == 0]
+
+ titles_cleaned["startYear"] = titles_cleaned["startYear"].replace("\\N", None)
+ titles_cleaned["runtimeMinutes"] = titles_cleaned["runtimeMinutes"].replace(
+ "\\N", None
+ )
+
+ df_merged = imdb_df.merge(
+ titles_cleaned[["tconst", "runtimeMinutes", "averageRating"]],
+ how="left",
+ left_on="imdb_id",
+ right_on="tconst",
+ )
+
+ df_merged = df_merged.rename(
+ columns={
+ "runtimeMinutes": "imdb_runtime_minutes",
+ "averageRating": "imdb_rating",
+ }
+ )
+
+ df_merged = df_merged.drop(columns=["tconst"])
+
+ path = os.path.abspath(temp_folder + "/" + output_file)
+ print(f"Writing {output_file} to {temp_folder}")
+ df_merged.to_csv(path, index=False)
+
+ except Exception as e:
+ import traceback
+
+        traceback.print_exc()
+ print("Failed to run imdb.py", e)
+
+
+if __name__ == "__main__":
+ run()
diff --git a/default_apps/netflix/main.py b/default_apps/netflix/main.py
new file mode 100644
index 00000000..cdcc4cc5
--- /dev/null
+++ b/default_apps/netflix/main.py
@@ -0,0 +1,112 @@
+import argparse
+import os
+import shutil
+
+# from dataset import run as make_dataset
+from imdb import run as add_imdb_data
+from netflix import run as preprocess_netflix
+from page import run as make_page
+from tmdb import run as get_tmdb_data
+from utils import compute_file_hash, load_cache, save_cache
+
+
+def publish_page(output_path):
+ try:
+ from syftbox.lib import ClientConfig
+
+ config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
+ client_config = ClientConfig.load(config_path)
+
+ file_name = "index.html"
+ destination = "public/apps/netflix/"
+ destination_path = client_config.datasite_path + "/" + destination
+ os.makedirs(destination_path, exist_ok=True)
+
+ shutil.copy2(output_path, destination_path + "/" + file_name)
+ print(
+ f"> Netflix app published to: {client_config.server_url}/datasites/{client_config.email}/apps/netflix/"
+ )
+ except Exception as e:
+ import traceback
+
+ print(traceback.format_exc())
+        print("Couldn't publish", e)
+
+
+def main():
+ # Create the argument parser
+ parser = argparse.ArgumentParser(description="Enter your TMDB API key.")
+
+ # Add an argument for the TMDB API key
+ parser.add_argument("--tmdb-api-key", required=False, help="Your TMDB API key")
+ parser.add_argument(
+ "--missing-imdb-file", required=False, help="Your missing IMDB title file"
+ )
+ parser.add_argument(
+ "--force", action="store_true", default=False, help="Override hash check"
+ )
+
+ os.makedirs("./cache", exist_ok=True)
+ os.makedirs("./inputs", exist_ok=True)
+ os.makedirs("./temp", exist_ok=True)
+ os.makedirs("./output", exist_ok=True)
+
+ input_file = "./inputs/NetflixViewingHistory.csv"
+ if not os.path.exists(input_file):
+ print(f"Error: Netflix file {input_file} required.")
+ return
+
+ # Parse the arguments
+ args = parser.parse_args()
+
+ # If the API key is not provided via args, ask for it interactively
+ tmdb_api_key = args.tmdb_api_key
+ if tmdb_api_key is None or tmdb_api_key == "":
+ tmdb_api_key = os.environ.get("TMDB_API_KEY", None)
+ if not tmdb_api_key:
+ tmdb_api_key = input("Please enter your TMDB API key: ")
+
+ if tmdb_api_key is None or tmdb_api_key == "":
+ print("Error: TMDB_API_KEY required")
+ return
+
+ print(f"Your TMDB API key is: {tmdb_api_key}")
+
+ missing_file = None
+ if args.missing_imdb_file:
+ if not os.path.exists(args.missing_imdb_file):
+ print(f"Can't find missing imdb id file at: {args.missing_imdb_file}")
+ missing_file = args.missing_imdb_file
+
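+    # Cache check: skip the whole pipeline when neither the input viewing
+    # history nor the generated page has changed since the last run.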
+ input_hash = compute_file_hash(input_file)
+ output_path = "output/index.html"
+ output_hash = None
+ if os.path.exists(output_path):
+ output_hash = compute_file_hash(output_path)
+ last_run = load_cache("last_run.json")
+ if (
+ "input_hash" in last_run
+ and "output_hash" in last_run
+ and last_run["input_hash"] == input_hash
+ and last_run["output_hash"] == output_hash
+ and not args.force
+ ):
+ print(f"Already generated html for {input_file} with hash: {input_hash}")
+ return
+
+ preprocess_netflix()
+ get_tmdb_data(tmdb_api_key, missing_file)
+ add_imdb_data()
+ # make_dataset()
+ make_page()
+
+ last_run = {"input_hash": input_hash}
+ if os.path.exists(output_path):
+ last_run["output_hash"] = compute_file_hash(output_path)
+ save_cache(last_run, "last_run.json")
+ publish_page(output_path)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/default_apps/netflix/netflix.py b/default_apps/netflix/netflix.py
new file mode 100644
index 00000000..c33cf25e
--- /dev/null
+++ b/default_apps/netflix/netflix.py
@@ -0,0 +1,25 @@
+import os
+
+import pandas as pd
+
+
+def run():
+ try:
+ temp_folder = "./temp/"
+ output_file = "1_netflix.csv"
+
+ netflix_df = pd.read_csv("./inputs/NetflixViewingHistory.csv")
+ netflix_df = netflix_df.rename(
+ columns={"Title": "netflix_title", "Date": "netflix_date"}
+ )
+
+ path = os.path.abspath(temp_folder + "/" + output_file)
+ netflix_df.to_csv(path, index=False)
+ print(f"> Writing {output_file} to {temp_folder}")
+
+ except Exception as e:
+ print("Failed to run netflix.py", e)
+
+
+if __name__ == "__main__":
+ run()
diff --git a/default_apps/netflix/page.py b/default_apps/netflix/page.py
new file mode 100644
index 00000000..58e6dbff
--- /dev/null
+++ b/default_apps/netflix/page.py
@@ -0,0 +1,141 @@
+import datetime
+import os
+
+import pandas as pd
+from utils import evaluate_list, image_to_base64
+
+
+def get_this_year(df, year):
+ return df[df["netflix_date"].dt.year == year]
+
+
+def get_imdb_id_rows(df, imdb_id):
+ show_df = df[df["imdb_id"] == imdb_id]
+ return show_df
+
+
+def get_top_n_tv_shows(df, n):
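+    # Rank TV shows by how many viewing-history rows each imdb_id has,
+    # then keep one representative row per show.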
+ top_ids = df[df["tmdb_media_type"] == "tv"]["imdb_id"].value_counts().head(n).index
+ return df.loc[
+ df["imdb_id"].isin(top_ids) & (df["tmdb_media_type"] == "tv")
+ ].drop_duplicates(subset="imdb_id", keep="first")
+
+
+def format_minutes(total_minutes):
+ hours = int(total_minutes // 60)
+ minutes = int(total_minutes % 60)
+ result = []
+
+    if hours > 0:
+        result.append(f"{hours} hr{'s' if hours > 1 else ''}")
+    if minutes > 0:
+        result.append(f"{minutes} min{'s' if minutes > 1 else ''}")
+
+ return ", ".join(result) if result else "0 minutes"
+
+
+def get_week_counts(df):
+ day_counts = df["day_of_week"].value_counts()
+ favorite_days = day_counts.to_dict()
+ return favorite_days
+
+
+def first_day(favourite_days):
+ keys = list(favourite_days.keys())
+ if len(keys) > 0:
+ return keys[0]
+ return "Unknown"
+
+
+def run():
+ try:
+ templates_folder = "./templates"
+ output_file = "index.html"
+
+ imdb_df = pd.read_csv("./temp/3_imdb.csv")
+ imdb_df["netflix_date"] = pd.to_datetime(imdb_df["netflix_date"])
+ imdb_df["day_of_week"] = imdb_df["netflix_date"].dt.day_name()
+ imdb_df["genre_names"] = imdb_df["genre_names"].apply(evaluate_list)
+ imdb_df["genre_ids"] = imdb_df["genre_ids"].apply(evaluate_list)
+
+ current_year = datetime.datetime.now().year
+ year_df = get_this_year(imdb_df, current_year)
+ year_tv_df = year_df[year_df["tmdb_media_type"] == "tv"]
+
+ # year stats
+ total_time = format_minutes(year_tv_df["imdb_runtime_minutes"].sum())
+ year_fav_day = first_day(get_week_counts(year_tv_df))
+ total_unique_show_views = year_tv_df["imdb_id"].nunique()
+ total_views = len(year_tv_df)
+
+ top_5_shows = get_top_n_tv_shows(year_df, 5)
+
+ css = ""
+ with open(templates_folder + "/" + "index.css") as f:
+ css = f.read()
+
+ page = ""
+ with open(templates_folder + "/" + "page.html") as f:
+ page = f.read()
+
+ show_list_card_template = ""
+ with open(templates_folder + "/" + "card.html") as f:
+ show_list_card_template = f.read()
+
+ show_list_html = ""
+ order = 0
+ for _, row in top_5_shows.iterrows():
+ show_rows = get_imdb_id_rows(year_tv_df, row.imdb_id)
+ genres = ", ".join(sorted(row.genre_names))
+ order += 1
+ fav_day = first_day(get_week_counts(show_rows))
+ count = len(show_rows)
+ average_rating = row.imdb_rating
+ tmdb_title = row.tmdb_title
+ imdb_id = row.imdb_id
+ tmdb_poster_url = row.tmdb_poster_url
+ template_vars = {
+ "year": current_year,
+ "imdb_id": imdb_id,
+ "order": order,
+ "tmdb_poster_url": tmdb_poster_url,
+ "tmdb_title": tmdb_title,
+ "average_rating": average_rating,
+ "genres": genres,
+ "count": count,
+ "fav_day": fav_day,
+ }
+ show_list_html += show_list_card_template.format(**template_vars)
+
+ logo_path = "templates/images/nf_logo.png"
+ logo_src = image_to_base64(logo_path)
+
+ page_vars = {
+ "logo_src": logo_src,
+ "css": css,
+ "year": current_year,
+ "total_time": total_time,
+ "year_fav_day": year_fav_day,
+ "total_unique_show_views": total_unique_show_views,
+ "total_views": total_views,
+ "show_list_html": show_list_html,
+ }
+ page_html = page.format(**page_vars)
+
+ print(f"Writing {output_file} to output")
+ path = "output" + "/" + output_file
+ with open(path, "w") as f:
+ f.write(page_html)
+ full_path = os.path.abspath(path)
+ print(f"\nOpen: file:///{full_path}")
+
+ except Exception as e:
+ import traceback
+
+        traceback.print_exc()
+ print("Failed to run page.py", e)
+
+
+if __name__ == "__main__":
+ run()
diff --git a/default_apps/netflix/requirements.txt b/default_apps/netflix/requirements.txt
new file mode 100644
index 00000000..69de461a
--- /dev/null
+++ b/default_apps/netflix/requirements.txt
@@ -0,0 +1,2 @@
+pandas
+requests
diff --git a/default_apps/netflix/run.sh b/default_apps/netflix/run.sh
new file mode 100755
index 00000000..dc7a6ff9
--- /dev/null
+++ b/default_apps/netflix/run.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+uv venv .venv
+uv pip install -r requirements.txt
+TMDB_API_KEY=$(cat inputs/TMDB_API_KEY.txt)
+
+uv run python -c "import syftbox; print(syftbox.__version__)"
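+# extra flags (e.g. --force) are forwarded to main.py via "$@"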
+uv run main.py --tmdb-api-key="$TMDB_API_KEY" --missing-imdb-file=inputs/missing_imdb_id.json "$@"
diff --git a/default_apps/netflix/stats.py b/default_apps/netflix/stats.py
new file mode 100644
index 00000000..0b685079
--- /dev/null
+++ b/default_apps/netflix/stats.py
@@ -0,0 +1,237 @@
+import datetime
+import json
+import os
+
+import pandas as pd
+import requests
+from utils import evaluate_list, image_to_base64, load_cache, save_cache
+
+TMDB_BASE_URL = "https://api.themoviedb.org/3"
+IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500" # w500 refers to image size
+
+tmdb_id_cache = load_cache("tmdb_id.json")
+
+
+def get_this_year(df, year):
+ return df[df["netflix_date"].dt.year == year]
+
+
+def get_imdb_id_rows(df, imdb_id):
+ show_df = df[df["imdb_id"] == imdb_id]
+ return show_df
+
+
+def get_top_n_tv_shows(df, n):
+ top_ids = df[df["tmdb_media_type"] == "tv"]["imdb_id"].value_counts().head(n).index
+ return df.loc[
+ df["imdb_id"].isin(top_ids) & (df["tmdb_media_type"] == "tv")
+ ].drop_duplicates(subset="imdb_id", keep="first")
+
+
+def format_minutes(total_minutes):
+ hours = int(total_minutes // 60)
+ minutes = int(total_minutes % 60)
+ result = []
+
+    if hours > 0:
+        result.append(f"{hours} hr{'s' if hours > 1 else ''}")
+    if minutes > 0:
+        result.append(f"{minutes} min{'s' if minutes > 1 else ''}")
+
+ return ", ".join(result) if result else "0 minutes"
+
+
+def get_week_counts(df):
+ day_counts = df["day_of_week"].value_counts()
+ favorite_days = day_counts.to_dict()
+ return favorite_days
+
+
+def first_day(favourite_days):
+ keys = list(favourite_days.keys())
+ if len(keys) > 0:
+ return keys[0]
+ return "Unknown"
+
+
+def flatten_tmdb_dict(data):
+ flattened_dict = {}
+ flattened_dict["homepage"] = data.get("homepage", None)
+ external_ids = data.get("external_ids", {})
+ flattened_dict["imdb_id"] = external_ids.get("imdb_id", None)
+ flattened_dict["facebook_id"] = external_ids.get("facebook_id", None)
+ flattened_dict["instagram_id"] = external_ids.get("instagram_id", None)
+ flattened_dict["twitter_id"] = external_ids.get("twitter_id", None)
+ genres = data.get("genres", {})
+ genre_ids = []
+ genre_names = []
+ for genre in genres:
+ genre_ids.append(genre["id"])
+ genre_names.append(genre["name"])
+ flattened_dict["genre_ids"] = genre_ids
+ flattened_dict["genre_names"] = genre_names
+
+ flattened_dict["tmdb_title"] = data["name"]
+ poster_path = data["poster_path"]
+ tmdb_poster_url = f"{IMAGE_BASE_URL}{poster_path}"
+ flattened_dict["tmdb_poster_url"] = tmdb_poster_url
+ return flattened_dict
+
+
+def get_tmdb_details_for_tv(tmdb_id, api_key):
+ media_type = "tv"
+ url = f"{TMDB_BASE_URL}/{media_type}/{tmdb_id}"
+ params = {"api_key": api_key, "append_to_response": "external_ids"}
+
+ cache_key = f"{tmdb_id}_{media_type}"
+ if cache_key in tmdb_id_cache:
+ result = tmdb_id_cache[cache_key]
+ out_dict = flatten_tmdb_dict(result)
+ out_dict["tmdb_id"] = tmdb_id
+ return pd.Series(out_dict)
+
+ print(f"> Querying tmdb for {cache_key}")
+ response = requests.get(url, params=params)
+
+ if response.status_code == 200:
+ result = response.json()
+ if result:
+ tmdb_id_cache[cache_key] = result
+ save_cache(tmdb_id_cache, "tmdb_id.json")
+ out_dict = flatten_tmdb_dict(result)
+ out_dict["tmdb_id"] = tmdb_id
+ return pd.Series(out_dict)
+
+ return None
+
+
+def run(api_key):
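+    # Expects ./inputs/stats_data.json with the totals plus a top_5 mapping of
+    # tmdb_id -> view count (the keys read below).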
+ try:
+ templates_folder = "./templates"
+ output_file = "stats.html"
+
+ stats_data = {}
+ with open("./inputs/stats_data.json") as f:
+ stats_data = json.loads(f.read())
+
+ total_time = format_minutes(stats_data["total_time"])
+ total_views = stats_data["total_views"]
+ total_unique_show_views = stats_data["total_unique_show_views"]
+ year_fav_day = stats_data["year_fav_day"]
+
+ current_year = datetime.datetime.now().year
+ top_5 = stats_data["top_5"]
+
+ series = []
+ for tmdb_id, count in top_5.items():
+ series.append(get_tmdb_details_for_tv(tmdb_id, api_key))
+
+ imdb_df = pd.DataFrame(series)
+
+ # add imdb
+ temp_folder = "./temp/"
+
+ titles = pd.read_csv(
+ temp_folder + "/title.basics.tsv.gz",
+ sep="\t",
+ compression="gzip",
+ )
+
+ title_ratings = pd.read_csv(
+ temp_folder + "/title.ratings.tsv.gz",
+ sep="\t",
+ compression="gzip",
+ )
+
+ titles_merged = titles.merge(title_ratings, on="tconst", how="right")
+ titles_cleaned = titles_merged.dropna()
+ titles_cleaned = titles_cleaned[titles_cleaned["isAdult"] == 0]
+
+ titles_cleaned["startYear"] = titles_cleaned["startYear"].replace("\\N", None)
+ titles_cleaned["runtimeMinutes"] = titles_cleaned["runtimeMinutes"].replace(
+ "\\N", None
+ )
+
+ df_merged = imdb_df.merge(
+ titles_cleaned[["tconst", "runtimeMinutes", "averageRating"]],
+ how="left",
+ left_on="imdb_id",
+ right_on="tconst",
+ )
+
+ df_merged = df_merged.rename(
+ columns={
+ "runtimeMinutes": "imdb_runtime_minutes",
+ "averageRating": "imdb_rating",
+ }
+ )
+
+ df_merged = df_merged.drop(columns=["tconst"])
+
+ css = ""
+ with open(templates_folder + "/" + "index.css") as f:
+ css = f.read()
+
+ page = ""
+ with open(templates_folder + "/" + "page.html") as f:
+ page = f.read()
+
+ show_list_card_template = ""
+ with open(templates_folder + "/" + "card.html") as f:
+ show_list_card_template = f.read()
+
+ show_list_html = ""
+ order = 0
+
+ for _, row in df_merged.iterrows():
+ count = top_5[row.tmdb_id]
+ genres = ", ".join(sorted(evaluate_list(row.genre_names)))
+ order += 1
+ average_rating = row.imdb_rating
+ tmdb_title = row.tmdb_title
+ imdb_id = row.imdb_id
+ tmdb_poster_url = row.tmdb_poster_url
+ template_vars = {
+ "year": current_year,
+ "imdb_id": imdb_id,
+ "order": order,
+ "tmdb_poster_url": tmdb_poster_url,
+ "tmdb_title": tmdb_title,
+ "average_rating": average_rating,
+ "genres": genres,
+ "count": count,
+ "fav_day": "",
+ }
+ show_list_html += show_list_card_template.format(**template_vars)
+
+ logo_path = "templates/images/nf_logo.png"
+ logo_src = image_to_base64(logo_path)
+
+ page_vars = {
+ "logo_src": logo_src,
+ "css": css,
+ "year": current_year,
+ "total_time": total_time,
+ "year_fav_day": year_fav_day,
+ "total_unique_show_views": total_unique_show_views,
+ "total_views": total_views,
+ "show_list_html": show_list_html,
+ }
+ page_html = page.format(**page_vars)
+
+ print(f"Writing {output_file} to output")
+ path = "output" + "/" + output_file
+ with open(path, "w") as f:
+ f.write(page_html)
+ full_path = os.path.abspath(path)
+ print(f"\nOpen: file:///{full_path}")
+
+ except Exception as e:
+ import traceback
+
+        traceback.print_exc()
+        print("Failed to run stats.py", e)
+
+
+if __name__ == "__main__":
+    # read the key from the environment instead of hardcoding a secret
+    api_key = os.environ.get("TMDB_API_KEY", "")
+    run(api_key)
diff --git a/default_apps/netflix/templates/card.html b/default_apps/netflix/templates/card.html
new file mode 100644
index 00000000..e9ab58e7
--- /dev/null
+++ b/default_apps/netflix/templates/card.html
@@ -0,0 +1,29 @@
+<a class="show-item" href="https://www.imdb.com/title/{imdb_id}/" target="_blank">
+  <div class="show-content">
+    <div class="show-number">{order}</div>
+    <img class="show-thumbnail" src="{tmdb_poster_url}" alt="{tmdb_title}" />
+    <div class="show-info">
+      <div class="show-title">
+        <span class="show-name">{tmdb_title}</span>
+        <span class="imdb-rating">IMDb {average_rating}</span>
+      </div>
+      <div class="show-details">
+        <div class="show-detail">
+          <span class="detail-label">Genres:</span> {genres}
+        </div>
+        <div class="show-detail">
+          <span class="detail-label">Views:</span> {count}
+        </div>
+        <div class="show-detail">
+          <span class="detail-label">Fav Day:</span> {fav_day}
+        </div>
+      </div>
+    </div>
+  </div>
+</a>
diff --git a/default_apps/netflix/templates/images/nf_logo.png b/default_apps/netflix/templates/images/nf_logo.png
new file mode 100644
index 00000000..4b512829
Binary files /dev/null and b/default_apps/netflix/templates/images/nf_logo.png differ
diff --git a/default_apps/netflix/templates/index.css b/default_apps/netflix/templates/index.css
new file mode 100644
index 00000000..621d99d1
--- /dev/null
+++ b/default_apps/netflix/templates/index.css
@@ -0,0 +1,132 @@
+body {
+ font-family: Arial, sans-serif;
+ background-color: #141414;
+ color: white;
+ margin: 0;
+ padding: 20px;
+ box-sizing: border-box;
+}
+.wrapper {
+ background-color: #1f1f1f;
+ padding: 20px;
+ border-radius: 10px;
+ width: 100%;
+ max-width: 450px;
+ margin: 0 auto;
+ box-shadow: 0 0 10px rgba(255, 255, 255, 0.1);
+}
+
+.viewing-stats {
+ display: flex;
+ justify-content: space-between;
+ margin-bottom: 20px;
+ padding-bottom: 15px;
+ border-bottom: 1px solid #333;
+}
+.stat-item {
+ text-align: center;
+}
+.stat-value {
+ font-size: 18px;
+ font-weight: bold;
+ color: #e50914;
+ display: block;
+ margin-bottom: 5px;
+}
+.stat-label {
+ font-size: 12px;
+ color: #999;
+}
+.show-list {
+ list-style-type: none;
+ padding: 0;
+ margin: 0;
+}
+.show-item {
+ display: block;
+ text-decoration: none;
+ color: white;
+ margin-bottom: 15px;
+ transition:
+ background-color 0.3s,
+ transform 0.2s;
+ padding: 10px;
+ border-radius: 8px;
+}
+.show-item:hover {
+ background-color: #2f2f2f;
+ transform: translateX(5px);
+}
+.show-content {
+ display: flex;
+ align-items: flex-start;
+}
+.show-number {
+ font-size: 24px;
+ margin-right: 15px;
+ min-width: 25px;
+ color: #e50914;
+ font-weight: bold;
+}
+.show-thumbnail {
+ height: 80px;
+ width: auto;
+ object-fit: cover;
+ object-position: top center;
+ border-radius: 8px;
+ margin-right: 15px;
+}
+.show-info {
+ flex-grow: 1;
+ min-width: 0;
+}
+.show-title {
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ width: 100%;
+}
+.show-name {
+ font-size: 16px;
+ font-weight: bold;
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ max-width: calc(
+ 100% - 80px
+ ); /* Adjust based on the width of your IMDb rating */
+}
+.show-details {
+ font-size: 12px;
+ color: #999;
+}
+.show-detail {
+ margin-bottom: 2px;
+}
+.detail-label {
+ font-weight: bold;
+ color: #bbb;
+}
+.header {
+ display: flex;
+ align-items: center;
+ margin-bottom: 20px;
+}
+.netflix-logo {
+ height: 40px;
+ margin-right: 15px;
+}
+h1 {
+ margin: 0;
+ color: #e50914;
+ font-size: 24px;
+}
+.imdb-rating {
+ background-color: #f5c518;
+ color: #000;
+ padding: 2px 5px;
+ border-radius: 4px;
+ font-weight: bold;
+ font-size: 12px;
+ white-space: nowrap;
+}
diff --git a/default_apps/netflix/templates/page.html b/default_apps/netflix/templates/page.html
new file mode 100644
index 00000000..084ab10d
--- /dev/null
+++ b/default_apps/netflix/templates/page.html
@@ -0,0 +1,40 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>My {year} Top Netflix Series</title>
+    <style>
+      {css}
+    </style>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="header">
+        <img class="netflix-logo" src="{logo_src}" alt="Netflix" />
+        <h1>My {year} Top Netflix Series</h1>
+      </div>
+      <div class="viewing-stats">
+        <div class="stat-item">
+          <span class="stat-value">{total_time}</span>
+          <span class="stat-label">Total Time</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{year_fav_day}</span>
+          <span class="stat-label">Top Day</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{total_unique_show_views}</span>
+          <span class="stat-label">Shows</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{total_views}</span>
+          <span class="stat-label">Eps</span>
+        </div>
+      </div>
+      <div class="show-list">
+        {show_list_html}
+      </div>
+    </div>
+  </body>
+</html>
diff --git a/default_apps/netflix/tmdb.py b/default_apps/netflix/tmdb.py
new file mode 100644
index 00000000..db7fcae6
--- /dev/null
+++ b/default_apps/netflix/tmdb.py
@@ -0,0 +1,294 @@
+import datetime
+import json
+import math
+import os
+
+import pandas as pd
+import requests
+from utils import load_cache, normalize_title, save_cache
+
+TMDB_BASE_URL = "https://api.themoviedb.org/3"
+IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500" # w500 refers to image size
+
+tmdb_id_cache = load_cache("tmdb_id.json")
+tmdb_search_cache = load_cache("tmdb_search.json")
+imdb_tmdb_cache = load_cache("imdb_tmdb.json")
+
+
+def add_to_missing(one_or_many, tmdb_id, missing_imdb_id):
+ if not isinstance(one_or_many, list):
+ one_or_many = [one_or_many]
+ for one in one_or_many:
+ missing_imdb_id[one] = tmdb_id
+ save_cache(missing_imdb_id, "missing_imdb_id.json")
+
+
+def in_manual_mapping(original_title, missing_imdb_id):
+ titles = []
+ titles.append(original_title.lower())
+ titles.append(normalize_title(original_title).lower())
+ lower_keys = {k.lower(): v for k, v in missing_imdb_id.items()}
+ for title in titles:
+ for key, value in lower_keys.items():
+ if title in key or key in title:
+ return value
+ return None
+
+
+def search_tmdb_title(title, api_key, missing_imdb_id):
+ url = f"{TMDB_BASE_URL}/search/multi"
+ params = {"api_key": api_key, "query": title}
+ if title in tmdb_search_cache:
+ result = tmdb_search_cache[title]
+ return pd.Series(result)
+
+ data = None
+
+ # check manual mapping where a user can set the imdb tconst id by hand
+ manual_tmdb_id = in_manual_mapping(title, missing_imdb_id)
+ if manual_tmdb_id:
+ print(
+ f"> Resolving {title} imdb_id: {manual_tmdb_id} from supplied missing file"
+ )
+ data = get_tmdb_details_by_imdb_id(manual_tmdb_id, api_key)
+ tmdb_search_cache[title] = data
+ save_cache(tmdb_search_cache, "tmdb_search.json")
+ return pd.Series(data)
+
+ if data is None:
+ print(f"> Searching tmdb for {title}")
+ response = requests.get(url, params=params)
+ if response.status_code == 200:
+ data = response.json()
+ if "results" in data:
+ for result in data["results"]:
+ if result["media_type"] in ["tv", "movie"]:
+ tmdb_search_cache[title] = result
+ save_cache(tmdb_search_cache, "tmdb_search.json")
+ return pd.Series(result)
+
+ return None
+
+
+def flatten_tmdb_dict(data):
+ flattened_dict = {}
+ flattened_dict["homepage"] = data.get("homepage", None)
+ external_ids = data.get("external_ids", {})
+ flattened_dict["imdb_id"] = external_ids.get("imdb_id", None)
+ flattened_dict["facebook_id"] = external_ids.get("facebook_id", None)
+ flattened_dict["instagram_id"] = external_ids.get("instagram_id", None)
+ flattened_dict["twitter_id"] = external_ids.get("twitter_id", None)
+ genres = data.get("genres", {})
+ genre_ids = []
+ genre_names = []
+ for genre in genres:
+ genre_ids.append(genre["id"])
+ genre_names.append(genre["name"])
+ flattened_dict["genre_ids"] = genre_ids
+ flattened_dict["genre_names"] = genre_names
+ return flattened_dict
+
+
+def get_tmdb_id_field(row) -> int | None:
+ try:
+ if "tmdb_id" in row:
+ return int(row["tmdb_id"])
+ except Exception:
+ pass
+ return None
+
+
+def get_tmdb_media_type_field(row) -> str | None:
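+    # A NaN value means the TMDB search found nothing. math.isnan raises
+    # TypeError for strings, so valid media types fall through to the except
+    # branch and are returned there.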
+ try:
+ if "tmdb_media_type" in row:
+ math.isnan(row["tmdb_media_type"])
+ except Exception:
+ if isinstance(row["tmdb_media_type"], str):
+ return row["tmdb_media_type"]
+ pass
+ return None
+
+
+def get_tmdb_details(row, api_key):
+ tmdb_id = get_tmdb_id_field(row)
+ media_type = get_tmdb_media_type_field(row)
+
+ if not isinstance(tmdb_id, int) or not isinstance(media_type, str):
+ print(f"> Skipping {row.netflix_title} no tmdb_id")
+ return None
+ url = f"{TMDB_BASE_URL}/{media_type}/{tmdb_id}"
+ params = {"api_key": api_key, "append_to_response": "external_ids"}
+
+ cache_key = f"{tmdb_id}_{media_type}"
+ if cache_key in tmdb_id_cache:
+ result = tmdb_id_cache[cache_key]
+ return pd.Series(flatten_tmdb_dict(result))
+
+ print(f"> Querying tmdb for {cache_key}")
+ response = requests.get(url, params=params)
+
+ if response.status_code == 200:
+ result = response.json()
+ if result:
+ tmdb_id_cache[cache_key] = result
+ save_cache(tmdb_id_cache, "tmdb_id.json")
+ return pd.Series(flatten_tmdb_dict(result))
+
+ return None
+
+
+def get_tmdb_details_by_imdb_id(imdb_id, api_key):
+ if imdb_id in imdb_tmdb_cache:
+ print(f"Getting imdb_id: {imdb_id} from cache")
+ return imdb_tmdb_cache[imdb_id]
+
+ url = f"https://api.themoviedb.org/3/find/{imdb_id}"
+ params = {"api_key": api_key, "external_source": "imdb_id"}
+
+ print(f"> Querying tmdb for imdb_id: {imdb_id}")
+ response = requests.get(url, params=params)
+
+ if response.status_code == 200:
+ data = response.json()
+ sections = [
+ "movie_results",
+ "person_results",
+ "tv_results",
+ "tv_episode_results",
+ "tv_season_results",
+ ]
+
+ data_dict = None
+ for section in sections:
+ if data.get(section):
+ # Get the first result in the section
+ data_dict = data[section][0]
+ imdb_tmdb_cache[imdb_id] = data_dict
+ save_cache(imdb_tmdb_cache, "imdb_tmdb.json")
+ return data_dict
+
+
+def get_tmdb_id(row, tmdb_api_key, missing_imdb_id):
+ original_title = row["netflix_title"]
+ title = normalize_title(original_title)
+
+ # Check for season/episode/series/volume in the title
+ if any(
+ keyword in title.lower()
+ for keyword in ["season", "episode", "series", "volume", " part"]
+ ):
+ # Split by colon and take the first part
+ title = title.split(":")[0].strip()
+
+ result = search_tmdb_title(title, tmdb_api_key, missing_imdb_id)
+ if result is None:
+ title = title.split(":")[0].strip()
+        # if splitting changed the title, try the search again anyway
+ if title != normalize_title(original_title):
+ result = search_tmdb_title(title, tmdb_api_key, missing_imdb_id)
+ if result is not None:
+ # make sure repeated search gets cached at first title as well
+ tmdb_search_cache[normalize_title(original_title)] = result.to_dict()
+ save_cache(tmdb_search_cache, "tmdb_search.json")
+
+ if result is not None:
+ # shows have names and movies have titles
+ df = pd.DataFrame([result])
+ if "name" in df.columns:
+ title_name = "name"
+ elif "title" in df.columns:
+ title_name = "title"
+ else:
+ raise Exception(f"Title is missing {row}")
+
+ poster_path = result.get("poster_path")
+ tmdb_poster_url = f"{IMAGE_BASE_URL}{poster_path}"
+ df["tmdb_poster_url"] = tmdb_poster_url
+
+ df = df.rename(
+ columns={
+ title_name: "tmdb_title",
+ "id": "tmdb_id",
+ "media_type": "tmdb_media_type",
+ }
+ )
+
+ keep_cols = ["tmdb_id", "tmdb_title", "tmdb_media_type", "tmdb_poster_url"]
+ df = df[keep_cols]
+ return pd.Series(df.iloc[0])
+
+ return None
+
+
+def get_this_year(df, year):
+ return df[df["netflix_date"].dt.year == year]
+
+
+def run(api_key, missing_file):
+ try:
+ missing_imdb_id = {}
+ temp_folder = "./temp/"
+ output_file = "2_tmdb.csv"
+
+ if missing_file is not None:
+ missing_file_path = os.path.abspath(missing_file)
+ if os.path.exists(missing_file_path):
+ try:
+ with open(missing_file_path, "r") as f:
+ missing_imdb_id = json.load(f)
+ except Exception as e:
+ print(f"Failed to load file: {missing_file_path}. {e}")
+
+ tmdb_df = pd.read_csv("./temp/1_netflix.csv")
+
+ tmdb_df["netflix_date"] = pd.to_datetime(
+ tmdb_df["netflix_date"], format=r"%m/%d/%y"
+ )
+
+ current_year = datetime.datetime.now().year
+ tmdb_df = get_this_year(tmdb_df, current_year)
+
+ sample_tmdb_id = tmdb_df.apply(
+ lambda row: pd.concat([row, get_tmdb_id(row, api_key, missing_imdb_id)]),
+ axis=1,
+ )
+
+ df = sample_tmdb_id.apply(
+ lambda row: pd.concat(
+ [
+ row,
+ get_tmdb_details(row, api_key),
+ ]
+ ),
+ axis=1,
+ )
+
+ # split and save missing imdb_id records
+ column_name = "imdb_id"
+ df_missing = df[df[column_name].isna()]
+ if len(df_missing) > 0:
+ missing_path = temp_folder + "/" + "2_missing.csv"
+ print(f"> You have {len(df_missing)} missing rows see: {missing_path}")
+ helper = r"""
+To fix your missing imdb IDs you can create a manual json file.
+
+Run:
+echo '{"Life: Primates": "tt1533395"}' > my-missing-ids.json
+python main.py --missing-imdb-file=my-missing-ids.json
+
+Note: The titles can be partial string matches.
+"""
+ print(helper)
+ df_missing.to_csv(missing_path, index=False)
+
+ df_imdb_id = df[df[column_name].notna()]
+
+ path = os.path.abspath(temp_folder + "/" + output_file)
+ df_imdb_id.to_csv(path, index=False)
+ print(f"> Writing {output_file} to {temp_folder}")
+
+ except Exception as e:
+ import traceback
+
+ print(traceback.print_exc())
+ print("Failed to run tmdb.py", e)
diff --git a/default_apps/netflix/utils.py b/default_apps/netflix/utils.py
new file mode 100644
index 00000000..166f3d27
--- /dev/null
+++ b/default_apps/netflix/utils.py
@@ -0,0 +1,97 @@
+import ast
+import base64
+import hashlib
+import json
+import os
+import re
+import unicodedata
+
+import requests
+
+cache_folder = "./cache/"
+
+
+def save_cache(results_cache, file_name):
+ path = cache_folder + "/" + file_name
+ with open(path, "w") as f:
+ json.dump(results_cache, f)
+
+
+def load_cache(file_name):
+ path = cache_folder + "/" + file_name
+ if os.path.exists(path):
+ with open(path, "r") as f:
+ return json.load(f)
+ else:
+ return {}
+
+
+# Function to normalize the title for search, keeping colons
+def normalize_title(title):
+ # Step 1: Normalize Unicode characters (decompose accents)
+ title = (
+ unicodedata.normalize("NFKD", title).encode("ASCII", "ignore").decode("utf-8")
+ )
+
+ # Step 2: Convert to lowercase
+ title = title.lower()
+
+ # Step 3: Remove unnecessary punctuation except for colons (keep ':')
+ title = re.sub(
+ r"[^\w\s:]", "", title
+ ) # Keeps only letters, numbers, whitespace, and colons
+
+ # Step 4: Strip leading/trailing whitespace
+ return title.strip()
+
+
+def download_file(url, folder_path, file_name=None):
+ # Ensure the folder exists
+ os.makedirs(folder_path, exist_ok=True)
+
+ # Get the file name from the URL if not provided
+ if file_name is None:
+ file_name = url.split("/")[-1]
+
+ # Define the full path to save the file
+ file_path = os.path.join(folder_path, file_name)
+
+ # Download the file
+ response = requests.get(url, stream=True)
+ response.raise_for_status() # Check for errors
+
+ # Write the file to the specified folder
+ with open(file_path, "wb") as file:
+ for chunk in response.iter_content(chunk_size=8192):
+ if chunk:
+ file.write(chunk)
+
+ print(f"File downloaded successfully and saved to: {file_path}")
+
+
+def evaluate_list(value):
+ try:
+ # Use ast.literal_eval to safely evaluate strings into Python literals (like lists, dicts)
+ return ast.literal_eval(value)
+ except (ValueError, SyntaxError):
+ # Return the original value if it's not a valid Python literal
+ return value
+
+
+def image_to_base64(image_path):
+ with open(image_path, "rb") as image_file:
+ encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+ return f"data:image/png;base64,{encoded_string}"
+
+
+def compute_file_hash(filepath, hash_algorithm="sha256"):
+ # Choose the hash algorithm
+ hash_func = getattr(hashlib, hash_algorithm)()
+
+ # Read file in binary mode and update hash in chunks
+ with open(filepath, "rb") as file:
+ while chunk := file.read(8192):
+ hash_func.update(chunk)
+
+ # Return the hex representation of the hash
+ return hash_func.hexdigest()
diff --git a/docker/syftbox.dockerfile b/docker/syftbox.dockerfile
new file mode 100644
index 00000000..3d20c30d
--- /dev/null
+++ b/docker/syftbox.dockerfile
@@ -0,0 +1,13 @@
+# Start from the Python 3.12 Alpine base image
+FROM python:3.12-alpine
+
+# Set the working directory inside the container
+WORKDIR /app
+COPY . /app
+
+RUN pip install uv
+RUN uv venv .venv
+RUN uv pip install -e .
+
+# CMD ["ash", "/app/scripts/server.sh"]
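+# Build sketch (the image tag here is just an example):
+#   docker build -f docker/syftbox.dockerfile -t syftbox .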
+
diff --git a/notebooks/01-trade-create.ipynb b/notebooks/01-trade-create.ipynb
index eb9122d8..3cd728c9 100644
--- a/notebooks/01-trade-create.ipynb
+++ b/notebooks/01-trade-create.ipynb
@@ -20,18 +20,18 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
- "from syftbox.lib import SyftVault, TabularDataset, autocache, config_for_user, ClientConfig"
+ "from syftbox.lib import SyftVault, TabularDataset, autocache, config_for_user"
]
},
{
"cell_type": "code",
- "execution_count": 15,
- "id": "8ed4993c-e154-40a0-8ada-9a9eb320dc79",
+ "execution_count": null,
+ "id": "3",
"metadata": {},
"outputs": [],
"source": [
@@ -40,21 +40,10 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "4",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"client_config = config_for_user(\"andrew@openmined.org\")\n",
"client_config"
@@ -62,28 +51,17 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "5",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"client_config"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
@@ -93,28 +71,17 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "7",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "DatasiteManifest(datasite='andrew@openmined.org', file_path='/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/manifest/manifest.json', datasets={'Netflix_TMDB_IMDB': {'name': 'Netflix_TMDB_IMDB', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}}, code={})"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"manifest"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "8",
"metadata": {},
"outputs": [],
@@ -124,7 +91,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
@@ -134,388 +101,20 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "10",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/d4/s582723j2hqbtw60rnn5345r0000gn/T/ipykernel_94848/754433127.py:1: DtypeWarning: Columns (14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " df = pd.read_csv(autocache(canada_dataset_url))\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"df = pd.read_csv(autocache(canada_dataset_url))"
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"id": "11",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n",
- "0 HS 2021 202102 February 2021 4 0 \n",
- "1 HS 2021 202102 February 2021 2 0 \n",
- "2 HS 2021 202102 February 2021 2 0 \n",
- "3 HS 2021 202102 February 2021 2 0 \n",
- "4 HS 2021 202102 February 2021 2 0 \n",
- "5 HS 2021 202102 February 2021 2 0 \n",
- "6 HS 2021 202102 February 2021 2 0 \n",
- "7 HS 2021 202102 February 2021 2 0 \n",
- "8 HS 2021 202102 February 2021 2 0 \n",
- "9 HS 2021 202102 February 2021 2 0 \n",
- "\n",
- " Trade Flow Code Trade Flow Reporter Code Reporter ... \\\n",
- "0 1 Imports 124 Canada ... \n",
- "1 1 Imports 124 Canada ... \n",
- "2 1 Imports 124 Canada ... \n",
- "3 1 Imports 124 Canada ... \n",
- "4 1 Imports 124 Canada ... \n",
- "5 1 Imports 124 Canada ... \n",
- "6 1 Imports 124 Canada ... \n",
- "7 1 Imports 124 Canada ... \n",
- "8 1 Imports 124 Canada ... \n",
- "9 1 Imports 124 Canada ... \n",
- "\n",
- " Partner Partner ISO Commodity Code \\\n",
- "0 Other Asia, nes NaN 6117 \n",
- "1 Egypt NaN 18 \n",
- "2 United Kingdom NaN 18 \n",
- "3 United Rep. of Tanzania NaN 18 \n",
- "4 Singapore NaN 18 \n",
- "5 Viet Nam NaN 18 \n",
- "6 South Africa NaN 18 \n",
- "7 Spain NaN 18 \n",
- "8 Sweden NaN 18 \n",
- "9 Venezuela NaN 18 \n",
- "\n",
- " Commodity Qty Unit Code Qty Unit \\\n",
- "0 Clothing accessories; made up, knitted or croc... 0 NaN \n",
- "1 Cocoa and cocoa preparations 0 NaN \n",
- "2 Cocoa and cocoa preparations 0 NaN \n",
- "3 Cocoa and cocoa preparations 0 NaN \n",
- "4 Cocoa and cocoa preparations 0 NaN \n",
- "5 Cocoa and cocoa preparations 0 NaN \n",
- "6 Cocoa and cocoa preparations 0 NaN \n",
- "7 Cocoa and cocoa preparations 0 NaN \n",
- "8 Cocoa and cocoa preparations 0 NaN \n",
- "9 Cocoa and cocoa preparations 0 NaN \n",
- "\n",
- " Qty Netweight (kg) Trade Value (US$) Flag \n",
- "0 NaN NaN 9285 0 \n",
- "1 NaN 0.0 116604 0 \n",
- "2 NaN 0.0 1495175 0 \n",
- "3 NaN 0.0 2248 0 \n",
- "4 NaN 0.0 47840 0 \n",
- "5 NaN 0.0 3526 0 \n",
- "6 NaN 0.0 5462 0 \n",
- "7 NaN 0.0 311425 0 \n",
- "8 NaN 0.0 11786 0 \n",
- "9 NaN 0.0 33715 0 \n",
- "\n",
- "[10 rows x 22 columns]"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# private data samples\n",
"ca_data = df[0:10]\n",
@@ -524,369 +123,10 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "12",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n",
- "10 HS 2021 202102 February 2021 2 0 \n",
- "11 HS 2021 202102 February 2021 2 0 \n",
- "12 HS 2021 202102 February 2021 2 0 \n",
- "13 HS 2021 202102 February 2021 2 0 \n",
- "14 HS 2021 202102 February 2021 2 0 \n",
- "15 HS 2021 202102 February 2021 2 0 \n",
- "16 HS 2021 202102 February 2021 2 0 \n",
- "17 HS 2021 202102 February 2021 2 0 \n",
- "18 HS 2021 202102 February 2021 2 0 \n",
- "19 HS 2021 202102 February 2021 2 0 \n",
- "\n",
- " Trade Flow Code Trade Flow Reporter Code Reporter ... Partner \\\n",
- "10 1 Imports 124 Canada ... Bangladesh \n",
- "11 1 Imports 124 Canada ... Haiti \n",
- "12 1 Imports 124 Canada ... Guatemala \n",
- "13 1 Imports 124 Canada ... Iraq \n",
- "14 1 Imports 124 Canada ... Israel \n",
- "15 1 Imports 124 Canada ... Italy \n",
- "16 1 Imports 124 Canada ... Jordan \n",
- "17 1 Imports 124 Canada ... Rep. of Moldova \n",
- "18 1 Imports 124 Canada ... Nigeria \n",
- "19 1 Imports 124 Canada ... Slovenia \n",
- "\n",
- " Partner ISO Commodity Code \\\n",
- "10 NaN 19 \n",
- "11 NaN 19 \n",
- "12 NaN 19 \n",
- "13 NaN 19 \n",
- "14 NaN 19 \n",
- "15 NaN 19 \n",
- "16 NaN 19 \n",
- "17 NaN 19 \n",
- "18 NaN 19 \n",
- "19 NaN 19 \n",
- "\n",
- " Commodity Qty Unit Code Qty Unit \\\n",
- "10 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "11 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "12 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "13 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "14 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "15 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "16 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "17 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "18 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "19 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "\n",
- " Qty Netweight (kg) Trade Value (US$) Flag \n",
- "10 NaN 0.0 227222 0 \n",
- "11 NaN 0.0 14748 0 \n",
- "12 NaN 0.0 1314 0 \n",
- "13 NaN 0.0 1825 0 \n",
- "14 NaN 0.0 1063627 0 \n",
- "15 NaN 0.0 8359327 0 \n",
- "16 NaN 0.0 16858 0 \n",
- "17 NaN 0.0 29897 0 \n",
- "18 NaN 0.0 22235 0 \n",
- "19 NaN 0.0 1328 0 \n",
- "\n",
- "[10 rows x 22 columns]"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Mock data samples\n",
"mock_ca_data = df[10:20]\n",
@@ -895,369 +135,10 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "13",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Classification | \n",
- " Year | \n",
- " Period | \n",
- " Period Desc. | \n",
- " Aggregate Level | \n",
- " Is Leaf Code | \n",
- " Trade Flow Code | \n",
- " Trade Flow | \n",
- " Reporter Code | \n",
- " Reporter | \n",
- " ... | \n",
- " Partner | \n",
- " Partner ISO | \n",
- " Commodity Code | \n",
- " Commodity | \n",
- " Qty Unit Code | \n",
- " Qty Unit | \n",
- " Qty | \n",
- " Netweight (kg) | \n",
- " Trade Value (US$) | \n",
- " Flag | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 10 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Bangladesh | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 227222 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Haiti | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 14748 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Guatemala | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 1314 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Iraq | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 1825 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Israel | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 1063627 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Italy | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 8359327 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Jordan | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 16858 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Rep. of Moldova | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 29897 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Nigeria | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 22235 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " HS | \n",
- " 2021 | \n",
- " 202102 | \n",
- " February 2021 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " Imports | \n",
- " 124 | \n",
- " Canada | \n",
- " ... | \n",
- " Slovenia | \n",
- " NaN | \n",
- " 19 | \n",
- " Preparations of cereals, flour, starch or milk... | \n",
- " 0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.0 | \n",
- " 1328 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
10 rows × 22 columns
\n",
- "
"
- ],
- "text/plain": [
- " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n",
- "10 HS 2021 202102 February 2021 2 0 \n",
- "11 HS 2021 202102 February 2021 2 0 \n",
- "12 HS 2021 202102 February 2021 2 0 \n",
- "13 HS 2021 202102 February 2021 2 0 \n",
- "14 HS 2021 202102 February 2021 2 0 \n",
- "15 HS 2021 202102 February 2021 2 0 \n",
- "16 HS 2021 202102 February 2021 2 0 \n",
- "17 HS 2021 202102 February 2021 2 0 \n",
- "18 HS 2021 202102 February 2021 2 0 \n",
- "19 HS 2021 202102 February 2021 2 0 \n",
- "\n",
- " Trade Flow Code Trade Flow Reporter Code Reporter ... Partner \\\n",
- "10 1 Imports 124 Canada ... Bangladesh \n",
- "11 1 Imports 124 Canada ... Haiti \n",
- "12 1 Imports 124 Canada ... Guatemala \n",
- "13 1 Imports 124 Canada ... Iraq \n",
- "14 1 Imports 124 Canada ... Israel \n",
- "15 1 Imports 124 Canada ... Italy \n",
- "16 1 Imports 124 Canada ... Jordan \n",
- "17 1 Imports 124 Canada ... Rep. of Moldova \n",
- "18 1 Imports 124 Canada ... Nigeria \n",
- "19 1 Imports 124 Canada ... Slovenia \n",
- "\n",
- " Partner ISO Commodity Code \\\n",
- "10 NaN 19 \n",
- "11 NaN 19 \n",
- "12 NaN 19 \n",
- "13 NaN 19 \n",
- "14 NaN 19 \n",
- "15 NaN 19 \n",
- "16 NaN 19 \n",
- "17 NaN 19 \n",
- "18 NaN 19 \n",
- "19 NaN 19 \n",
- "\n",
- " Commodity Qty Unit Code Qty Unit \\\n",
- "10 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "11 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "12 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "13 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "14 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "15 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "16 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "17 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "18 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "19 Preparations of cereals, flour, starch or milk... 0 NaN \n",
- "\n",
- " Qty Netweight (kg) Trade Value (US$) Flag \n",
- "10 NaN 0.0 227222 0 \n",
- "11 NaN 0.0 14748 0 \n",
- "12 NaN 0.0 1314 0 \n",
- "13 NaN 0.0 1825 0 \n",
- "14 NaN 0.0 1063627 0 \n",
- "15 NaN 0.0 8359327 0 \n",
- "16 NaN 0.0 16858 0 \n",
- "17 NaN 0.0 29897 0 \n",
- "18 NaN 0.0 22235 0 \n",
- "19 NaN 0.0 1328 0 \n",
- "\n",
- "[10 rows x 22 columns]"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"mock_ca_data"
]
@@ -1272,7 +153,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
@@ -1282,21 +163,10 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "16",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets')"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"datasets_path = manifest.create_public_folder(\"datasets\")\n",
"datasets_path"
@@ -1304,21 +174,10 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "17",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/trade_data')"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"dataset_path = datasets_path / \"trade_data\"\n",
"dataset_path"
@@ -1326,7 +185,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
@@ -1338,21 +197,10 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"id": "19",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/trade_data/trade_mock.csv')"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"csv_file = dataset_path / \"trade_mock.csv\"\n",
"csv_file"
@@ -1360,7 +208,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": null,
"id": "20",
"metadata": {},
"outputs": [],
@@ -1370,160 +218,28 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": null,
"id": "21",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "Trade Data\n",
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Attribute | \n",
- " Value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Name | \n",
- " Trade Data | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Syft Link | \n",
- " ..._data/trade_mock.csv | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Schema | \n",
- " {'Unnamed: 0': 'int64', 'Classification': 'obj... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " Readme | \n",
- " ...None | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " Loader | \n",
- " ...None | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=None, loader_link=None, _client_config=None, has_private=True)"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "dataset = TabularDataset.from_csv(csv_file, \n",
- " name=\"Trade Data\", \n",
- " has_private=True)\n",
+ "dataset = TabularDataset.from_csv(csv_file, name=\"Trade Data\", has_private=True)\n",
"dataset"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": null,
"id": "22",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "Trade Data\n",
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Attribute | \n",
- " Value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Name | \n",
- " Trade Data | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Syft Link | \n",
- " ..._data/trade_mock.csv | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Schema | \n",
- " {'Unnamed: 0': 'int64', 'Classification': 'obj... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " Readme | \n",
- " ...None | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " Loader | \n",
- " ...None | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=None, loader_link=None, _client_config=None, has_private=True)"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"dataset"
]
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": null,
"id": "23",
"metadata": {},
"outputs": [],
@@ -1533,18 +249,10 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": null,
"id": "24",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ Dataset Published\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"dataset.publish(manifest, overwrite=True)"
]
@@ -1567,7 +275,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": null,
"id": "27",
"metadata": {},
"outputs": [],
@@ -1577,7 +285,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": null,
"id": "28",
"metadata": {},
"outputs": [],
@@ -1588,42 +296,20 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": null,
"id": "29",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 53,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"SyftVault.link_private(csv_file, private_path)"
]
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": null,
"id": "30",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "SyftVault(mapping={'andrew@openmined.org/public/datasets/trade_data/trade_mock.csv': '/Users/atrask/Documents/GitHub/syft/notebooks/trade_private.csv'})"
- ]
- },
- "execution_count": 54,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"SyftVault.load_vault()"
]
diff --git a/notebooks/02-trade-code.ipynb b/notebooks/02-trade-code.ipynb
index db51b561..4916f115 100644
--- a/notebooks/02-trade-code.ipynb
+++ b/notebooks/02-trade-code.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
@@ -22,21 +22,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "2",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"client_config = config_for_user(\"andrew@openmined.org\")\n",
"client_config"
@@ -44,39 +33,20 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "3",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "> Setting Sync Dir to: /Users/atrask/Desktop/SyftBox\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"client_config.use()"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "4",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "DatasiteManifest(datasite='andrew@openmined.org', file_path='/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/manifest/manifest.json', datasets={'Netflix_TMDB_IMDB': {'name': 'Netflix_TMDB_IMDB', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}, 'Trade Data': {'name': 'Trade Data', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/trade_mock.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}}, code={})"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"manifest = client_config.manifest\n",
"manifest"
@@ -84,80 +54,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "5",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Name | \n",
- " Private | \n",
- " Syft Link | \n",
- " Schema | \n",
- " Readme | \n",
- " Loader | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Netflix_TMDB_IMDB | \n",
- " True | \n",
- " ...istory_TMDB_IMDB.csv | \n",
- " ['Unnamed: 0', 'netflix_title', 'netflix_date'... | \n",
- " _tmdb_imdb/README.md | \n",
- " _tmdb_imdb/loader.py | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Trade Data | \n",
- " True | \n",
- " ..._data/trade_mock.csv | \n",
- " ['Unnamed: 0', 'Classification', 'Year', 'Peri... | \n",
- " trade_data/README.md | \n",
- " trade_data/loader.py | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Netflix_TMDB_IMDB | \n",
- " True | \n",
- " ...istory_TMDB_IMDB.csv | \n",
- " ['Unnamed: 0', 'netflix_title', 'netflix_date'... | \n",
- " _tmdb_imdb/README.md | \n",
- " _tmdb_imdb/loader.py | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "DatasetResults(data=[TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True), TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True), TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)])"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"datasets = client_config.get_datasets()\n",
"datasets"
@@ -173,85 +73,10 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "7",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "Netflix_TMDB_IMDB\n",
- "\n",
- "README:\n",
- "Netflix_TMDB_IMDB
\n",
- "Private data: True
\n",
- "Schema: {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}
\n",
- "Import Syntax
\n",
- "client_config.use()\n",
- "from syftbox.lib.andrew.at.openmined.org.datasets import netflix_tmdb_imdb
\n",
- "Python Loader Example
\n",
- "df = pd.read_csv(sy_path(\"syft://andrew@openmined.org:80/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv\"))
\n",
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Attribute | \n",
- " Value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Name | \n",
- " Netflix_TMDB_IMDB | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Syft Link | \n",
- " ...istory_TMDB_IMDB.csv | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Schema | \n",
- " {'Unnamed: 0': 'int64', 'netflix_title': 'obje... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " Readme | \n",
- " ..._tmdb_imdb/README.md | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " Loader | \n",
- " ..._tmdb_imdb/loader.py | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"trade_data = datasets[0]\n",
"trade_data"
@@ -259,42 +84,20 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv')"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"trade_data.file_path"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "9",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'from syftbox.lib.andrew.at.openmined.org.datasets import netflix_tmdb_imdb'"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"trade_data.import_string"
]
@@ -309,85 +112,10 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "11",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "Trade Data\n",
- "\n",
- "README:\n",
- "Trade Data
\n",
- "Private data: True
\n",
- "Schema: {'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}
\n",
- "Import Syntax
\n",
- "client_config.use()\n",
- "from syftbox.lib.andrew.at.openmined.org.datasets import trade_data
\n",
- "Python Loader Example
\n",
- "df = pd.read_csv(sy_path(\"syft://andrew@openmined.org:80/public/datasets/trade_data/trade_mock.csv\"))
\n",
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Attribute | \n",
- " Value | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Name | \n",
- " Trade Data | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Syft Link | \n",
- " ..._data/trade_mock.csv | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Schema | \n",
- " {'Unnamed: 0': 'int64', 'Classification': 'obj... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " Readme | \n",
- " ...trade_data/README.md | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " Loader | \n",
- " ...trade_data/loader.py | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from syftbox.lib.andrew.at.openmined.org.datasets import trade_data\n",
"\n",
@@ -396,50 +124,20 @@
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "c48a659a-3560-47a9-a53f-e0d79664bed1",
+ "execution_count": null,
+ "id": "12",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[2K\u001b[2mResolved \u001b[1m1 package\u001b[0m \u001b[2min 345ms\u001b[0m\u001b[0m \u001b[0m\n",
- "\u001b[2K\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 2.06s\u001b[0m\u001b[0m \n",
- "\u001b[2K\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m \u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mopendp\u001b[0m\u001b[2m==0.11.1\u001b[0m\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!uv pip install opendp"
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "12",
+ "execution_count": null,
+ "id": "13",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(9.738381, -11.181064117927578)\n",
- "Got mock\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "(9.738381, -11.181064117927578)"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"@syftbox_code\n",
"def myanalysis(trade_data):\n",
@@ -471,21 +169,10 @@
},
{
"cell_type": "code",
- "execution_count": 15,
- "id": "13",
+ "execution_count": null,
+ "id": "14",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'/Users/atrask/Desktop/SyftBox/staging/myanalysis'"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# TEMP bug where we cant use theirs_with_my_read because the parent write is ignored but allowing the perm file to set its own\n",
"# rules wont work either so we need to solve the permissioning of files themselves\n",
@@ -498,7 +185,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "14",
+ "id": "15",
"metadata": {},
"outputs": [],
"source": [
@@ -508,7 +195,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "15",
+ "id": "16",
"metadata": {},
"outputs": [],
"source": [
@@ -518,7 +205,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "16",
+ "id": "17",
"metadata": {},
"outputs": [],
"source": []
diff --git a/notebooks/03-netflix-code.ipynb b/notebooks/03-netflix-code.ipynb
index e8986ea0..681fd0ff 100644
--- a/notebooks/03-netflix-code.ipynb
+++ b/notebooks/03-netflix-code.ipynb
@@ -220,7 +220,6 @@
"\n",
" import numpy as np\n",
" import pandas as pd\n",
- "\n",
" from Pyfhel import PyCtxt, Pyfhel\n",
"\n",
" crypto_folder = \"./crypto\"\n",
@@ -319,7 +318,6 @@
"source": [
"def decode_results(HE, stat_keys, path):\n",
" import numpy as np\n",
- "\n",
" from Pyfhel import PyCtxt\n",
"\n",
" crypto_folder = path + \"/crypto\"\n",
diff --git a/projects/netflix_stats/main.py b/projects/netflix_stats/main.py
index 8e7c591f..d485ae41 100644
--- a/projects/netflix_stats/main.py
+++ b/projects/netflix_stats/main.py
@@ -106,7 +106,6 @@ def netflix_stats(datasite, df):
import numpy as np
import pandas as pd
-
from Pyfhel import Pyfhel
from Pyfhel.PyCtxt import PyCtxt
diff --git a/pyproject.toml b/pyproject.toml
index b56ef7b6..89499cd3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ include-package-data = true # Include package data
[tool.setuptools.package-data]
-syftbox = ["*.css", ".js", ".html", ".zip"]
+syftbox = ["*.css", ".js", ".html", ".zip", ".sh"]
[project.scripts]
syftbox = "syftbox.main:main"
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 3d145e4c..8c85eb35 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -1,9 +1,9 @@
#!/bin/bash
-source ./build.sh
-source ./ssh.sh
+source ./scripts/build.sh
+source ./scripts/ssh.sh
-LOCAL_FILE="dist/syftbox-0.1.0-py3-none-any.whl"
+LOCAL_FILE="./dist/syftbox-0.1.0-py3-none-any.whl"
REMOTE_PATH="~"
# Use scp to transfer the file to the remote server
diff --git a/syftbox/client/client.py b/syftbox/client/client.py
index 904865e2..7fb09f75 100644
--- a/syftbox/client/client.py
+++ b/syftbox/client/client.py
@@ -2,6 +2,7 @@
import atexit
import importlib
import os
+import platform
import subprocess
import sys
import threading
@@ -108,10 +109,13 @@ def copy_icon_file(icon_folder: str, dest_folder: str) -> None:
def load_or_create_config(args) -> ClientConfig:
+ syft_config_dir = os.path.abspath(os.path.expanduser("~/.syftbox"))
+ os.makedirs(syft_config_dir, exist_ok=True)
+
client_config = None
try:
client_config = ClientConfig.load(args.config_path)
- except Exception as e:
+ except Exception:
pass
if client_config is None and args.config_path:
@@ -141,7 +145,8 @@ def load_or_create_config(args) -> ClientConfig:
if not os.path.exists(client_config.sync_folder):
os.makedirs(client_config.sync_folder, exist_ok=True)
- # copy_icon_file(ICON_FOLDER, client_config.sync_folder)
+ if platform.system() == "Darwin":
+ copy_icon_file(ICON_FOLDER, client_config.sync_folder)
if args.email:
client_config.email = args.email
@@ -323,14 +328,16 @@ def parse_args():
parser = argparse.ArgumentParser(
description="Run the web application with plugins.",
)
- parser.add_argument("--config_path", type=str, default=DEFAULT_CONFIG_PATH, help="config path")
+ parser.add_argument(
+ "--config_path", type=str, default=DEFAULT_CONFIG_PATH, help="config path"
+ )
parser.add_argument("--sync_folder", type=str, help="sync folder path")
parser.add_argument("--email", type=str, help="email")
parser.add_argument("--port", type=int, default=8080, help="Port number")
parser.add_argument(
"--server",
type=str,
- default="http://20.168.10.234:8080",
+ default="http://20.168.10.234:8080",
help="Server",
)
return parser.parse_args()
@@ -594,8 +601,6 @@ def main() -> None:
print("Dev Mode: ", os.environ.get("SYFTBOX_DEV"))
print("Wheel: ", os.environ.get("SYFTBOX_WHEEL"))
- print(client_config)
-
debug = True
uvicorn.run(
"syftbox.client.client:app"
diff --git a/syftbox/client/plugins/apps.py b/syftbox/client/plugins/apps.py
index 83baca09..13905517 100644
--- a/syftbox/client/plugins/apps.py
+++ b/syftbox/client/plugins/apps.py
@@ -9,6 +9,7 @@
perm_file_path,
)
+
def find_and_run_script(task_path, extra_args):
script_path = os.path.join(task_path, "run.sh")
env = os.environ.copy() # Copy the current environment
@@ -19,12 +20,16 @@ def find_and_run_script(task_path, extra_args):
os.chmod(script_path, os.stat(script_path).st_mode | 0o111)
# Check if the script has a shebang
- with open(script_path, 'r') as script_file:
+ with open(script_path, "r") as script_file:
first_line = script_file.readline().strip()
- has_shebang = first_line.startswith('#!')
+ has_shebang = first_line.startswith("#!")
# Prepare the command based on whether there's a shebang or not
- command = [script_path] + extra_args if has_shebang else ["/bin/bash", script_path] + extra_args
+ command = (
+ [script_path] + extra_args
+ if has_shebang
+ else ["/bin/bash", script_path] + extra_args
+ )
try:
result = subprocess.run(
@@ -44,13 +49,15 @@ def find_and_run_script(task_path, extra_args):
raise FileNotFoundError(f"run.sh not found in {task_path}")
-
logger = logging.getLogger(__name__)
DEFAULT_SCHEDULE = 10000
DESCRIPTION = "Runs Apps"
-DEFAULT_APPS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'default_apps'))
+DEFAULT_APPS_PATH = os.path.abspath(
+ os.path.join(os.path.dirname(__file__), "..", "..", "..", "default_apps")
+)
+
def copy_default_apps(apps_path):
if not os.path.exists(DEFAULT_APPS_PATH):
@@ -60,13 +67,16 @@ def copy_default_apps(apps_path):
for app in os.listdir(DEFAULT_APPS_PATH):
src_app_path = os.path.join(DEFAULT_APPS_PATH, app)
dst_app_path = os.path.join(apps_path, app)
-
+
if os.path.isdir(src_app_path):
if os.path.exists(dst_app_path):
- shutil.rmtree(dst_app_path)
- shutil.copytree(src_app_path, dst_app_path)
+ print(f"App already installed at: {dst_app_path}")
+ # shutil.rmtree(dst_app_path)
+ else:
+ shutil.copytree(src_app_path, dst_app_path)
print(f"Copied default app: {app}")
+
def run_apps(client_config):
# create the directory
apps_path = client_config.sync_folder + "/" + "apps"
diff --git a/syftbox/client/plugins/create_datasite.py b/syftbox/client/plugins/create_datasite.py
index 1b0f52a9..4df42fdf 100644
--- a/syftbox/client/plugins/create_datasite.py
+++ b/syftbox/client/plugins/create_datasite.py
@@ -25,6 +25,19 @@ def claim_datasite(client_config):
except Exception as e:
print("Failed to create perm file", e)
+ public_path = client_config.datasite_path + "/" + "public"
+ os.makedirs(public_path, exist_ok=True)
+ public_file_path = perm_file_path(public_path)
+ if os.path.exists(public_file_path):
+ public_perm_file = SyftPermission.load(public_file_path)
+ else:
+ print(f"> {client_config.email} Creating Public Permfile")
+ try:
+ public_perm_file = SyftPermission.mine_with_public_read(client_config.email)
+ public_perm_file.save(public_file_path)
+ except Exception as e:
+ print("Failed to create perm file", e)
+
def run(shared_state):
client_config = shared_state.client_config
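
Taken together with the root perm file created just above it, this hunk gives every claimed datasite a world-readable public/ folder. A condensed sketch of the new step, assuming SyftPermission and perm_file_path are importable from syftbox.lib as elsewhere in this diff (the helper name ensure_public_folder is illustrative; error handling is elided):

import os

from syftbox.lib import SyftPermission, perm_file_path  # assumed import location

def ensure_public_folder(client_config) -> None:
    # Create <datasite>/public and, if it has no perm file yet, one that
    # keeps write access for the owner while granting everyone read access.
    public_path = os.path.join(client_config.datasite_path, "public")
    os.makedirs(public_path, exist_ok=True)
    public_file_path = perm_file_path(public_path)
    if not os.path.exists(public_file_path):
        perm = SyftPermission.mine_with_public_read(client_config.email)
        perm.save(public_file_path)
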
diff --git a/syftbox/client/plugins/sync.py b/syftbox/client/plugins/sync.py
index 1bf683e1..17056249 100644
--- a/syftbox/client/plugins/sync.py
+++ b/syftbox/client/plugins/sync.py
@@ -5,6 +5,7 @@
from threading import Event
import requests
+from watchdog.events import DirModifiedEvent
from syftbox.lib import (
DirState,
@@ -230,7 +231,10 @@ def push_changes(client_config, changes):
f"> {client_config.email} FAILED /write {change.kind} {change.internal_path}",
)
except Exception as e:
- print(f"Failed to call /write on the server for {change.internal_path}", str(e))
+ print(
+ f"Failed to call /write on the server for {change.internal_path}",
+ str(e),
+ )
return written_changes
@@ -335,14 +339,14 @@ def ascii_for_change(changes) -> str:
def handle_empty_folders(client_config, datasite):
changes = []
datasite_path = os.path.join(client_config.sync_folder, datasite)
-
+
for root, dirs, files in os.walk(datasite_path):
if not files and not dirs:
# This is an empty folder
relative_path = os.path.relpath(root, datasite_path)
- if relative_path == '.':
+ if relative_path == ".":
continue # Skip the root folder
-
+
change = FileChange(
kind=FileChangeKind.CREATE,
parent_path=datasite,
@@ -352,7 +356,7 @@ def handle_empty_folders(client_config, datasite):
sync_folder=client_config.sync_folder,
)
changes.append(change)
-
+
return changes
@@ -397,7 +401,7 @@ def sync_up(client_config):
# get the new dir state
new_dir_state = hash_dir(client_config.sync_folder, datasite, IGNORE_FOLDERS)
changes = diff_dirstate(old_dir_state, new_dir_state)
-
+
# Add handling for empty folders
empty_folder_changes = handle_empty_folders(client_config, datasite)
changes.extend(empty_folder_changes)
@@ -469,7 +473,7 @@ def sync_down(client_config) -> int:
continue
changes = diff_dirstate(new_dir_state, remote_dir_state)
-
+
# Add handling for empty folders
empty_folder_changes = handle_empty_folders(client_config, datasite)
changes.extend(empty_folder_changes)
@@ -596,11 +600,21 @@ def do_sync(shared_state):
def run(shared_state, *args, **kwargs):
if len(args) == 1:
event = args[0]
-
# ignore certain files / folders
if hasattr(event, "src_path"):
if CLIENT_CHANGELOG_FOLDER in event.src_path:
return
+
+ # ignore these events for now on linux
+ # FileOpenedEvent
+ # FileClosedNoWriteEvent
+ # DirModifiedEvent
+ if event.event_type in ["opened", "closed_no_write"]:
+ return
+
+ if isinstance(event, DirModifiedEvent):
+ return
+
shared_state.fs_events.append(event)
if "sync" not in shared_state.timers:
diff --git a/syftbox/lib/lib.py b/syftbox/lib/lib.py
index c4f3cf55..56256fae 100644
--- a/syftbox/lib/lib.py
+++ b/syftbox/lib/lib.py
@@ -1,36 +1,20 @@
from __future__ import annotations
-import ast
import base64
-import copy
import hashlib
-import inspect
import json
import os
-import pkgutil
import re
-import shutil
-import subprocess
-import sys
-import sysconfig
-import textwrap
import threading
-import types
import zlib
from collections.abc import Callable
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from datetime import datetime
from enum import Enum
-from importlib.abc import Loader, MetaPathFinder
-from importlib.util import spec_from_loader
from pathlib import Path
from threading import Lock
from typing import Any
-from urllib.parse import urlparse
-import markdown
-import pandas as pd
-import pkg_resources
import requests
from typing_extensions import Self
@@ -49,6 +33,7 @@ def is_primitive_json_serializable(obj):
return True
return False
+
def pack(obj) -> Any:
if is_primitive_json_serializable(obj):
return obj
@@ -539,6 +524,7 @@ def filter_read_state(user_email: str, dir_state: DirState, perm_tree: Permissio
filtered_tree[file_path] = file_info
return filtered_tree
+
class ResettableTimer:
def __init__(self, timeout, callback, *args, **kwargs):
self.timeout = timeout
@@ -659,6 +645,7 @@ def str_to_bool(bool_str: str | None) -> bool:
result = True
return result
+
def validate_email(email: str) -> bool:
# Define a regex pattern for a valid email
email_regex = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
@@ -765,7 +752,6 @@ def use(self):
os.environ["SYFTBOX_SYNC_DIR"] = self.sync_folder
print(f"> Setting Sync Dir to: {self.sync_folder}")
-
@classmethod
def create_manifest(cls, path: str, email: str):
# make a dir and set the permissions
diff --git a/syftbox/server/server.py b/syftbox/server/server.py
index 98f5eb81..d5762389 100644
--- a/syftbox/server/server.py
+++ b/syftbox/server/server.py
@@ -170,6 +170,20 @@ async def lifespan(app: FastAPI):
___) | |_| | _| |_| |_) | (_) > <
|____/ \__, |_| \__|____/ \___/_/\_\
|___/
+
+
+# MacOS and Linux
+# install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# create a virtualenv somewhere
+uv venv .venv
+
+# install the wheel
+uv pip install http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl --reinstall
+
+# run the client
+uv run syftbox client
"""
@@ -178,6 +192,18 @@ async def get_ascii_art():
return ascii_art
+@app.get("/wheel/{path:path}", response_class=HTMLResponse)
+async def download_wheel(request: Request, path: str):
+    if path == "":  # an empty path means "/wheel/" itself
+        return RedirectResponse(url="/")
+
+    filename = path.split("/")[0]
+    if filename.endswith(".whl"):
+        wheel_path = os.path.expanduser("~/syftbox-0.1.0-py3-none-any.whl")
+        return FileResponse(wheel_path, media_type="application/octet-stream")
+    # Non-wheel requests just echo the first path segment back.
+    return filename
+
+
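
A quick sanity check for the new route, sketched with requests (the host is the test server already referenced in this diff; substitute your own deployment):

import requests

# Server address taken from the install snippet above; adjust as needed.
url = "http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl"
resp = requests.get(url, timeout=30)
resp.raise_for_status()

with open("syftbox-0.1.0-py3-none-any.whl", "wb") as f:
    f.write(resp.content)
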
def get_file_list(directory="."):
file_list = []
for item in os.listdir(directory):