From 9594ecc1a86d346f74bb89967d9b9e841508e00c Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Mon, 30 Sep 2024 18:21:37 +1000 Subject: [PATCH 1/3] Added wheel to cache server - fixed missing .sh scripts from wheel package - added netflix app back - disabled auto replace of apps every tick --- .dockerignore | 8 + MANIFEST.in | 1 + README.md | 16 +- default_apps/adder/main.py | 14 +- default_apps/netflix/.gitignore | 4 + default_apps/netflix/README.md | 57 ++++ .../NetflixViewingHistory_TMDB_IMDB.mock.csv | 6 + default_apps/netflix/dataset.py | 45 +++ default_apps/netflix/imdb.py | 83 +++++ default_apps/netflix/main.py | 108 +++++++ default_apps/netflix/netflix.py | 25 ++ default_apps/netflix/page.py | 141 +++++++++ default_apps/netflix/requirements.txt | 2 + default_apps/netflix/run.sh | 7 + default_apps/netflix/stats.py | 237 ++++++++++++++ default_apps/netflix/templates/card.html | 29 ++ .../netflix/templates/images/nf_logo.png | Bin 0 -> 16831 bytes default_apps/netflix/templates/index.css | 132 ++++++++ default_apps/netflix/templates/page.html | 40 +++ default_apps/netflix/tmdb.py | 294 ++++++++++++++++++ default_apps/netflix/utils.py | 97 ++++++ docker/syftbox.dockerfile | 13 + pyproject.toml | 2 +- scripts/deploy.sh | 6 +- syftbox/client/client.py | 15 +- syftbox/client/plugins/apps.py | 26 +- syftbox/client/plugins/create_datasite.py | 13 + syftbox/client/plugins/sync.py | 13 +- syftbox/server/server.py | 25 ++ 29 files changed, 1438 insertions(+), 21 deletions(-) create mode 100644 .dockerignore create mode 100644 default_apps/netflix/.gitignore create mode 100644 default_apps/netflix/README.md create mode 100644 default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv create mode 100644 default_apps/netflix/dataset.py create mode 100644 default_apps/netflix/imdb.py create mode 100644 default_apps/netflix/main.py create mode 100644 default_apps/netflix/netflix.py create mode 100644 default_apps/netflix/page.py create mode 100644 default_apps/netflix/requirements.txt create mode 100755 default_apps/netflix/run.sh create mode 100644 default_apps/netflix/stats.py create mode 100644 default_apps/netflix/templates/card.html create mode 100644 default_apps/netflix/templates/images/nf_logo.png create mode 100644 default_apps/netflix/templates/index.css create mode 100644 default_apps/netflix/templates/page.html create mode 100644 default_apps/netflix/tmdb.py create mode 100644 default_apps/netflix/utils.py create mode 100644 docker/syftbox.dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..ded50c79 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git +data +default_apps +dist +docker +notebooks +projects +tests diff --git a/MANIFEST.in b/MANIFEST.in index ea61a195..e77d35e7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ recursive-include syftbox *.html *.js *.css *.zip +recursive-include default_apps *.py *.sh *.html *.js *.css *.zip *.png *.txt *.csv diff --git a/README.md b/README.md index ac756db8..25b6cbfb 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,20 @@ |___/ ``` +# Quickstart User Installation + +## install uv +curl -LsSf https://astral.sh/uv/install.sh | sh + +## create a virtualenv somewhere +uv venv .venv + +## install the wheel +uv pip install http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl --reinstall + +## run the client +uv run syftbox client + # Quickstart Client Developer Installation ### Step 0: Open your terminal to the root of this Github repository @@ -35,7 +49,7 @@ uv pip install -e . 
### Step 5: Run the client

```
-syftbox client
+uv run syftbox/client/client.py
```

diff --git a/default_apps/adder/main.py b/default_apps/adder/main.py
index 1d51bbb5..bb44aa7c 100644
--- a/default_apps/adder/main.py
+++ b/default_apps/adder/main.py
@@ -1,8 +1,18 @@
 import json
 import os
+from syftbox.lib import ClientConfig

-input_file_path = "../../andrew@openmined.org/app_pipelines/adder/inputs/data.json"
-output_file_path = "../../andrew@openmined.org/app_pipelines/adder/done/data.json"
+
+config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
+client_config = ClientConfig.load(config_path)
+
+input_folder = f"{client_config.sync_folder}/app_pipelines/adder/inputs/"
+output_folder = f"{client_config.sync_folder}/app_pipelines/adder/done/"
+os.makedirs(input_folder, exist_ok=True)
+os.makedirs(output_folder, exist_ok=True)
+
+input_file_path = f"{input_folder}/data.json"
+output_file_path = f"{output_folder}/data.json"

 if os.path.exists(input_file_path):
     with open(input_file_path, 'r') as f:
diff --git a/default_apps/netflix/.gitignore b/default_apps/netflix/.gitignore
new file mode 100644
index 00000000..99c82da1
--- /dev/null
+++ b/default_apps/netflix/.gitignore
@@ -0,0 +1,4 @@
+temp/*
+output/*
+inputs/*
+cache/*
\ No newline at end of file
diff --git a/default_apps/netflix/README.md b/default_apps/netflix/README.md
new file mode 100644
index 00000000..84a8bb2b
--- /dev/null
+++ b/default_apps/netflix/README.md
@@ -0,0 +1,57 @@
+# Netflix App
+
+## Download your Netflix data
+
+Go here and request your Netflix data for download:
+https://www.netflix.com/account/getmyinfo
+
+## Get a TMDB API key
+
+Sign up here:
+https://www.themoviedb.org/signup
+
+Create an API key here:
+https://www.themoviedb.org/settings/api
+
+## Setup
+
+Put the following files in the `inputs` folder:
+
+- NetflixViewingHistory.csv (downloaded from Netflix)
+- TMDB_API_KEY.txt (put the key in this text file)
+- missing_imdb_id.json (optional: put JSON in here to fix titles missing from TMDB)
+
+## Create your Netflix Page
+
+```
+./run.sh
+```
+
+Force it to run again:
+
+```
+./run.sh --force
+```
+
+## Debugging
+
+Check the temp folder for the intermediate files that are generated.
+You can view these dataframes in Pandas to see what's going on.
+main.py runs each step one after the other, so you can look at the code
+for the step where your issue is happening.
+
+## Missing IMDB file
+
+The missing IMDB file is there so you can manually give the system the IMDb ID
+for a particular title.
+
+The format is:
+
+```json
+{
+  "Life: Primates": "tt1533395"
+}
+```
+
+Each key can be a partial or exact match, but don't make it too short: titles
+are compared with a string-in-string match, so a short key will also match
+other titles.
diff --git a/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv b/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv
new file mode 100644
index 00000000..a73bae35
--- /dev/null
+++ b/default_apps/netflix/data/NetflixViewingHistory_TMDB_IMDB.mock.csv
@@ -0,0 +1,6 @@
+netflix_title,netflix_date,tmdb_id,tmdb_title,tmdb_media_type,tmdb_poster_url,homepage,imdb_id,facebook_id,instagram_id,twitter_id,genre_ids,genre_names,imdb_runtime_minutes,imdb_rating
+Psych: Season 1: Pilot: Part 1,2024-08-21,1447,Psych,tv,https://image.tmdb.org/t/p/w500/fDI15gTVbtW5Sbv5QenqecRxWKJ.jpg,http://www.usanetwork.com/series/psych,tt0491738,PsychPeacock,PsychPeacock,PsychPeacock,"[35, 18, 9648, 80]","['Comedy', 'Drama', 'Mystery', 'Crime']",44,8.4
+Monk: Season 1: Mr.
Monk and the Candidate: Part 1,2024-08-12,1695,Monk,tv,https://image.tmdb.org/t/p/w500/3axGMbUecXXOPSeG47v2i9wK5y5.jpg,http://www.usanetwork.com/series/monk,tt0312172,,,,"[35, 80, 18, 9648]","['Comedy', 'Crime', 'Drama', 'Mystery']",44,8.1 +3 Body Problem: Season 1: Countdown,2024-03-26,108545,3 Body Problem,tv,https://image.tmdb.org/t/p/w500/ykZ7hlShkdRQaL2aiieXdEMmrLb.jpg,https://www.netflix.com/title/81024821,tt13016388,,3bodyproblem,3body,"[10765, 9648, 18]","['Sci-Fi & Fantasy', 'Mystery', 'Drama']",60,7.5 +Fool Me Once: Limited Series: Episode 1,2024-01-29,220801,Fool Me Once,tv,https://image.tmdb.org/t/p/w500/Ertv4WLEyHgi8zN4ldOKgPcGAZ.jpg,https://www.netflix.com/title/81588093,tt5611024,,,,"[18, 80, 9648]","['Drama', 'Crime', 'Mystery']",50,6.8 +Exploding Kittens: Pilot,2024-07-19,219532,Exploding Kittens,tv,https://image.tmdb.org/t/p/w500/4WctqRtusYpTLHNkuVjQe4R51DZ.jpg,https://www.netflix.com/title/81459282,tt19734104,,,,"[16, 35]","['Animation', 'Comedy']",25,6.8 \ No newline at end of file diff --git a/default_apps/netflix/dataset.py b/default_apps/netflix/dataset.py new file mode 100644 index 00000000..b953a521 --- /dev/null +++ b/default_apps/netflix/dataset.py @@ -0,0 +1,45 @@ +import os + +import pandas as pd + +from syftbox.lib import ClientConfig, SyftVault, TabularDataset + + +def run(): + try: + imdb_df = pd.read_csv("./temp/3_imdb.csv") + + dataset_filename = "NetflixViewingHistory_TMDB_IMDB.csv" + imdb_mock_df = pd.read_csv("./data/NetflixViewingHistory_TMDB_IMDB.mock.csv") + + if set(imdb_df.columns) != set(imdb_mock_df.columns): + raise Exception("Netflix real vs mock schema are different") + + config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None) + client_config = ClientConfig.load(config_path) + manifest = client_config.manifest + + # create public datasets folder + datasets_path = manifest.create_public_folder("datasets") + + dataset_path = datasets_path / "netflix_tmdb_imdb" + csv_file = dataset_path / dataset_filename + os.makedirs(dataset_path, exist_ok=True) + + # write mock data + imdb_mock_df.to_csv(csv_file) + + dataset = TabularDataset.from_csv( + csv_file, name="Netflix_TMDB_IMDB", has_private=True + ) + dataset.publish(manifest, overwrite=True) + + # write private file + private_path = os.path.abspath(f"./output/{dataset_filename}") + imdb_df.to_csv(private_path) + print(f"> Writing private {dataset_filename} to {private_path}") + + SyftVault.link_private(csv_file, private_path) + + except Exception as e: + print("Failed to make dataset with dataset.py", e) diff --git a/default_apps/netflix/imdb.py b/default_apps/netflix/imdb.py new file mode 100644 index 00000000..4617c1ba --- /dev/null +++ b/default_apps/netflix/imdb.py @@ -0,0 +1,83 @@ +import os +import warnings + +import pandas as pd +from utils import download_file + +# Suppress only DtypeWarning +warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning) + + +download_urls = [ + "https://datasets.imdbws.com/title.basics.tsv.gz", + "https://datasets.imdbws.com/title.ratings.tsv.gz", +] + + +def run(): + try: + temp_folder = "./temp/" + output_file = "3_imdb.csv" + + imdb_df = pd.read_csv("./temp/2_tmdb.csv") + + for download_url in download_urls: + filename = os.path.basename(download_url) + file_path = f"{temp_folder}/{filename}" + if not os.path.exists(file_path): + print(f"> Downloading {download_url} to {file_path}") + download_file(download_url, temp_folder) + else: + # print(f"> File {file_path} already downloaded") + pass + + titles = pd.read_csv( + temp_folder + 
"/title.basics.tsv.gz", + sep="\t", + compression="gzip", + ) + + title_ratings = pd.read_csv( + temp_folder + "/title.ratings.tsv.gz", + sep="\t", + compression="gzip", + ) + + titles_merged = titles.merge(title_ratings, on="tconst", how="right") + titles_cleaned = titles_merged.dropna() + titles_cleaned = titles_cleaned[titles_cleaned["isAdult"] == 0] + + titles_cleaned["startYear"] = titles_cleaned["startYear"].replace("\\N", None) + titles_cleaned["runtimeMinutes"] = titles_cleaned["runtimeMinutes"].replace( + "\\N", None + ) + + df_merged = imdb_df.merge( + titles_cleaned[["tconst", "runtimeMinutes", "averageRating"]], + how="left", + left_on="imdb_id", + right_on="tconst", + ) + + df_merged = df_merged.rename( + columns={ + "runtimeMinutes": "imdb_runtime_minutes", + "averageRating": "imdb_rating", + } + ) + + df_merged = df_merged.drop(columns=["tconst"]) + + path = os.path.abspath(temp_folder + "/" + output_file) + print(f"Writing {output_file} to {temp_folder}") + df_merged.to_csv(path, index=False) + + except Exception as e: + import traceback + + print(traceback.print_exc()) + print("Failed to run imdb.py", e) + + +if __name__ == "__main__": + run() diff --git a/default_apps/netflix/main.py b/default_apps/netflix/main.py new file mode 100644 index 00000000..277513e7 --- /dev/null +++ b/default_apps/netflix/main.py @@ -0,0 +1,108 @@ +import argparse +import os +import shutil + +# from dataset import run as make_dataset +from imdb import run as add_imdb_data +from netflix import run as preprocess_netflix +from page import run as make_page +from tmdb import run as get_tmdb_data +from utils import compute_file_hash, load_cache, save_cache + +def publish_page(output_path): + try: + from syftbox.lib import ClientConfig + config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None) + client_config = ClientConfig.load(config_path) + + file_name = "index.html" + destination = "public/apps/netflix/" + destination_path = client_config.datasite_path + "/" + destination + os.makedirs(destination_path, exist_ok=True) + + shutil.copy2(output_path, destination_path + "/" + file_name) + print( + f"> Netflix app published to: {client_config.server_url}/datasites/apps/netflix/{client_config.email}" + ) + except Exception as e: + import traceback + print(traceback.format_exc()) + print("Couldnt publish", e) + pass + +def main(): + # Create the argument parser + parser = argparse.ArgumentParser(description="Enter your TMDB API key.") + + # Add an argument for the TMDB API key + parser.add_argument("--tmdb-api-key", required=False, help="Your TMDB API key") + parser.add_argument( + "--missing-imdb-file", required=False, help="Your missing IMDB title file" + ) + parser.add_argument( + "--force", action="store_true", default=False, help="Override hash check" + ) + + os.makedirs("./cache", exist_ok=True) + os.makedirs("./inputs", exist_ok=True) + os.makedirs("./temp", exist_ok=True) + os.makedirs("./output", exist_ok=True) + + input_file = "./inputs/NetflixViewingHistory.csv" + if not os.path.exists(input_file): + print(f"Error: Netflix file {input_file} required.") + return + + # Parse the arguments + args = parser.parse_args() + + # If the API key is not provided via args, ask for it interactively + tmdb_api_key = args.tmdb_api_key + if tmdb_api_key is None or tmdb_api_key == "": + tmdb_api_key = os.environ.get("TMDB_API_KEY", None) + if not tmdb_api_key: + tmdb_api_key = input("Please enter your TMDB API key: ") + + if tmdb_api_key is None or tmdb_api_key == "": + print("Error: TMDB_API_KEY 
required") + return + + print(f"Your TMDB API key is: {tmdb_api_key}") + + missing_file = None + if args.missing_imdb_file: + if not os.path.exists(args.missing_imdb_file): + print(f"Can't find missing imdb id file at: {args.missing_imdb_file}") + missing_file = args.missing_imdb_file + + input_hash = compute_file_hash(input_file) + output_path = "output/index.html" + output_hash = None + if os.path.exists(output_path): + output_hash = compute_file_hash(output_path) + last_run = load_cache("last_run.json") + if ( + "input_hash" in last_run + and "output_hash" in last_run + and last_run["input_hash"] == input_hash + and last_run["output_hash"] == output_hash + and not args.force + ): + print(f"Already generated html for {input_file} with hash: {input_hash}") + return + + preprocess_netflix() + get_tmdb_data(tmdb_api_key, missing_file) + add_imdb_data() + # make_dataset() + make_page() + + last_run = {"input_hash": input_hash} + if os.path.exists(output_path): + last_run["output_hash"] = compute_file_hash(output_path) + save_cache(last_run, "last_run.json") + publish_page(output_path) + + +if __name__ == "__main__": + main() diff --git a/default_apps/netflix/netflix.py b/default_apps/netflix/netflix.py new file mode 100644 index 00000000..c33cf25e --- /dev/null +++ b/default_apps/netflix/netflix.py @@ -0,0 +1,25 @@ +import os + +import pandas as pd + + +def run(): + try: + temp_folder = "./temp/" + output_file = "1_netflix.csv" + + netflix_df = pd.read_csv("./inputs/NetflixViewingHistory.csv") + netflix_df = netflix_df.rename( + columns={"Title": "netflix_title", "Date": "netflix_date"} + ) + + path = os.path.abspath(temp_folder + "/" + output_file) + netflix_df.to_csv(path, index=False) + print(f"> Writing {output_file} to {temp_folder}") + + except Exception as e: + print("Failed to run netflix.py", e) + + +if __name__ == "__main__": + run() diff --git a/default_apps/netflix/page.py b/default_apps/netflix/page.py new file mode 100644 index 00000000..58e6dbff --- /dev/null +++ b/default_apps/netflix/page.py @@ -0,0 +1,141 @@ +import datetime +import os + +import pandas as pd +from utils import evaluate_list, image_to_base64 + + +def get_this_year(df, year): + return df[df["netflix_date"].dt.year == year] + + +def get_imdb_id_rows(df, imdb_id): + show_df = df[df["imdb_id"] == imdb_id] + return show_df + + +def get_top_n_tv_shows(df, n): + top_ids = df[df["tmdb_media_type"] == "tv"]["imdb_id"].value_counts().head(n).index + return df.loc[ + df["imdb_id"].isin(top_ids) & (df["tmdb_media_type"] == "tv") + ].drop_duplicates(subset="imdb_id", keep="first") + + +def format_minutes(total_minutes): + hours = int(total_minutes // 60) + minutes = int(total_minutes % 60) + result = [] + + if hours > 0: + result.append(f"{hours} h{'s' if hours > 1 else ''}") + if minutes > 0: + result.append(f"{minutes} m{'s' if minutes > 1 else ''}") + + return ", ".join(result) if result else "0 minutes" + + +def get_week_counts(df): + day_counts = df["day_of_week"].value_counts() + favorite_days = day_counts.to_dict() + return favorite_days + + +def first_day(favourite_days): + keys = list(favourite_days.keys()) + if len(keys) > 0: + return keys[0] + return "Unknown" + + +def run(): + try: + templates_folder = "./templates" + output_file = "index.html" + + imdb_df = pd.read_csv("./temp/3_imdb.csv") + imdb_df["netflix_date"] = pd.to_datetime(imdb_df["netflix_date"]) + imdb_df["day_of_week"] = imdb_df["netflix_date"].dt.day_name() + imdb_df["genre_names"] = imdb_df["genre_names"].apply(evaluate_list) + 
imdb_df["genre_ids"] = imdb_df["genre_ids"].apply(evaluate_list) + + current_year = datetime.datetime.now().year + year_df = get_this_year(imdb_df, current_year) + year_tv_df = year_df[year_df["tmdb_media_type"] == "tv"] + + # year stats + total_time = format_minutes(year_tv_df["imdb_runtime_minutes"].sum()) + year_fav_day = first_day(get_week_counts(year_tv_df)) + total_unique_show_views = year_tv_df["imdb_id"].nunique() + total_views = len(year_tv_df) + + top_5_shows = get_top_n_tv_shows(year_df, 5) + + css = "" + with open(templates_folder + "/" + "index.css") as f: + css = f.read() + + page = "" + with open(templates_folder + "/" + "page.html") as f: + page = f.read() + + show_list_card_template = "" + with open(templates_folder + "/" + "card.html") as f: + show_list_card_template = f.read() + + show_list_html = "" + order = 0 + for _, row in top_5_shows.iterrows(): + show_rows = get_imdb_id_rows(year_tv_df, row.imdb_id) + genres = ", ".join(sorted(row.genre_names)) + order += 1 + # fav_days = get_week_counts_for_imdbid(df, row.imdbID) + fav_day = first_day(get_week_counts(show_rows)) + count = len(show_rows) + average_rating = row.imdb_rating + tmdb_title = row.tmdb_title + imdb_id = row.imdb_id + tmdb_poster_url = row.tmdb_poster_url + template_vars = { + "year": current_year, + "imdb_id": imdb_id, + "order": order, + "tmdb_poster_url": tmdb_poster_url, + "tmdb_title": tmdb_title, + "average_rating": average_rating, + "genres": genres, + "count": count, + "fav_day": fav_day, + } + show_list_html += show_list_card_template.format(**template_vars) + + logo_path = "templates/images/nf_logo.png" + logo_src = image_to_base64(logo_path) + + page_vars = { + "logo_src": logo_src, + "css": css, + "year": current_year, + "total_time": total_time, + "year_fav_day": year_fav_day, + "total_unique_show_views": total_unique_show_views, + "total_views": total_views, + "show_list_html": show_list_html, + } + page_html = page.format(**page_vars) + + print(f"Writing {output_file} to output") + path = "output" + "/" + output_file + with open(path, "w") as f: + f.write(page_html) + full_path = os.path.abspath(path) + print(f"\nOpen: file:///{full_path}") + + except Exception as e: + import traceback + + print(traceback.print_exc()) + print("Failed to run page.py", e) + + +if __name__ == "__main__": + run() diff --git a/default_apps/netflix/requirements.txt b/default_apps/netflix/requirements.txt new file mode 100644 index 00000000..69de461a --- /dev/null +++ b/default_apps/netflix/requirements.txt @@ -0,0 +1,2 @@ +pandas +requests diff --git a/default_apps/netflix/run.sh b/default_apps/netflix/run.sh new file mode 100755 index 00000000..dc7a6ff9 --- /dev/null +++ b/default_apps/netflix/run.sh @@ -0,0 +1,7 @@ +#!/bin/sh +uv venv .venv +uv pip install -r requirements.txt +TMDB_API_KEY=$(cat inputs/TMDB_API_KEY.txt) + +uv run python -c "import syftbox; print(syftbox.__version__)" +uv run main.py --tmdb-api-key=$TMDB_API_KEY --missing-imdb-file=inputs/missing_imdb_id.json "$@" diff --git a/default_apps/netflix/stats.py b/default_apps/netflix/stats.py new file mode 100644 index 00000000..0b685079 --- /dev/null +++ b/default_apps/netflix/stats.py @@ -0,0 +1,237 @@ +import datetime +import json +import os + +import pandas as pd +import requests +from utils import evaluate_list, image_to_base64, load_cache, save_cache + +TMDB_BASE_URL = "https://api.themoviedb.org/3" +IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500" # w500 refers to image size + +tmdb_id_cache = load_cache("tmdb_id.json") + + +def 
get_this_year(df, year): + return df[df["netflix_date"].dt.year == year] + + +def get_imdb_id_rows(df, imdb_id): + show_df = df[df["imdb_id"] == imdb_id] + return show_df + + +def get_top_n_tv_shows(df, n): + top_ids = df[df["tmdb_media_type"] == "tv"]["imdb_id"].value_counts().head(n).index + return df.loc[ + df["imdb_id"].isin(top_ids) & (df["tmdb_media_type"] == "tv") + ].drop_duplicates(subset="imdb_id", keep="first") + + +def format_minutes(total_minutes): + hours = int(total_minutes // 60) + minutes = int(total_minutes % 60) + result = [] + + if hours > 0: + result.append(f"{hours} h{'s' if hours > 1 else ''}") + if minutes > 0: + result.append(f"{minutes} m{'s' if minutes > 1 else ''}") + + return ", ".join(result) if result else "0 minutes" + + +def get_week_counts(df): + day_counts = df["day_of_week"].value_counts() + favorite_days = day_counts.to_dict() + return favorite_days + + +def first_day(favourite_days): + keys = list(favourite_days.keys()) + if len(keys) > 0: + return keys[0] + return "Unknown" + + +def flatten_tmdb_dict(data): + flattened_dict = {} + flattened_dict["homepage"] = data.get("homepage", None) + external_ids = data.get("external_ids", {}) + flattened_dict["imdb_id"] = external_ids.get("imdb_id", None) + flattened_dict["facebook_id"] = external_ids.get("facebook_id", None) + flattened_dict["instagram_id"] = external_ids.get("instagram_id", None) + flattened_dict["twitter_id"] = external_ids.get("twitter_id", None) + genres = data.get("genres", {}) + genre_ids = [] + genre_names = [] + for genre in genres: + genre_ids.append(genre["id"]) + genre_names.append(genre["name"]) + flattened_dict["genre_ids"] = genre_ids + flattened_dict["genre_names"] = genre_names + + flattened_dict["tmdb_title"] = data["name"] + poster_path = data["poster_path"] + tmdb_poster_url = f"{IMAGE_BASE_URL}{poster_path}" + flattened_dict["tmdb_poster_url"] = tmdb_poster_url + return flattened_dict + + +def get_tmdb_details_for_tv(tmdb_id, api_key): + media_type = "tv" + url = f"{TMDB_BASE_URL}/{media_type}/{tmdb_id}" + params = {"api_key": api_key, "append_to_response": "external_ids"} + + cache_key = f"{tmdb_id}_{media_type}" + if cache_key in tmdb_id_cache: + result = tmdb_id_cache[cache_key] + out_dict = flatten_tmdb_dict(result) + out_dict["tmdb_id"] = tmdb_id + return pd.Series(out_dict) + + print(f"> Querying tmdb for {cache_key}") + response = requests.get(url, params=params) + + if response.status_code == 200: + result = response.json() + if result: + tmdb_id_cache[cache_key] = result + save_cache(tmdb_id_cache, "tmdb_id.json") + out_dict = flatten_tmdb_dict(result) + out_dict["tmdb_id"] = tmdb_id + return pd.Series(out_dict) + + return None + + +def run(api_key): + try: + templates_folder = "./templates" + output_file = "stats.html" + + stats_data = {} + with open("./inputs/stats_data.json") as f: + stats_data = json.loads(f.read()) + + total_time = format_minutes(stats_data["total_time"]) + total_views = stats_data["total_views"] + total_unique_show_views = stats_data["total_unique_show_views"] + year_fav_day = stats_data["year_fav_day"] + + current_year = datetime.datetime.now().year + top_5 = stats_data["top_5"] + + series = [] + for tmdb_id, count in top_5.items(): + series.append(get_tmdb_details_for_tv(tmdb_id, api_key)) + + imdb_df = pd.DataFrame(series) + + # add imdb + temp_folder = "./temp/" + + titles = pd.read_csv( + temp_folder + "/title.basics.tsv.gz", + sep="\t", + compression="gzip", + ) + + title_ratings = pd.read_csv( + temp_folder + "/title.ratings.tsv.gz", + 
sep="\t", + compression="gzip", + ) + + titles_merged = titles.merge(title_ratings, on="tconst", how="right") + titles_cleaned = titles_merged.dropna() + titles_cleaned = titles_cleaned[titles_cleaned["isAdult"] == 0] + + titles_cleaned["startYear"] = titles_cleaned["startYear"].replace("\\N", None) + titles_cleaned["runtimeMinutes"] = titles_cleaned["runtimeMinutes"].replace( + "\\N", None + ) + + df_merged = imdb_df.merge( + titles_cleaned[["tconst", "runtimeMinutes", "averageRating"]], + how="left", + left_on="imdb_id", + right_on="tconst", + ) + + df_merged = df_merged.rename( + columns={ + "runtimeMinutes": "imdb_runtime_minutes", + "averageRating": "imdb_rating", + } + ) + + df_merged = df_merged.drop(columns=["tconst"]) + + css = "" + with open(templates_folder + "/" + "index.css") as f: + css = f.read() + + page = "" + with open(templates_folder + "/" + "page.html") as f: + page = f.read() + + show_list_card_template = "" + with open(templates_folder + "/" + "card.html") as f: + show_list_card_template = f.read() + + show_list_html = "" + order = 0 + + for _, row in df_merged.iterrows(): + count = top_5[row.tmdb_id] + genres = ", ".join(sorted(evaluate_list(row.genre_names))) + order += 1 + average_rating = row.imdb_rating + tmdb_title = row.tmdb_title + imdb_id = row.imdb_id + tmdb_poster_url = row.tmdb_poster_url + template_vars = { + "year": current_year, + "imdb_id": imdb_id, + "order": order, + "tmdb_poster_url": tmdb_poster_url, + "tmdb_title": tmdb_title, + "average_rating": average_rating, + "genres": genres, + "count": count, + "fav_day": "", + } + show_list_html += show_list_card_template.format(**template_vars) + + logo_path = "templates/images/nf_logo.png" + logo_src = image_to_base64(logo_path) + + page_vars = { + "logo_src": logo_src, + "css": css, + "year": current_year, + "total_time": total_time, + "year_fav_day": year_fav_day, + "total_unique_show_views": total_unique_show_views, + "total_views": total_views, + "show_list_html": show_list_html, + } + page_html = page.format(**page_vars) + + print(f"Writing {output_file} to output") + path = "output" + "/" + output_file + with open(path, "w") as f: + f.write(page_html) + full_path = os.path.abspath(path) + print(f"\nOpen: file:///{full_path}") + + except Exception as e: + import traceback + + print(traceback.print_exc()) + print("Failed to run html.py", e) + + +api_key = "010de1bcf60f0e14b92765a3f9485662" +run(api_key) diff --git a/default_apps/netflix/templates/card.html b/default_apps/netflix/templates/card.html new file mode 100644 index 00000000..e9ab58e7 --- /dev/null +++ b/default_apps/netflix/templates/card.html @@ -0,0 +1,29 @@ +
+<li>
+  <a class="show-item" href="https://www.imdb.com/title/{imdb_id}/" target="_blank">
+    <div class="show-content">
+      <span class="show-number">{order}</span>
+      <img class="show-thumbnail" src="{tmdb_poster_url}" alt="{tmdb_title}" />
+      <div class="show-info">
+        <div class="show-title">
+          <span class="show-name">{tmdb_title}</span>
+          <span class="imdb-rating">IMDb {average_rating}</span>
+        </div>
+        <div class="show-details">
+          <div class="show-detail">
+            <span class="detail-label">Genres:</span> {genres}
+          </div>
+          <div class="show-detail">
+            <span class="detail-label">Views:</span> {count}
+          </div>
+          <div class="show-detail">
+            <span class="detail-label">Fav Day:</span> {fav_day}
+          </div>
+        </div>
+      </div>
+    </div>
+  </a>
+</li>
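The card above is a plain `str.format` template; `page.py` (earlier in this patch) fills it once per show from its `template_vars` dict. Below is a minimal sketch of that step, assuming it is run from the app folder: the Psych values are taken from the mock CSV in this patch, while `count` and `fav_day` are invented for illustration.

```python
# Minimal sketch: render one card from templates/card.html via str.format.
# Keys mirror template_vars in page.py; unused extra keys are simply ignored.
with open("templates/card.html") as f:
    card_template = f.read()

card_html = card_template.format(
    year=2024,
    imdb_id="tt0491738",
    order=1,
    tmdb_poster_url="https://image.tmdb.org/t/p/w500/fDI15gTVbtW5Sbv5QenqecRxWKJ.jpg",
    tmdb_title="Psych",
    average_rating=8.4,
    genres="Comedy, Crime, Drama, Mystery",
    count=12,  # invented view count
    fav_day="Friday",  # invented favourite day
)
print(card_html)
```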
    +
    +
    +
    +
    +
  • diff --git a/default_apps/netflix/templates/images/nf_logo.png b/default_apps/netflix/templates/images/nf_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..4b512829ec26c8e493fee38d5c1c3b9e48198161 GIT binary patch literal 16831 zcmW+;bzD?U7bcfl8e!?~?nSym8l=0sL6C+;8tD)al^TkdSayRTOlApZ7>e$b*<5;I~0#G(RLH*%(y?+1G*V zze1f;?bh_S@3wMU4bLA$dIM6FP|$>$>5{lpsUlFh#8N?@&E*E4;hxX;7@sl!U3x0# zA~AhQl8GE691GA5;51553i;$ZRdD%# zQy1^>CV=l1(H->@n;E|(Ohr@7>*ja-fzyvGgS#1txowR`>skA8S|a>}+v)jZVG&$7 z#BOHRdnYmKoc*uZ)?;s)fCa;Y1jP{}5(L@mSHRQ@qbcEI<+(Zx(O`(WNp8Stb>Q-Z z&>96nW2?u>?~DRzMa$D0e2x%FM^Eu*$!{Y&O&2??bf1KAyd{FP6pGp+Mm-d^|7{66 zD)|XFH=?xVCl;I1(UpS`N^YE7zS|!RZo<%!h7Y!%cKRQC>Z7lbh?dU>IJ$fy(nlAq zrl`>H)rj@DIe6}*L8NI)bPIm6n*g+5&o+;;I zwKcn4#|c)u^s2-kloFmkV`eW7!bWycsN&eAOKSkVe?cBR5LrQr!yHCC%p8($4n%7z z`j+JsO!{Isgee!(s?M7=-n4kdKGOKH;#}(tT3Is}p@Sj$NRiK^l!Uq2*t8uNTXxx` zxEt$FKM$@KqFEf66S}ag@-z{?9Jp_9zdidnZ|}5N@q^17`40X zieKX2V+?Q5=gJ(KtDj74qmnAhbR85mkr={0X|z4ht^b}3a19r`hSKA|(PfR*rd39E zFdHOVMA0-bjf9bu>VOQROE1AN!Or|z@? z{?jnDkdC*DqqoNPUWzixO_HF7G(NgR`aq5@RAh}136||Da6M*QU-~ejURN(!hI&Af z+Z;Cm^L`*16@w)U{sIo!Wbr?CMx|9vden9M9fymF^HTsnd%FynHj40|$f=vx-=jz4 zI3igcN2d+>;7OqmGDJ&+n?sNo(Ug$Q;)L^Q+)ydFZbv`b2H>&7IHN`oZtWqt!{2D* ztYlkGtVKRG)j!l#{c9BHD0T#EI{W1)qLI?&F1ZD%Fs4|EI^a|Z1g1bX%^ijun(Hp# zeJFkRI)HIi=b`29&6&g}aQ*R1nCX4+Sw2q??!XI5#^vQlt40O)6LzWU|BoHLkk3D; zo+2>H1|^<)plyPO0=BnWT{8qa7y`lDUJI1*ZEXBEUzcAll#rNj#Kn;LPtD|)Od=A_ z#t-{lWK;=szh(pYYRa4NT7gCKUKpo z!tXbjF+FPXX);HULoR|Tt^URf+;{N>9?GIlOl29P?bR-k&ZjmavbS1(2XMkTyQUCT zjIAdziM1UkZje8(iP?pT$-a$--=as>o`gIr0gXFAte7n zs%w8?!4C~REC`y{6&x;M!o!Gg`91sH!?1-z|d-YM+RUebViECz# ze{rV*TyI1POO(b`-*}kZ6ku7gmoY~({5{FVGz2I5cLu-xWePY(jg43T_lG^tBip^| z{%lNHHo49diKa#fuKh{1Kp;M36RET6qWrU(1Kr@OlWJt(^%8BySC0S9wD&sfhkM?z z=&ymY% z9R7HAF!PxRc)|G#SmF-ztp5TQ!90K`tEq^UBG0RN@t?>M=az4p&RRzvat2CPWO9%jyh-Z+&PvBB;V;D*i! z=bWtwsN??f$_v@en>y2@Gcax@ z!OTs@A(QTlC`_6p>EO_x%i2?XdjG9e4a=OYq3Lls>)K7W$IG}1hCqfc-`zbg2`tJYS^8mI$>a`QFl3SX%WzY!%NF4MM45wH9DuS6`C5s1F!asaF2_fr5L zc?Aji%{{sXerX!8I~^)c>x$#7Vw7tuIvN1x`cIOn@J^O}izA!d3aN)Mn|V0Tv>! zl0ab`N;Pc#^*9+=ksQc!`qhGJ+uVNNEDHcnR$Nk~YL9CFAIWKzwB!6$qIo?*6hUdw zazbxhz;NE#{QE)#5G4<+VYzW3nf1iA`7m`5bD;A-P!$Aob=dp;ISB@=CJdBw26@|^ zxD73)jYpI(ZGl}5NYDF*OZI`6zX4h93oI~xKbU;^G3C?+A)EZp6GELYSqrYe7)7}9 zd)RGWXRdP?*3X?KSXrnU!r(~3Dg^)lHN9&{qWq? zTAApufI7O%IM*EVn%peX!=Twd-R*ssT9-(uDq2Hgb$yJ2q=09Cv;V2gW;`bhUj>ZE z8cfq_Key(yWSK(gquJF0-@G9%QESZr_Epc{ah_y_*DXCizcf%GA|ih;_}^bg5#z&% zWCpUK8>f=dl|w}@EkK&&R7g@u|M&kaxP~Y#R~z|TlTqg~&8HBmc4~0K*<0OgXxP(| z+CLL=GxDBOIph-s(@_mA=AC+GIf9^&Sb37K4L99@1>7U?cU+1T@i7kPKCg4Wq@riz zno8OHxY=n4Oq4Lkc4_zzL<;fA4 zbWxC$_{WEL&R(Mj_2Hwy$ZERmwr!9< z`D`zS<3^JEKc)>i?i;bDfm%Ijgl%zVZ@!e^aHkTgX0OJ8MLW*R=?); zl<|@)`PRb55Mjw8FGr_I^JfzfpD6;3vXd@9_e~KV--1bDCDVy|%P*rjV;S z35*=Z$a<@pC{ucrSOj1`8`qzyx+?SjZ-y1`Z7#^EobVXz)Rflz7c}NWu2i;eKES}}_$Tvgh>XcKcNTn;}2xRfiP^5#xZ62P(ti4(fTx{3y|HjLd^;b~X$kVu2VAVV13~8LCe`TM8%|bZD$xNhJZgTZl{QC%G3E2*+2MAQXH@du z$f72Esh{v`z(|Fg53yV?87%bfX}!iQ#%QzO?%b9mK>IyomrA~A*tYJlSlCZI&>20l z6pQnPclgA~?|HmS8C=yZAbg8#e<5Ko`yS%J$)e1F#$Rz1+z6!_;GyWT{}_&6&9^}? 
zg78v%-6@c~7-*M8>mnjq>P@Z|R!o~<;g^Du3s2jy9^Fh3VR4~taHXPoW2vwm`e;EPKbgx9DL9fII+y{Qk{lR1EbWU+D`X& z5xlqiLTDIGH&gQ_=o>0D=Iz5&IqIn(mF%KE?LDL?kPSt$tk!rme9L6fzd_F9+nh;n zn69mt{e|$@&fB8=*Vn2G$N5~(MqGt)Y>BY~Ts9f@p`j)!da@l(TvEBp@%iuJ?@i z&mJ4k7XOBdLDKi&!eH9MGq#Kuwr`Gd>7D7#7bS5fK4Fj=2QLKav{&X!NdP)77tGL@ zr9shByf9KTls_9`Q;=G;eK3gSLacT2v>u0{I-Z$B7vu=ye^g7ou%^M}3|yJkcz9E% z){`xN7WAp%9Xz>KEuH(Ere`U2k3C8@U1J3ozf0EE%;K+1gBGdqp|+Z9@GL^zo#i8i z-{DO{{_EJa7s6Mc;&6AiIO#F-<@dV!x0*Zsli=58-cWwA0@p9)1sM#h^4>i;as;}c zgEa1fedm7Buz1`=cd1T6+RmZq!druKg-kF>m7MyaJz#?+v@qDYLl-+i)Is! zZ!%IrN(2YtX1pbDEEhgn40YXhrQ>QoCTHRlW_h}#jO6doAymxLU)~#ye?TGaK8+cKIV( zUd9Vu6pr`*9PvVP0Za9O&brBBS*2oq%|S>xKw?-~!*#%~XDQKV;n8^;8{lX?9YpTG zaasp8>tT%9GJ3Ms?W*<&Y7EUoo92xU1=Zav(E9kIc5yG=rJRlSyBj^WTeb z$Z1{Dx!u0Cobh)YRP}z7Bw2S|@H9?T{!xzcPWXy2RVTIRS5@h2`SWM`E)SlUzGp+b zq#aCu-TjE%(Jq>^>tm!M!tZzQmtnFUN^JUpDt7_D6}ClJ?6{n0HOK}!&U@*j6y3S- zZj%TkI-H6{F&;!;rMtNtqmCmKM++%~E0V?Se`?i^$j{lC$}lzH3oO6)nglW7H`LHU!C9dfy^3dDZTwIodQ2B88gRBmpzmRrs8Nr8C&HkrDt|k9#QcaWC=0R))zkE`^;5>& zDqLK%;WoL4I)mVPo6N~f*vVab($HI65+i>-gfBt06YP}?($uk~ z@c=2y%Ev>OTuy#=vgdMZB8k?HM6qn3oRQ=6eE2TfFgGw_*qiMkQ$^9u3E~1Zq&umR z4Sid%zJ&m`uRt<(e_=xX^dJIhH}jBas3K6f=v^lt zyLGQA`RDdM@3dI7eK5_`Dbb!26>rh;!NXoDspKrW=TfKYQBqy3TpoPw z9MpTo;QM#Lm~IG+lrmCHpkSmaDzW1L*4ZX-Cr6p;Q;_fy=DitorI~SUBGz zOxFR@vqK6)r%ldK7j&YHNj<7A()y2EDdf$gh2G3BIdafoShRO9)bRT+NXG071ol6L zy?yb@-rRBepDo_LCGhZk)XH=?#4m>f2J^!pX&rjGU z7Ej=Ld)bz$!WnQynP_`l7ufOC!dKBYonOLyPAs$jw@>FX zh-=oJOxPzDZhx_-)Ag%_%fQB;b^s{+0rrmXJTdiGQ&thj+ZsapXwI|FX&par`>ZA0 z^Jw^55x%O9TVZIfM3QE|ku9;7qL;KYy*?wpA1Lt&tuS1Q!)xwC`7N*Jh56!idABWLDdMksa_|78XA)Vvywb&I z^G?X3DTtMZ$p6A}O7nc}!`M~XgizJ9WW=QScY%(ZDnhcpQKBtB<(lcEKFjyETvhor z=}PZsmiI*uB%(y9JvX9$cDz>Ut7EBXv4(^YaI7V41sW{-sO`tc0j~Q!&71zpA>P#c z^=kExkA}|l{{203^@`yisO=*cRLsA(oM|#tzVcI ztH*YA)0l)n=Yq=~f??T?Q^vYof18tqLHH9)h_mD8`F_vfEl7rHEwe{g-8Z?s=i;_B z1FH*oh*!eec8+uG*!a0IoEpd@wkjZ3gclkm#@Ovu+w|tFW`-r z5_b|`C*5qra=s_W{>F{S@sBb7)=PYK^Q%cbR@=twt-gzXq6LXN*aKs*Q76?&u#iK< zk8UzfVa8cD!<}BT4%OyBeS77)Ya*ffNV_!${vOUbb~r`a7Rb4_YH^eP{Fj(eV*Z4c z-Wu%|l^nm9uqfD6UdL)+uZOq6SW%=SUy6?(%Gr0;mP-1b>CLkRhL=vF1&XxSW9Mmx zINoQbn3#iEc6nKezO=QVO33EcI7f`zq}?YXWBXn{oDNF`5-jPSK}6E#{Oikcp%+cf zna8c}2i0wT3yKYcd-@GSsg$0AcaB^MUyRHMLHvJax;fJke}v zp&;IZkaTAZcUn0opHXlH^XI(g$)>Ioor0;${K+& zfM(Qlog8&GH&xA&6HtgKVEAr!E1G-y_1~`WQaG1wjDPVV`Ys0?u4IZoni&s&{$8&p zV$opIY+`pmi3p`g?B;LtKI=&YZybF2C>1k8{EH6W$;dr)LXX3Gns zP)7bg11rU3`eiS=PPh!M%~MC2bb)7MZCfiHI7HG^tSQ-Bc378eEw^meFiBiXkJQAF2T(aX z%GPdVc!E$_)uY+mbN9RtdyQ{r#;^@TV>00n-x_!MfA$pT{9HNl-^y?3rC3>_K(*nF zXLmHK-H5PB1RML<_*N~uvrHnMz9MxFVHw*S$%*ej*!&eY;A`Nb0cSA1Np)wgVC#B$ zP1>&V{ox#UpBdMM+0?=Ym$|OQ{k$x^-1zT9VcdVb$G0a5YQ#2m- zM#xsGXgQf9u)S1zC7(^dQrCqt6(!?W_L-vC!M2d*W5v*I#nz)|G)f%S4~6>lai3Pv zY%=(_1H01KT?%*A{10hFY-wrq^&K;ec7w0bD6Vs#(q{N@68MQ8CfxRF>IM|olja!y zNmeo-3lFPHeFO8LdqWdq2%1An8!xxw#lsI3yet*e3{m&|NbG~aOj?5G1*CJ>?G-#&^!C5k=j?dKD5)C@U$ z6sw_jkZt-3Q78Bg!z`<$*^;JL?4F1j--0dB7CyUVfiio7ufGCi0LcPV+q~VT_X^~z z%YqIAO_NUfXRtoAX_koYYi%=k-)U-xqAY-w8{M_mM)g|E6)E}6|0U&PUCCSw$-XTN za;wcBsIwpEdSCvDr}s~efb8gq0;U`x52-XExdJ;OM!tZL>1X>dC!aQ=rh*F7@b$_z zw3myI$UQ&&NECPMskHvCPw5J#Az-A_RQNSn<@LRQ<4d%`+*>^mt#*6JcP8KDyiN-w z32Fzg%|8zYUPC6rR}s)3*s=%wO}vLZ5QPqtB{KU9Dbf4pQ-RlP?i$^H8(7x-6mvs2 zm26&C3~{A=+*mVtbv{({B^$`CB6V5@Po+eAnlYnGZhgi~rkqb)8lfTS!*BB1YWQgx z4V|gEyBPEobC3g;7JikmO{C|)ibdt)j2Z(d;7d2VS4Jj2$>zFJJsX+2cB?}qHTX}G zfom(9E+St>cK=X2@JFlwXMrTKA#w!i(nGcCF61wWJccq2$tNKao-p-D@iW1|!}k&< ze|%?5D&u2e=obYGaxQ7WqvM~9ZN_V;tt!FW%1S$D<$}_Ox7660I48}R|K+CIKU2U; z)x*<;eV(FcZ7<48oxoCb9%7tbraY$Y;U7D_JCMET38S>5;SNOkCt&4Dx}(fiBWHd| 
zD(^V6TS^odEd|IC<1(N-ct*~U3jOW?1@Rfvp>>E0q9jYjchlU$t@n0f_^8%y*knQ^ z!VS++)7Tg{Xf1d_+E)|p?lmR@+crdzQ%|vXnpJnzNw+N255l<>lMdI}tFFM+2fVCZ zYm+;O|Gm+$F;HbCuoJ}=fJoX+>dZp*dw!LdDHVgaFte6pmYh_|303G%On!;;|` z1Lk+z0Y^eb9cwoN=D&sNRpl||_AmHyS(@Ih;R&|CTO@@DN(3AZ(dfD7z7omi>k)so z+KcVlQ%LP|Km$st8({=i7()@U??JUay1F-AzR}a|eaP20iWu7OYmo~%JALW9(}IRj z0{9Is8kSif2K^fbv3;2Dmfy~)z-H*4{20PsPjQ!NGQ=amBA$a|Goznkj8fO=QvjD{ zL0mJ}nMIN6jGI^5&(!GVJU@0w{U>nW3~MO4Yl)4M#v@GKL)g5{8vJ~=%b!qa_u13RL|J39R@Bujp&V-jIr^Rvv}6t*t|s59oscOi6yyBS+n3Gfh+pFN^k z8ua$D0sAcsft71FpLSQO#_a&YZPSDb*ItzW=~#b-RArjmEQ7<9pt6c zaw2=@R>Z&hb2FZ7Ladb)Q82+OA4K697j_|FXuc}Ny7{Sx9bJst0nJOcq6p328wE=J zLDyCZFM5hwgp)Q8{ls!oS9RkKQ{1RC04tCc5Q+*>PrU z?k{r=k4^7PvmMtjFKsOT@#Ebp13)>h7J4`=YmgxsNCfOh%?I{io8s@uHtyPZ8HCZc z)Rn%7wn-H;bl9tM``U?5YZtTg&G<$=Ty&#<>_COvam%lN zQ2==g2e4?qO}8#Bf8hp9P|Y{36-WruYPX+eAbR^&udZ*n$2I)Ff>!ppPoWFx*!@=4hTBDUsy>EjB@{zyNd!~&Kxas4dqqSWp@J&CYz-2pHYg5@8?kByM z{BD4`e(Ny)RBK{@u)*HEv5WK;spqX0i`m$q9<`ceH=oXzHCRlwk<9Y=_Nomo;PGbH z%8F*8!bsxj_o)qUWMJhXBxVGlFWI=gs-$-9w70Hs#}K+!KlNAF1OLJkleVA6XmN^+ z5D6^ngLEifwRCGSQO>N+qSW$6$IaGf@I(vF8tz#^hYY(N4b_t$C>P^#f4UqNhNux0 zNwT;z9|NygH77W5CLMG2|J;E_E=1Kydw*C(JM3|pw*jklj*&GjN(IXqIe326cI6fn#-NEbKob^tT`c?ikG$=kBYkF*6m`WMU3apf*b#8?VSHQRKP6e%Dz<< z>+~HJpZLWF!yg!eN9FnKeAZ^J7o89`z8+IddL|s;P97Q>-gTyrE8h0BETiOj-GOTZ z#(U{|p4F@NW%X*)n-ou(0BQBlR9hm$RYG=e^w7z8pzG9#UjA8zSxkjN=qJG+&9dOl zqaZT=rdcav_`86un}h2Z)8~0LHIgt@atW$iN?4tk$4idN&eeiR5po1Zm^^uhpVi~r z+L@lB{FPjkjlYu%Rdtrph^9Q7jweZKMT?TQ!asX!P^z*!M%Zdx^XC`l)uZ1(H1JnI zMswU4k#d(?y!N*LjLC(`YVcFxr9*fNG+3%0EEl|w)aqgLxAIrVS;g24h)N^1hO64w??Phdm>d%K?T-&$cpJJgG8w5SY+z#G{3r< zc(I|StkCBX8Jp*ZLmb;JM=hE~*-<#k?7IO*~?NTcHFO9T4+yMu^vwrV}n(lRM`4ZlU z_B7fpc`tlb!nq4*!f{^os#&Tif!*2vE9@U`$|YlKE|Q?YcRlenue;YW!n227a5fk1OmjmERj_0S_z@DK)H5r!2h#FS(?3Wlr9iX3)7Q_1cNX<>1%yav zTxWf9gxo<`9kZTs>fOj{Z^NOlm~zTyi`=G*4f);_VFp`kq%dE!h$uvMnvd!hVv`hh>yaR1)Bso8Slxw> z^HoI#9ORKb^%4)w%A-9-0-V8+1BIJITk>L1&>;7EtYzbH!QZ8OVbGnMy0cte9a%vq zPpER(N4P7B+45;K9|Zj(i9KE4gEk4#9K42*J88iPmN#rJGl!6p`;DwzuWr8nvTCk% zz9}#uee7=xEfC_o8_fDKMg8ZXS;5ru7d5gR=NgB5!YsEax%>=A?c1Ut`x;c+3mx z-n?YK#?|=+pN(sx`p$oaTsUz%4aee*%)o*)&-@x(b1$Hhi3Tj51TSSov;HW3u^GNW zvM=vH7n``1#<=!+QS?2Y?FGw^(Z?);JK;4_kRDT8Fx84*GdEAJrHIjNgTtJ!ZAG(! 
z$qSvhB?+>9;#-D@#8-RPw`)jHnx?u=G&agtM%;KgQhz;{AU(yd79*<*6Ky}aE@wId zk3NcPDO~7prJa?$?GK$&fT&IL$&YZf75@A}8kuir-Ft6{TY`Q^y|QcWT= zEuZR_U+MX8ZsMxx@`H5aZ=gNRzU{VI5~ph{muYkLtx#|H*|xsx#ZedKt``$w zlvnTov!~T(<EeE zE2~a>Lt7)}hV1u!z_ahF0P8Lv-7G)r$@lJ{GDN=@ ze2YXH{sfi>8j}W&Yn)2*^)tIjt#_uTs6KJg_*mZMVeU^^#H$v9^1;ic#P%0>TQ?l) z_*&N!d13f)L;BnmYOEndzN%YP_+sPl;oY|@_D^i*rQmJFH;!i?@7L8g6%O^0a)k0} zK*C6|jI8-Q=*%^`A?C|kQ}y6&Wbu|kbEuC3?Q$k&P|yROERe3o9WnIJ;}p~gWKI#E zi!vI58k~-68gv{*-%MctF{Ps0-Im?h=O>8OSUmJdJn8~dGp#KL4`sqRn*;Tia#}6l z|7i+;zTH87EIfOa3#CHRR4i|GOdtBzw`Sj#@-UPI?;iM3AC-8bozS_bIf5j;FpsC} z7pm2sk;~9a?rUmNau)C_3m!YHuhp+bC060V?L~5JN_Xbm%neS|YOy|e>k-6?iuJ&p zUk~nkwa>%h>~4F(Q`hLWk1jLA`7U^Vy<~jE4RY$LsOL4Y^X<@#HAcjow2cD%oZGg8}49!TuKsEvMY$e0<|IHkXAF&xN znq#%hvdn4n`Yh~j#+F#2Q~~v`Pcs?+Tk?_s-^jb2{7!5{_>cPNpfhqs3?gm}cO3?VZI1gM!+9 zA%UnL*@Q?uW+Iv;r8#(cVE2++@m{O;;5YLmQygDKLyEi`a?&_q0r1LByqLc8 z&LPWkME03_$CFf+i_C;YPJBhP#(H_m?a*ry`wM-Fzd~>6*>F(g_Y|3cg+DMhY(xC&a7JZ5a-g+}n2R z`qKUT^e%}5h9Pp(xXdHeUn5RnXCG+?9ZVY!1b^68Wo;{s?TPIVTBre<0c-LLpC~rA z$08nrtn`hCOkc;_M}|n8E}Y%l+t(y-1y2kjphxSqdK%L97sxg@7NpRh`5!*m za)s9d`-@G6L)X6-xXG?n|3X!@Plg9uJG2Wprz0)Nbk3-?lR|l zfpVbeUad7htZ%`12)dssqs+c~vGsuuIAM|?7LKOdbmia34*B|{jU9$ByJ>EBJkOZ& zarkHs6YB-?ZIJIPOgV}Am#N`oeHPvw0)aWWl1HbKYa8RseHe(mNsd_SOK*aQq=uo| zre2Ys#+gt4}Ajm)@AYS+;3~|63}*HM~|q`bzKnd zqVXPbQ}XaxX8u{eAy85@(@dZWUkoTs)2u%}7FIQ&bkvIzXOGN@gud=^;e9(q?3O~K znc$S}-fMGB0QI@tL#YMg@aBT~sMRK67(vRvE&MDuIiT?<*;`ac@URA8ie^r?6l{EL zMA`;~a|)n6cRC%#uA`gyNQGzyJlY8xA6+(x%nj-kH3(3#`uDa zX|;0Iq2t6b%?nEs16&Eesa_hczJRms0IWyYnL4x}j-jC-*tJV^{{TNW8#DffNAfXwRcEy&xPZFE4TNv3eXOVnM zAUx5Ar}Ngc@#t{60_9qNE740lWFYqiQ0r{@y@n(aEPoT85ZP20RSLB=d-7IU?A?7f2C_TUcrwAo;_)=4NY+W;8aKA_ondh$Ibqbi;nZo_vnJt z$Tt|6nl8lqYS9Rhk_lgYVh!$Zynk+g94I2}s1hgk=-0}x{9cX|0hO#!58^FSPlk(X zb3NC2E+L0y|ER#&I5nT}icXrWE^R*56rs*jq|KW?$7vC0v}}4!AE&*lV3TO=5o;Xu zlP~D?&thP01Q{FatLSk(_2NVQYH=waiJ?Xl_! 
zPCa^b1>$3zEVGw@`@Fri>t`qx!U3iI2QNZvEZ><5vvvJ8Br62&N8iJdRqAfVFiHIb z20MRLBsmKnKC-h-B4EE-ev{uy7Ixz7Y5DmMy8rpiHW*iOmd<0XSlOfmH0^G?oy;b+ z()S^yF$290aG?li3eP#UmplQs7amevlL*SS(t&gHlXCLt`$mSRq@< zrO1b(fW+Kxo)BrQxY8e==58+mT%B02;^lO^{1vUQ1Q5NlhHYj8(;=(JWuh`~XdSp7 zxLL!JXv+U6q-ZQb(w3gf)Eh4x&8D}+)y3eu*K*q~w>_7I37b&B9NaNK%<(tz1=kz< zDH2~U9~03#`ZJ?Ye*$tQo$79NpS7Og*f!v11sq;uc!c~6T&QpH z0iDIhq_pG@D2f0Xrs`XIP+-z}GL2Qr@ZsUYWB7mq#<_0%IJfkWtu0oO>gpC}!hz?x zSSf1Mi$|Buvke1Ut<91k*C-0v0Bm%eO8eH6i zAflm}!`rWMoZDKsOe^|s!?noGcCG$BK9QsRGjv@T+I>yZO5T1h6*6sT|KDd*LYtT@ zzP-adD)*#s8iNcY*{Bf3mi%rqz1szbdseSc`)?gd%$Bk+_sdZSCJrli+gLKQIzpyn z;{`ef6L;=$IY%GsHTiJH_PUecfqIyO3+6FA9VaLpFwQF)xrYj*!sA!b0H$)Q&Z(L4 zMf$l11GbGV&7z!biQ%nd$dHt8p{7I?|0N*H*l}_0$3N<1_zSVstBYM!+e^bPqTQs& zKs&2dsd={BIGNh_aG*JrL^7J8=*P+g*H8puBlA&aa(9`yu^-UgO?b<_9jYJUdGacH z$eLLAd)9jWL?Z`CD~FBzMTmB0qs;K^UBAdrt0)b6%tq@?5C4sI9yqd*u$LnLXwoBh zglQL%_1*<=uHh``*1hcZ61XA6Ha0`OkVD_`39uj5lG;eoiciC?eFAzQ!_3{i5%f5q zPaz(4;wA%1?jF4a5vB}Zt+a9t5mIV7PViArP6VDk@U7NAC7Eb%cMGGp@q9W;=nVt%#F7nYf3``jttu$;j4l`$WE z13Z-mq<_2%gcgYCh``Q9cWWxcbWjan!{U-lj`bGva;yN3d?#03Bk1QiH0?zw8!+e> z7pwf{nHG~5=tlu~_|}suU!j_u?KR{YP<>_6kwuVm{D5QT(2zAHu9DtFn-PV2-=2zo z(ztM9z0r%LNA$R{E`r_78*aP3!U_`=<_)<7b(W96R_u$7{YE(W_yvCF0by2tktAxI zq_dXq%e&b@$uFAI#heC_-1xlGmBu0Rh74e|0e)h7mz(`|NY7bvpYc8Hzk3pc^N{dZ zhOdC0{yTS4;4DeHIvYv+tBR!nfYTBJQL=G_{@7NpAo^B3aj1Om$4NI2aeW9GPM96g zqi@8G`RH*q#opDFhbKa56Q!_~=1FuXG2TtK0x@eMO-f4Y!`uTVL)4>Ivml-=-CWsz zaJB5Gk^OpCV79PBK#~P%njzqVyH2`Y6b~lKWx9}hV1)X;Sx<*mwB@{N26efo-vf*Wv~BU|6+-Mm1D(Z!q$3gW6pX39SoBt3h^B4MSRk z)_KD;L(BfgTwa?uH@R}U9FS#H)Yrnkv%8G^^mb?(I$4jHa= z9TN>mJ(_MbcNoD?SKa29RpWXLh@Hm-`dl!~sL_UQY^t8VsvvSU)Tw)HV<_jb16-xy z?1Eih-lIg|0PMs!lBmkWPdsbYBKOTd=8r}HGd1*$^Q~a;uH}CokXCxENaXx|Fp3NS zk1nvFgOuW&ip1()6sV25QbXOx&az*@BnkCavqSl0$bZxT$Aa2XuuvXV_QV}&YqNEY z2^=_L0fjL#3UGK!gexj}3uf`dV62Sf>~ck9@3anROqU1HqF5CSFDn>yG7pd2MK>yM zDdL*V>Hjp@3?)xB6ehrrElHc0;!9*c64C~iSL9$}QjOJLi9?7N?8Jv9QvcQG2uLkC zQRwqVX(5RJfZp)QC|#rBfh7e9H<rQ`j2)l%1~ECf1$-2%nB`@IU63 zb1sx%fP}Jn(!l3zXldXDeUqD)W&|f-&}KJ;NWsn~s{g@)mbcT{iU$Sa+@E={={{}z z6VHH;%MaD{#a2>t!c>Q_q0!d$3ggPnIwPD!t*BL3b&s{`>S~UtJ;Vpce-tRx?+NDh zUS(QjKgC7>L5Z&Kcil@Ti-;d$=tCkDNs)~NMky_&pnA;HS45U%h)BTVlQMGcN{-96 z6J~%lNe&JhJ^7=WeFK6Y+X-KdT5xR2C?es4=iuofMd3pDKe=0hU94QgjER>yV6m}n z`t+pN!34&D{g3ToB*D||_PuQLK=t_619Y!*5Z?ytaFF_|00a%jrB^KqO#BbTZ<6dG zq3_c#>0X!*I}q8P(0DteeRo0ZVwpWLB8{$-z3cc4q_`z?A{mv5GE9VaB*y$+q1Zsr zPrP_ycZ1A<{eP>Fg9UX}PdkyC+g{M@ioJEN0)<%s#@j{#)5%?SVkE4y!_nL<%?l*H zyq%N&OxlYUdWc8K^0XVlJd9bXX*V)r>w^y0OfsA9eitez=*xfqP!n%xBB6X=hI z6|~T6^MiV*7PwFc1YEE8g_;`s#{ZO!5Pp*yJ^W+d!4eSh{Vwh}D>cQM^t>00$XRFz z{g4y}R#rm#^rlf$B1}T3MFd3@C)?iT?Ua&|5`)6meIo|OrV<0UX}anqqa8a!MgMrs zRSFjJuVF|fN_&zdNyt#P14xAJw5zqqSsI=EanCZQ8>{ySHd6C4?=!w{4`$oXR{N=@ zh(r=D%uc%qG2}x!Bce&7=1l$L9R78oO1Un+dkwptJi9Vtk(RbTU1s8UIx*wu zAmwveL?fFeLvo6o_P0+-cGaw6=kL|^Y|8u4C-GVJ@p+HYOOJ?PdZ>=5go8pG`8XPy zo}3{1I0-s&M3Sjb>BepNH-!%CQ(aWIdT-GM^=W8Mm~pOy`=&mB2iop5G@*W}wnv0? 
ztbsRa9JMNcYRx77VAk-5w>vpKxl( z>pepXHx$Q5lP<-@V1|OTVbQgx?RBEV&qDW{9pBw3>@7}`KZTK!tK-~LM+YySyMvhu zHr@vPQ+S%_Tfdr@W%L(QMReTQpcHMLCR+npdVJq@Wir8A2txGCs&9twV8Sc zksvb`lTP^Vs}|E{l K73$@zqW%Z;d5s_d literal 0 HcmV?d00001 diff --git a/default_apps/netflix/templates/index.css b/default_apps/netflix/templates/index.css new file mode 100644 index 00000000..621d99d1 --- /dev/null +++ b/default_apps/netflix/templates/index.css @@ -0,0 +1,132 @@ +body { + font-family: Arial, sans-serif; + background-color: #141414; + color: white; + margin: 0; + padding: 20px; + box-sizing: border-box; +} +.wrapper { + background-color: #1f1f1f; + padding: 20px; + border-radius: 10px; + width: 100%; + max-width: 450px; + margin: 0 auto; + box-shadow: 0 0 10px rgba(255, 255, 255, 0.1); +} + +.viewing-stats { + display: flex; + justify-content: space-between; + margin-bottom: 20px; + padding-bottom: 15px; + border-bottom: 1px solid #333; +} +.stat-item { + text-align: center; +} +.stat-value { + font-size: 18px; + font-weight: bold; + color: #e50914; + display: block; + margin-bottom: 5px; +} +.stat-label { + font-size: 12px; + color: #999; +} +.show-list { + list-style-type: none; + padding: 0; + margin: 0; +} +.show-item { + display: block; + text-decoration: none; + color: white; + margin-bottom: 15px; + transition: + background-color 0.3s, + transform 0.2s; + padding: 10px; + border-radius: 8px; +} +.show-item:hover { + background-color: #2f2f2f; + transform: translateX(5px); +} +.show-content { + display: flex; + align-items: flex-start; +} +.show-number { + font-size: 24px; + margin-right: 15px; + min-width: 25px; + color: #e50914; + font-weight: bold; +} +.show-thumbnail { + height: 80px; + width: auto; + object-fit: cover; + object-position: top center; + border-radius: 8px; + margin-right: 15px; +} +.show-info { + flex-grow: 1; + min-width: 0; +} +.show-title { + display: flex; + align-items: center; + justify-content: space-between; + width: 100%; +} +.show-name { + font-size: 16px; + font-weight: bold; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: calc( + 100% - 80px + ); /* Adjust based on the width of your IMDb rating */ +} +.show-details { + font-size: 12px; + color: #999; +} +.show-detail { + margin-bottom: 2px; +} +.detail-label { + font-weight: bold; + color: #bbb; +} +.header { + display: flex; + align-items: center; + margin-bottom: 20px; +} +.netflix-logo { + height: 40px; + margin-right: 15px; +} +h1 { + margin: 0; + color: #e50914; + font-size: 24px; +} +.imdb-rating { + background-color: #f5c518; + color: #000; + padding: 2px 5px; + border-radius: 4px; + font-weight: bold; + font-size: 12px; + white-space: nowrap; +} diff --git a/default_apps/netflix/templates/page.html b/default_apps/netflix/templates/page.html new file mode 100644 index 00000000..084ab10d --- /dev/null +++ b/default_apps/netflix/templates/page.html @@ -0,0 +1,40 @@ + + + + + + My {year} Top Netflix Series + + + +
    +
    + +

+  <body>
+    <div class="wrapper">
+      <div class="header">
+        <img class="netflix-logo" src="{logo_src}" alt="Netflix logo" />
+        <h1>My {year} Top Netflix Series</h1>
+      </div>
+      <div class="viewing-stats">
+        <div class="stat-item">
+          <span class="stat-value">{total_time}</span>
+          <span class="stat-label">Total Time</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{year_fav_day}</span>
+          <span class="stat-label">Top Day</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{total_unique_show_views}</span>
+          <span class="stat-label">Shows</span>
+        </div>
+        <div class="stat-item">
+          <span class="stat-value">{total_views}</span>
+          <span class="stat-label">Eps</span>
+        </div>
+      </div>
+      <ul class="show-list">
+        {show_list_html}
+      </ul>
+    </div>
+  </body>
+</html>
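`page.py` assembles the final page the same way: it inlines `index.css` through the `{css}` placeholder and injects the concatenated cards through `{show_list_html}`. Here is a minimal sketch with invented stat values; in the real run they come from the merged IMDb dataframe, and `logo_src` is produced by `image_to_base64` in utils.py.

```python
# Minimal sketch of the final render step in page.py. All stat values
# below are invented placeholders for illustration.
with open("templates/page.html") as f:
    page_template = f.read()
with open("templates/index.css") as f:
    css = f.read()

page_html = page_template.format(
    logo_src="data:image/png;base64,...",  # placeholder, not a real image
    css=css,
    year=2024,
    total_time="42 hs, 30 ms",  # the shape format_minutes produces
    year_fav_day="Friday",
    total_unique_show_views=17,
    total_views=120,
    show_list_html="<li>rendered cards go here</li>",
)

with open("output/index.html", "w") as f:
    f.write(page_html)
```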
    +
    + + diff --git a/default_apps/netflix/tmdb.py b/default_apps/netflix/tmdb.py new file mode 100644 index 00000000..db7fcae6 --- /dev/null +++ b/default_apps/netflix/tmdb.py @@ -0,0 +1,294 @@ +import datetime +import json +import math +import os + +import pandas as pd +import requests +from utils import load_cache, normalize_title, save_cache + +TMDB_BASE_URL = "https://api.themoviedb.org/3" +IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500" # w500 refers to image size + +tmdb_id_cache = load_cache("tmdb_id.json") +tmdb_search_cache = load_cache("tmdb_search.json") +imdb_tmdb_cache = load_cache("imdb_tmdb.json") + + +def add_to_missing(one_or_many, tmdb_id, missing_imdb_id): + if not isinstance(one_or_many, list): + one_or_many = [one_or_many] + for one in one_or_many: + missing_imdb_id[one] = tmdb_id + save_cache(missing_imdb_id, "missing_imdb_id.json") + + +def in_manual_mapping(original_title, missing_imdb_id): + titles = [] + titles.append(original_title.lower()) + titles.append(normalize_title(original_title).lower()) + lower_keys = {k.lower(): v for k, v in missing_imdb_id.items()} + for title in titles: + for key, value in lower_keys.items(): + if title in key or key in title: + return value + return None + + +def search_tmdb_title(title, api_key, missing_imdb_id): + url = f"{TMDB_BASE_URL}/search/multi" + params = {"api_key": api_key, "query": title} + if title in tmdb_search_cache: + result = tmdb_search_cache[title] + return pd.Series(result) + + data = None + + # check manual mapping where a user can set the imdb tconst id by hand + manual_tmdb_id = in_manual_mapping(title, missing_imdb_id) + if manual_tmdb_id: + print( + f"> Resolving {title} imdb_id: {manual_tmdb_id} from supplied missing file" + ) + data = get_tmdb_details_by_imdb_id(manual_tmdb_id, api_key) + tmdb_search_cache[title] = data + save_cache(tmdb_search_cache, "tmdb_search.json") + return pd.Series(data) + + if data is None: + print(f"> Searching tmdb for {title}") + response = requests.get(url, params=params) + if response.status_code == 200: + data = response.json() + if "results" in data: + for result in data["results"]: + if result["media_type"] in ["tv", "movie"]: + tmdb_search_cache[title] = result + save_cache(tmdb_search_cache, "tmdb_search.json") + return pd.Series(result) + + return None + + +def flatten_tmdb_dict(data): + flattened_dict = {} + flattened_dict["homepage"] = data.get("homepage", None) + external_ids = data.get("external_ids", {}) + flattened_dict["imdb_id"] = external_ids.get("imdb_id", None) + flattened_dict["facebook_id"] = external_ids.get("facebook_id", None) + flattened_dict["instagram_id"] = external_ids.get("instagram_id", None) + flattened_dict["twitter_id"] = external_ids.get("twitter_id", None) + genres = data.get("genres", {}) + genre_ids = [] + genre_names = [] + for genre in genres: + genre_ids.append(genre["id"]) + genre_names.append(genre["name"]) + flattened_dict["genre_ids"] = genre_ids + flattened_dict["genre_names"] = genre_names + return flattened_dict + + +def get_tmdb_id_field(row) -> int | None: + try: + if "tmdb_id" in row: + return int(row["tmdb_id"]) + except Exception: + pass + return None + + +def get_tmdb_media_type_field(row) -> int | None: + try: + if "tmdb_media_type" in row: + math.isnan(row["tmdb_media_type"]) + except Exception: + if isinstance(row["tmdb_media_type"], str): + return row["tmdb_media_type"] + pass + return None + + +def get_tmdb_details(row, api_key): + tmdb_id = get_tmdb_id_field(row) + media_type = get_tmdb_media_type_field(row) + + 
if not isinstance(tmdb_id, int) or not isinstance(media_type, str): + print(f"> Skipping {row.netflix_title} no tmdb_id") + return None + url = f"{TMDB_BASE_URL}/{media_type}/{tmdb_id}" + params = {"api_key": api_key, "append_to_response": "external_ids"} + + cache_key = f"{tmdb_id}_{media_type}" + if cache_key in tmdb_id_cache: + result = tmdb_id_cache[cache_key] + return pd.Series(flatten_tmdb_dict(result)) + + print(f"> Querying tmdb for {cache_key}") + response = requests.get(url, params=params) + + if response.status_code == 200: + result = response.json() + if result: + tmdb_id_cache[cache_key] = result + save_cache(tmdb_id_cache, "tmdb_id.json") + return pd.Series(flatten_tmdb_dict(result)) + + return None + + +def get_tmdb_details_by_imdb_id(imdb_id, api_key): + if imdb_id in imdb_tmdb_cache: + print(f"Getting imdb_id: {imdb_id} from cache") + return imdb_tmdb_cache[imdb_id] + + url = f"https://api.themoviedb.org/3/find/{imdb_id}" + params = {"api_key": api_key, "external_source": "imdb_id"} + + print(f"> Querying tmdb for imdb_id: {imdb_id}") + response = requests.get(url, params=params) + + if response.status_code == 200: + data = response.json() + sections = [ + "movie_results", + "person_results", + "tv_results", + "tv_episode_results", + "tv_season_results", + ] + + data_dict = None + for section in sections: + if data.get(section): + # Get the first result in the section + data_dict = data[section][0] + imdb_tmdb_cache[imdb_id] = data_dict + save_cache(imdb_tmdb_cache, "imdb_tmdb.json") + return data_dict + + +def get_tmdb_id(row, tmdb_api_key, missing_imdb_id): + original_title = row["netflix_title"] + title = normalize_title(original_title) + + # Check for season/episode/series/volume in the title + if any( + keyword in title.lower() + for keyword in ["season", "episode", "series", "volume", " part"] + ): + # Split by colon and take the first part + title = title.split(":")[0].strip() + + result = search_tmdb_title(title, tmdb_api_key, missing_imdb_id) + if result is None: + title = title.split(":")[0].strip() + # if splitting it changes it lets try again anyway + if title != normalize_title(original_title): + result = search_tmdb_title(title, tmdb_api_key, missing_imdb_id) + if result is not None: + # make sure repeated search gets cached at first title as well + tmdb_search_cache[normalize_title(original_title)] = result.to_dict() + save_cache(tmdb_search_cache, "tmdb_search.json") + + if result is not None: + # shows have names and movies have titles + df = pd.DataFrame([result]) + if "name" in df.columns: + title_name = "name" + elif "title" in df.columns: + title_name = "title" + else: + raise Exception(f"Title is missing {row}") + + poster_path = result.get("poster_path") + tmdb_poster_url = f"{IMAGE_BASE_URL}{poster_path}" + df["tmdb_poster_url"] = tmdb_poster_url + + df = df.rename( + columns={ + title_name: "tmdb_title", + "id": "tmdb_id", + "media_type": "tmdb_media_type", + } + ) + + keep_cols = ["tmdb_id", "tmdb_title", "tmdb_media_type", "tmdb_poster_url"] + df = df[keep_cols] + return pd.Series(df.iloc[0]) + + return None + + +def get_this_year(df, year): + return df[df["netflix_date"].dt.year == year] + + +def run(api_key, missing_file): + try: + missing_imdb_id = {} + temp_folder = "./temp/" + output_file = "2_tmdb.csv" + + if missing_file is not None: + missing_file_path = os.path.abspath(missing_file) + if os.path.exists(missing_file_path): + try: + with open(missing_file_path, "r") as f: + missing_imdb_id = json.load(f) + except Exception as e: + 
print(f"Failed to load file: {missing_file_path}. {e}") + + tmdb_df = pd.read_csv("./temp/1_netflix.csv") + + tmdb_df["netflix_date"] = pd.to_datetime( + tmdb_df["netflix_date"], format=r"%m/%d/%y" + ) + + current_year = datetime.datetime.now().year + tmdb_df = get_this_year(tmdb_df, current_year) + + sample_tmdb_id = tmdb_df.apply( + lambda row: pd.concat([row, get_tmdb_id(row, api_key, missing_imdb_id)]), + axis=1, + ) + + df = sample_tmdb_id.apply( + lambda row: pd.concat( + [ + row, + get_tmdb_details(row, api_key), + ] + ), + axis=1, + ) + + # split and save missing imdb_id records + column_name = "imdb_id" + df_missing = df[df[column_name].isna()] + if len(df_missing) > 0: + missing_path = temp_folder + "/" + "2_missing.csv" + print(f"> You have {len(df_missing)} missing rows see: {missing_path}") + helper = r""" +To fix your missing imdb IDs you can create a manual json file. + +Run: +echo '{"Life: Primates": "tt1533395"}' > my-missing-ids.json +python main.py --missing-imdb-file=my-missing-ids.json + +Note: The titles can be partial string matches. +""" + print(helper) + df_missing.to_csv(missing_path, index=False) + + df_imdb_id = df[df[column_name].notna()] + + path = os.path.abspath(temp_folder + "/" + output_file) + df_imdb_id.to_csv(path, index=False) + print(f"> Writing {output_file} to {temp_folder}") + + except Exception as e: + import traceback + + print(traceback.print_exc()) + print("Failed to run tmdb.py", e) diff --git a/default_apps/netflix/utils.py b/default_apps/netflix/utils.py new file mode 100644 index 00000000..166f3d27 --- /dev/null +++ b/default_apps/netflix/utils.py @@ -0,0 +1,97 @@ +import ast +import base64 +import hashlib +import json +import os +import re +import unicodedata + +import requests + +cache_folder = "./cache/" + + +def save_cache(results_cache, file_name): + path = cache_folder + "/" + file_name + with open(path, "w") as f: + json.dump(results_cache, f) + + +def load_cache(file_name): + path = cache_folder + "/" + file_name + if os.path.exists(path): + with open(path, "r") as f: + return json.load(f) + else: + return {} + + +# Function to normalize the title for search, keeping colons +def normalize_title(title): + # Step 1: Normalize Unicode characters (decompose accents) + title = ( + unicodedata.normalize("NFKD", title).encode("ASCII", "ignore").decode("utf-8") + ) + + # Step 2: Convert to lowercase + title = title.lower() + + # Step 3: Remove unnecessary punctuation except for colons (keep ':') + title = re.sub( + r"[^\w\s:]", "", title + ) # Keeps only letters, numbers, whitespace, and colons + + # Step 4: Strip leading/trailing whitespace + return title.strip() + + +def download_file(url, folder_path, file_name=None): + # Ensure the folder exists + os.makedirs(folder_path, exist_ok=True) + + # Get the file name from the URL if not provided + if file_name is None: + file_name = url.split("/")[-1] + + # Define the full path to save the file + file_path = os.path.join(folder_path, file_name) + + # Download the file + response = requests.get(url, stream=True) + response.raise_for_status() # Check for errors + + # Write the file to the specified folder + with open(file_path, "wb") as file: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + file.write(chunk) + + print(f"File downloaded successfully and saved to: {file_path}") + + +def evaluate_list(value): + try: + # Use ast.literal_eval to safely evaluate strings into Python literals (like lists, dicts) + return ast.literal_eval(value) + except (ValueError, SyntaxError): + # 
Return the original value if it's not a valid Python literal + return value + + +def image_to_base64(image_path): + with open(image_path, "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()).decode("utf-8") + return f"data:image/png;base64,{encoded_string}" + + +def compute_file_hash(filepath, hash_algorithm="sha256"): + # Choose the hash algorithm + hash_func = getattr(hashlib, hash_algorithm)() + + # Read file in binary mode and update hash in chunks + with open(filepath, "rb") as file: + while chunk := file.read(8192): + hash_func.update(chunk) + + # Return the hex representation of the hash + return hash_func.hexdigest() diff --git a/docker/syftbox.dockerfile b/docker/syftbox.dockerfile new file mode 100644 index 00000000..3d20c30d --- /dev/null +++ b/docker/syftbox.dockerfile @@ -0,0 +1,13 @@ +# Start with the Alpine base image with Python 3 +FROM python:3.12-alpine + +# Set the working directory inside the container +WORKDIR /app +COPY . /app + +RUN pip install uv +RUN uv venv .venv +RUN uv pip install -e . + +# CMD ["ash", "/app/scripts/server.sh"] + diff --git a/pyproject.toml b/pyproject.toml index b56ef7b6..89499cd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ include-package-data = true # Include package data [tool.setuptools.package-data] -syftbox = ["*.css", ".js", ".html", ".zip"] +syftbox = ["*.css", ".js", ".html", ".zip", ".sh"] [project.scripts] syftbox = "syftbox.main:main" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 3d145e4c..8c85eb35 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -1,9 +1,9 @@ #!/bin/bash -source ./build.sh -source ./ssh.sh +source ./scripts/build.sh +source ./scripts/ssh.sh -LOCAL_FILE="dist/syftbox-0.1.0-py3-none-any.whl" +LOCAL_FILE="./dist/syftbox-0.1.0-py3-none-any.whl" REMOTE_PATH="~" # Use scp to transfer the file to the remote server diff --git a/syftbox/client/client.py b/syftbox/client/client.py index 904865e2..4d38af8e 100644 --- a/syftbox/client/client.py +++ b/syftbox/client/client.py @@ -1,5 +1,6 @@ import argparse import atexit +import platform import importlib import os import subprocess @@ -108,6 +109,9 @@ def copy_icon_file(icon_folder: str, dest_folder: str) -> None: def load_or_create_config(args) -> ClientConfig: + syft_config_dir = os.path.abspath(os.path.expanduser("~/.syftbox")) + os.makedirs(syft_config_dir, exist_ok=True) + client_config = None try: client_config = ClientConfig.load(args.config_path) @@ -141,7 +145,8 @@ def load_or_create_config(args) -> ClientConfig: if not os.path.exists(client_config.sync_folder): os.makedirs(client_config.sync_folder, exist_ok=True) - # copy_icon_file(ICON_FOLDER, client_config.sync_folder) + if platform.system() == "Darwin": + copy_icon_file(ICON_FOLDER, client_config.sync_folder) if args.email: client_config.email = args.email @@ -323,14 +328,16 @@ def parse_args(): parser = argparse.ArgumentParser( description="Run the web application with plugins.", ) - parser.add_argument("--config_path", type=str, default=DEFAULT_CONFIG_PATH, help="config path") + parser.add_argument( + "--config_path", type=str, default=DEFAULT_CONFIG_PATH, help="config path" + ) parser.add_argument("--sync_folder", type=str, help="sync folder path") parser.add_argument("--email", type=str, help="email") parser.add_argument("--port", type=int, default=8080, help="Port number") parser.add_argument( "--server", type=str, - default="http://20.168.10.234:8080", + default="http://20.168.10.234:8080", help="Server", ) return parser.parse_args() @@ -594,8 
diff --git a/syftbox/client/plugins/apps.py b/syftbox/client/plugins/apps.py
index 83baca09..13905517 100644
--- a/syftbox/client/plugins/apps.py
+++ b/syftbox/client/plugins/apps.py
@@ -9,6 +9,7 @@
     perm_file_path,
 )
 
+
 def find_and_run_script(task_path, extra_args):
     script_path = os.path.join(task_path, "run.sh")
     env = os.environ.copy()  # Copy the current environment
@@ -19,12 +20,16 @@ def find_and_run_script(task_path, extra_args):
         os.chmod(script_path, os.stat(script_path).st_mode | 0o111)
 
         # Check if the script has a shebang
-        with open(script_path, 'r') as script_file:
+        with open(script_path, "r") as script_file:
             first_line = script_file.readline().strip()
-            has_shebang = first_line.startswith('#!')
+            has_shebang = first_line.startswith("#!")
 
         # Prepare the command based on whether there's a shebang or not
-        command = [script_path] + extra_args if has_shebang else ["/bin/bash", script_path] + extra_args
+        command = (
+            [script_path] + extra_args
+            if has_shebang
+            else ["/bin/bash", script_path] + extra_args
+        )
 
         try:
             result = subprocess.run(
@@ -44,13 +49,15 @@ def find_and_run_script(task_path, extra_args):
         raise FileNotFoundError(f"run.sh not found in {task_path}")
 
-
 logger = logging.getLogger(__name__)
 
 DEFAULT_SCHEDULE = 10000
 DESCRIPTION = "Runs Apps"
 
-DEFAULT_APPS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'default_apps'))
+DEFAULT_APPS_PATH = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), "..", "..", "..", "default_apps")
+)
+
 
 def copy_default_apps(apps_path):
     if not os.path.exists(DEFAULT_APPS_PATH):
@@ -60,13 +67,16 @@ def copy_default_apps(apps_path):
     for app in os.listdir(DEFAULT_APPS_PATH):
         src_app_path = os.path.join(DEFAULT_APPS_PATH, app)
         dst_app_path = os.path.join(apps_path, app)
-        
+
         if os.path.isdir(src_app_path):
             if os.path.exists(dst_app_path):
-                shutil.rmtree(dst_app_path)
-            shutil.copytree(src_app_path, dst_app_path)
+                print(f"App already installed at: {dst_app_path}")
+                # shutil.rmtree(dst_app_path)
+            else:
+                shutil.copytree(src_app_path, dst_app_path)
             print(f"Copied default app: {app}")
 
+
 def run_apps(client_config):
     # create the directory
     apps_path = client_config.sync_folder + "/" + "apps"
diff --git a/syftbox/client/plugins/create_datasite.py b/syftbox/client/plugins/create_datasite.py
index 1b0f52a9..946e2673 100644
--- a/syftbox/client/plugins/create_datasite.py
+++ b/syftbox/client/plugins/create_datasite.py
@@ -24,6 +24,19 @@ def claim_datasite(client_config):
             perm_file.save(file_path)
         except Exception as e:
             print("Failed to create perm file", e)
+
+    public_path = client_config.datasite_path + "/" + "public"
+    os.makedirs(public_path, exist_ok=True)
+    public_file_path = perm_file_path(public_path)
+    if os.path.exists(public_file_path):
+        public_perm_file = SyftPermission.load(public_file_path)
+    else:
+        print(f"> {client_config.email} Creating Public Permfile")
+        try:
+            public_perm_file = SyftPermission.mine_with_public_read(client_config.email)
+            public_perm_file.save(public_file_path)
+        except Exception as e:
+            print("Failed to create perm file", e)
 
 
 def run(shared_state):
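
The find_and_run_script change above is the core of the apps plugin: it marks run.sh executable, execs it directly only when its first line is a shebang, and falls back to /bin/bash otherwise. A rough standalone sketch of that dispatch, with the SyftBox-specific environment handling omitted:

```
import os
import subprocess


def run_script(script_path: str, extra_args: list[str]) -> subprocess.CompletedProcess:
    # Ensure the script is executable (adds u+x, g+x, o+x to the current mode)
    os.chmod(script_path, os.stat(script_path).st_mode | 0o111)

    # A script can only be exec'd directly if its first line is a shebang
    with open(script_path, "r") as f:
        has_shebang = f.readline().startswith("#!")

    command = (
        [script_path, *extra_args] if has_shebang else ["/bin/bash", script_path, *extra_args]
    )
    return subprocess.run(command, capture_output=True, text=True, check=False)
```
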
diff --git a/syftbox/client/plugins/sync.py b/syftbox/client/plugins/sync.py
index 1bf683e1..d299820e 100644
--- a/syftbox/client/plugins/sync.py
+++ b/syftbox/client/plugins/sync.py
@@ -3,6 +3,7 @@
 from collections import defaultdict
 from datetime import datetime
 from threading import Event
+from watchdog.events import DirModifiedEvent
 
 import requests
@@ -596,11 +597,21 @@ def do_sync(shared_state):
 def run(shared_state, *args, **kwargs):
     if len(args) == 1:
         event = args[0]
-
         # ignore certain files / folders
         if hasattr(event, "src_path"):
             if CLIENT_CHANGELOG_FOLDER in event.src_path:
                 return
+
+        # ignore these events for now on linux
+        # FileOpenedEvent
+        # FileClosedNoWriteEvent
+        # DirModifiedEvent
+        if event.event_type in ["opened", "closed_no_write"]:
+            return
+
+        if isinstance(event, DirModifiedEvent):
+            return
+
         shared_state.fs_events.append(event)
 
     if "sync" not in shared_state.timers:
diff --git a/syftbox/server/server.py b/syftbox/server/server.py
index 98f5eb81..308b9699 100644
--- a/syftbox/server/server.py
+++ b/syftbox/server/server.py
@@ -170,6 +170,20 @@ async def lifespan(app: FastAPI):
  ___) | |_| |  _| |_| |_) | (_) >  <
 |____/ \__, |_|  \__|____/ \___/_/\_\
        |___/
+
+
+# MacOS and Linux
+Install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# create a virtualenv somewhere
+uv venv .venv
+
+# install the wheel
+uv pip install http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl --reinstall
+
+# run the client
+uv run syftbox client
 """
@@ -177,6 +191,17 @@ async def lifespan(app: FastAPI):
 async def get_ascii_art():
     return ascii_art
 
+@app.get("/wheel/{path:path}", response_class=HTMLResponse)
+async def browse_datasite(request: Request, path: str):
+    if path == "":  # Check if path is empty (meaning "/datasites/")
+        return RedirectResponse(url="/")
+
+    filename = path.split("/")[0]
+    if filename.endswith(".whl"):
+        wheel_path = os.path.expanduser("~/syftbox-0.1.0-py3-none-any.whl")
+        return FileResponse(wheel_path, media_type="application/octet-stream")
+    return filename
+
 
 def get_file_list(directory="."):
     file_list = []
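
That /wheel route is what makes the quickstart's `uv pip install` URL work: any request whose first path segment ends in .whl streams the wheel from the server user's home directory. A cut-down sketch of the same idea, using the route and file location from this patch (the unused request parameter and error handling for a missing wheel are left out):

```
import os

from fastapi import FastAPI
from fastapi.responses import FileResponse, RedirectResponse

app = FastAPI()


@app.get("/wheel/{path:path}")
async def serve_wheel(path: str):
    if path == "":
        return RedirectResponse(url="/")
    filename = path.split("/")[0]
    if filename.endswith(".whl"):
        # The deploy script copies the built wheel to the server user's home
        wheel_path = os.path.expanduser("~/syftbox-0.1.0-py3-none-any.whl")
        # octet-stream so pip/uv treats the response as a binary download
        return FileResponse(wheel_path, media_type="application/octet-stream")
    return filename
```
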
From a8ccb0265d713f0ba56dae2a2fe7bb61ea6f3b69 Mon Sep 17 00:00:00 2001
From: Madhava Jay
Date: Mon, 30 Sep 2024 18:23:15 +1000
Subject: [PATCH 2/3] linting

---
 README.md                                  |   12 +-
 default_apps/adder/main.py                 |   10 +-
 .../manual_pipeline/manual_pipeline_app.py |    7 -
 default_apps/netflix/main.py               |    4 +
 notebooks/01-trade-create.ipynb            | 1402 +----------------
 notebooks/02-trade-code.ipynb              |  371 +----
 notebooks/03-netflix-code.ipynb            |    2 -
 projects/netflix_stats/main.py             |    1 -
 syftbox/client/client.py                   |    4 +-
 syftbox/client/plugins/create_datasite.py  |    2 +-
 syftbox/client/plugins/sync.py             |   21 +-
 syftbox/lib/lib.py                         |   22 +-
 syftbox/server/server.py                   |    1 +
 13 files changed, 112 insertions(+), 1747 deletions(-)

diff --git a/README.md b/README.md
index 25b6cbfb..f891b9c4 100644
--- a/README.md
+++ b/README.md
@@ -10,49 +10,57 @@
 # Quickstart User Installation
 
 ## install uv
+
 curl -LsSf https://astral.sh/uv/install.sh | sh
 
 ## create a virtualenv somewhere
+
 uv venv .venv
 
 ## install the wheel
+
 uv pip install http://20.168.10.234:8080/wheel/syftbox-0.1.0-py3-none-any.whl --reinstall
 
 ## run the client
+
 uv run syftbox client
 
 # Quickstart Client Developer Installation
 
-### Step 0: Open your terminal to the root of this Github repository 
+### Step 0: Open your terminal to the root of this Github repository
 
 Begin by opening your terminal and navigating to the root directory of this github repository (so when you run 'ls' it should show folders like "syftbox", "server", "tests", etc.). Then run the commands in steps 1-4:
 
 ### Step 1: Install Homebrew
+
 ```
 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
 ```
 
 ### Step 2: Install uv (using homebrew — which is better for this than pip)
+
 ```
 brew install uv
 ```
 
 ### Step 3: Install a virtual environment using uv
+
 ```
 uv venv
 ```
 
 ### Step 4: Install a relative version of uv.
+
 ```
 uv pip install -e .
 ```
 
 ### Step 5: Run the client
+
 ```
 uv run syftbox/client/client.py
 ```
 
-
 # Alternative Options
 
 ### Run Client
diff --git a/default_apps/adder/main.py b/default_apps/adder/main.py
index bb44aa7c..8056523f 100644
--- a/default_apps/adder/main.py
+++ b/default_apps/adder/main.py
@@ -1,7 +1,7 @@
 import json
 import os
 
-from syftbox.lib import ClientConfig
+from syftbox.lib import ClientConfig
 
 config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
 client_config = ClientConfig.load(config_path)
@@ -15,14 +15,14 @@
 output_file_path = f"{output_folder}/data.json"
 
 if os.path.exists(input_file_path):
-    with open(input_file_path, 'r') as f:
+    with open(input_file_path, "r") as f:
         data = json.load(f)
 
-    data['datum'] += 1
+    data["datum"] += 1
 
-    with open(output_file_path, 'w') as f:
+    with open(output_file_path, "w") as f:
         json.dump(data, f)
 
     os.remove(input_file_path)
 else:
-    print(f"Input file {input_file_path} does not exist.")
\ No newline at end of file
+    print(f"Input file {input_file_path} does not exist.")
diff --git a/default_apps/manual_pipeline/manual_pipeline_app.py b/default_apps/manual_pipeline/manual_pipeline_app.py
index 44c153ea..da792730 100644
--- a/default_apps/manual_pipeline/manual_pipeline_app.py
+++ b/default_apps/manual_pipeline/manual_pipeline_app.py
@@ -1,4 +1,3 @@
-
 @dataclass
 class SyftLink(Jsonable):
     @classmethod
@@ -271,8 +270,6 @@ def create_datasite_import_path(datasite: str) -> str:
     return import_path
 
-
-
 @dataclass
 class DatasiteManifest(Jsonable):
     datasite: str
@@ -295,9 +292,6 @@ class Dataset:
     sync_path: str
 
-
-
-
 def extract_leftmost_email(text: str) -> str:
     # Define a regex pattern to match an email address
     email_regex = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"
@@ -1230,7 +1224,6 @@ class TaskManifest(Jsonable):
     write_back_denied_path: str
 
-
 @dataclass
 class PipelineActionRun(PipelineAction):
     exit_code: int | None = None
diff --git a/default_apps/netflix/main.py b/default_apps/netflix/main.py
index 277513e7..61315a9a 100644
--- a/default_apps/netflix/main.py
+++ b/default_apps/netflix/main.py
@@ -9,9 +9,11 @@
 from tmdb import run as get_tmdb_data
 from utils import compute_file_hash, load_cache, save_cache
 
+
 def publish_page(output_path):
     try:
         from syftbox.lib import ClientConfig
+
         config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
         client_config = ClientConfig.load(config_path)
 
@@ -26,10 +28,12 @@ def publish_page(output_path):
         )
     except Exception as e:
         import traceback
+
         print(traceback.format_exc())
         print("Couldnt publish", e)
         pass
 
+
 def main():
     # Create the argument parser
     parser = argparse.ArgumentParser(description="Enter your TMDB API key.")
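
publish_page above also illustrates the general pattern SyftBox apps use to publish results: load the ClientConfig pointed to by SYFTBOX_CLIENT_CONFIG_PATH, then copy the rendered file somewhere under the synced folder. A simplified sketch; the destination layout below is an assumption for illustration, since this diff does not show the app's real destination_path:

```
import os
import shutil

from syftbox.lib import ClientConfig


def publish(output_path: str) -> None:
    # The client sets this env var before launching an app's run.sh
    config_path = os.environ.get("SYFTBOX_CLIENT_CONFIG_PATH", None)
    client_config = ClientConfig.load(config_path)

    # Hypothetical destination; the real app computes its own destination_path
    destination = os.path.join(client_config.sync_folder, "apps", "netflix")
    os.makedirs(destination, exist_ok=True)
    shutil.copy2(output_path, os.path.join(destination, os.path.basename(output_path)))
```
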
autocache, config_for_user" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "8ed4993c-e154-40a0-8ada-9a9eb320dc79", + "execution_count": null, + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -40,21 +40,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "4", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client_config = config_for_user(\"andrew@openmined.org\")\n", "client_config" @@ -62,28 +51,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client_config" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "6", "metadata": {}, "outputs": [], @@ -93,28 +71,17 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatasiteManifest(datasite='andrew@openmined.org', file_path='/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/manifest/manifest.json', datasets={'Netflix_TMDB_IMDB': {'name': 'Netflix_TMDB_IMDB', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}}, code={})" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "manifest" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "8", "metadata": {}, "outputs": [], @@ -124,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "9", "metadata": {}, "outputs": [], @@ -134,388 +101,20 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "10", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/d4/s582723j2hqbtw60rnn5345r0000gn/T/ipykernel_94848/754433127.py:1: 
DtypeWarning: Columns (14) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(autocache(canada_dataset_url))\n" - ] - } - ], + "outputs": [], "source": [ "df = pd.read_csv(autocache(canada_dataset_url))" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "11", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    ClassificationYearPeriodPeriod Desc.Aggregate LevelIs Leaf CodeTrade Flow CodeTrade FlowReporter CodeReporter...PartnerPartner ISOCommodity CodeCommodityQty Unit CodeQty UnitQtyNetweight (kg)Trade Value (US$)Flag
    0HS2021202102February 2021401Imports124Canada...Other Asia, nesNaN6117Clothing accessories; made up, knitted or croc...0NaNNaNNaN92850
    1HS2021202102February 2021201Imports124Canada...EgyptNaN18Cocoa and cocoa preparations0NaNNaN0.01166040
    2HS2021202102February 2021201Imports124Canada...United KingdomNaN18Cocoa and cocoa preparations0NaNNaN0.014951750
    3HS2021202102February 2021201Imports124Canada...United Rep. of TanzaniaNaN18Cocoa and cocoa preparations0NaNNaN0.022480
    4HS2021202102February 2021201Imports124Canada...SingaporeNaN18Cocoa and cocoa preparations0NaNNaN0.0478400
    5HS2021202102February 2021201Imports124Canada...Viet NamNaN18Cocoa and cocoa preparations0NaNNaN0.035260
    6HS2021202102February 2021201Imports124Canada...South AfricaNaN18Cocoa and cocoa preparations0NaNNaN0.054620
    7HS2021202102February 2021201Imports124Canada...SpainNaN18Cocoa and cocoa preparations0NaNNaN0.03114250
    8HS2021202102February 2021201Imports124Canada...SwedenNaN18Cocoa and cocoa preparations0NaNNaN0.0117860
    9HS2021202102February 2021201Imports124Canada...VenezuelaNaN18Cocoa and cocoa preparations0NaNNaN0.0337150
    \n", - "

    10 rows × 22 columns

    \n", - "
    " - ], - "text/plain": [ - " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n", - "0 HS 2021 202102 February 2021 4 0 \n", - "1 HS 2021 202102 February 2021 2 0 \n", - "2 HS 2021 202102 February 2021 2 0 \n", - "3 HS 2021 202102 February 2021 2 0 \n", - "4 HS 2021 202102 February 2021 2 0 \n", - "5 HS 2021 202102 February 2021 2 0 \n", - "6 HS 2021 202102 February 2021 2 0 \n", - "7 HS 2021 202102 February 2021 2 0 \n", - "8 HS 2021 202102 February 2021 2 0 \n", - "9 HS 2021 202102 February 2021 2 0 \n", - "\n", - " Trade Flow Code Trade Flow Reporter Code Reporter ... \\\n", - "0 1 Imports 124 Canada ... \n", - "1 1 Imports 124 Canada ... \n", - "2 1 Imports 124 Canada ... \n", - "3 1 Imports 124 Canada ... \n", - "4 1 Imports 124 Canada ... \n", - "5 1 Imports 124 Canada ... \n", - "6 1 Imports 124 Canada ... \n", - "7 1 Imports 124 Canada ... \n", - "8 1 Imports 124 Canada ... \n", - "9 1 Imports 124 Canada ... \n", - "\n", - " Partner Partner ISO Commodity Code \\\n", - "0 Other Asia, nes NaN 6117 \n", - "1 Egypt NaN 18 \n", - "2 United Kingdom NaN 18 \n", - "3 United Rep. of Tanzania NaN 18 \n", - "4 Singapore NaN 18 \n", - "5 Viet Nam NaN 18 \n", - "6 South Africa NaN 18 \n", - "7 Spain NaN 18 \n", - "8 Sweden NaN 18 \n", - "9 Venezuela NaN 18 \n", - "\n", - " Commodity Qty Unit Code Qty Unit \\\n", - "0 Clothing accessories; made up, knitted or croc... 0 NaN \n", - "1 Cocoa and cocoa preparations 0 NaN \n", - "2 Cocoa and cocoa preparations 0 NaN \n", - "3 Cocoa and cocoa preparations 0 NaN \n", - "4 Cocoa and cocoa preparations 0 NaN \n", - "5 Cocoa and cocoa preparations 0 NaN \n", - "6 Cocoa and cocoa preparations 0 NaN \n", - "7 Cocoa and cocoa preparations 0 NaN \n", - "8 Cocoa and cocoa preparations 0 NaN \n", - "9 Cocoa and cocoa preparations 0 NaN \n", - "\n", - " Qty Netweight (kg) Trade Value (US$) Flag \n", - "0 NaN NaN 9285 0 \n", - "1 NaN 0.0 116604 0 \n", - "2 NaN 0.0 1495175 0 \n", - "3 NaN 0.0 2248 0 \n", - "4 NaN 0.0 47840 0 \n", - "5 NaN 0.0 3526 0 \n", - "6 NaN 0.0 5462 0 \n", - "7 NaN 0.0 311425 0 \n", - "8 NaN 0.0 11786 0 \n", - "9 NaN 0.0 33715 0 \n", - "\n", - "[10 rows x 22 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# private data samples\n", "ca_data = df[0:10]\n", @@ -524,369 +123,10 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "12", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    ClassificationYearPeriodPeriod Desc.Aggregate LevelIs Leaf CodeTrade Flow CodeTrade FlowReporter CodeReporter...PartnerPartner ISOCommodity CodeCommodityQty Unit CodeQty UnitQtyNetweight (kg)Trade Value (US$)Flag
    10HS2021202102February 2021201Imports124Canada...BangladeshNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.02272220
    11HS2021202102February 2021201Imports124Canada...HaitiNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0147480
    12HS2021202102February 2021201Imports124Canada...GuatemalaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.013140
    13HS2021202102February 2021201Imports124Canada...IraqNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.018250
    14HS2021202102February 2021201Imports124Canada...IsraelNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.010636270
    15HS2021202102February 2021201Imports124Canada...ItalyNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.083593270
    16HS2021202102February 2021201Imports124Canada...JordanNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0168580
    17HS2021202102February 2021201Imports124Canada...Rep. of MoldovaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0298970
    18HS2021202102February 2021201Imports124Canada...NigeriaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0222350
    19HS2021202102February 2021201Imports124Canada...SloveniaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.013280
    \n", - "

    10 rows × 22 columns

    \n", - "
    " - ], - "text/plain": [ - " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n", - "10 HS 2021 202102 February 2021 2 0 \n", - "11 HS 2021 202102 February 2021 2 0 \n", - "12 HS 2021 202102 February 2021 2 0 \n", - "13 HS 2021 202102 February 2021 2 0 \n", - "14 HS 2021 202102 February 2021 2 0 \n", - "15 HS 2021 202102 February 2021 2 0 \n", - "16 HS 2021 202102 February 2021 2 0 \n", - "17 HS 2021 202102 February 2021 2 0 \n", - "18 HS 2021 202102 February 2021 2 0 \n", - "19 HS 2021 202102 February 2021 2 0 \n", - "\n", - " Trade Flow Code Trade Flow Reporter Code Reporter ... Partner \\\n", - "10 1 Imports 124 Canada ... Bangladesh \n", - "11 1 Imports 124 Canada ... Haiti \n", - "12 1 Imports 124 Canada ... Guatemala \n", - "13 1 Imports 124 Canada ... Iraq \n", - "14 1 Imports 124 Canada ... Israel \n", - "15 1 Imports 124 Canada ... Italy \n", - "16 1 Imports 124 Canada ... Jordan \n", - "17 1 Imports 124 Canada ... Rep. of Moldova \n", - "18 1 Imports 124 Canada ... Nigeria \n", - "19 1 Imports 124 Canada ... Slovenia \n", - "\n", - " Partner ISO Commodity Code \\\n", - "10 NaN 19 \n", - "11 NaN 19 \n", - "12 NaN 19 \n", - "13 NaN 19 \n", - "14 NaN 19 \n", - "15 NaN 19 \n", - "16 NaN 19 \n", - "17 NaN 19 \n", - "18 NaN 19 \n", - "19 NaN 19 \n", - "\n", - " Commodity Qty Unit Code Qty Unit \\\n", - "10 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "11 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "12 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "13 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "14 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "15 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "16 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "17 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "18 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "19 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "\n", - " Qty Netweight (kg) Trade Value (US$) Flag \n", - "10 NaN 0.0 227222 0 \n", - "11 NaN 0.0 14748 0 \n", - "12 NaN 0.0 1314 0 \n", - "13 NaN 0.0 1825 0 \n", - "14 NaN 0.0 1063627 0 \n", - "15 NaN 0.0 8359327 0 \n", - "16 NaN 0.0 16858 0 \n", - "17 NaN 0.0 29897 0 \n", - "18 NaN 0.0 22235 0 \n", - "19 NaN 0.0 1328 0 \n", - "\n", - "[10 rows x 22 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Mock data samples\n", "mock_ca_data = df[10:20]\n", @@ -895,369 +135,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "13", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    ClassificationYearPeriodPeriod Desc.Aggregate LevelIs Leaf CodeTrade Flow CodeTrade FlowReporter CodeReporter...PartnerPartner ISOCommodity CodeCommodityQty Unit CodeQty UnitQtyNetweight (kg)Trade Value (US$)Flag
    10HS2021202102February 2021201Imports124Canada...BangladeshNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.02272220
    11HS2021202102February 2021201Imports124Canada...HaitiNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0147480
    12HS2021202102February 2021201Imports124Canada...GuatemalaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.013140
    13HS2021202102February 2021201Imports124Canada...IraqNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.018250
    14HS2021202102February 2021201Imports124Canada...IsraelNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.010636270
    15HS2021202102February 2021201Imports124Canada...ItalyNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.083593270
    16HS2021202102February 2021201Imports124Canada...JordanNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0168580
    17HS2021202102February 2021201Imports124Canada...Rep. of MoldovaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0298970
    18HS2021202102February 2021201Imports124Canada...NigeriaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.0222350
    19HS2021202102February 2021201Imports124Canada...SloveniaNaN19Preparations of cereals, flour, starch or milk...0NaNNaN0.013280
    \n", - "

    10 rows × 22 columns

    \n", - "
    " - ], - "text/plain": [ - " Classification Year Period Period Desc. Aggregate Level Is Leaf Code \\\n", - "10 HS 2021 202102 February 2021 2 0 \n", - "11 HS 2021 202102 February 2021 2 0 \n", - "12 HS 2021 202102 February 2021 2 0 \n", - "13 HS 2021 202102 February 2021 2 0 \n", - "14 HS 2021 202102 February 2021 2 0 \n", - "15 HS 2021 202102 February 2021 2 0 \n", - "16 HS 2021 202102 February 2021 2 0 \n", - "17 HS 2021 202102 February 2021 2 0 \n", - "18 HS 2021 202102 February 2021 2 0 \n", - "19 HS 2021 202102 February 2021 2 0 \n", - "\n", - " Trade Flow Code Trade Flow Reporter Code Reporter ... Partner \\\n", - "10 1 Imports 124 Canada ... Bangladesh \n", - "11 1 Imports 124 Canada ... Haiti \n", - "12 1 Imports 124 Canada ... Guatemala \n", - "13 1 Imports 124 Canada ... Iraq \n", - "14 1 Imports 124 Canada ... Israel \n", - "15 1 Imports 124 Canada ... Italy \n", - "16 1 Imports 124 Canada ... Jordan \n", - "17 1 Imports 124 Canada ... Rep. of Moldova \n", - "18 1 Imports 124 Canada ... Nigeria \n", - "19 1 Imports 124 Canada ... Slovenia \n", - "\n", - " Partner ISO Commodity Code \\\n", - "10 NaN 19 \n", - "11 NaN 19 \n", - "12 NaN 19 \n", - "13 NaN 19 \n", - "14 NaN 19 \n", - "15 NaN 19 \n", - "16 NaN 19 \n", - "17 NaN 19 \n", - "18 NaN 19 \n", - "19 NaN 19 \n", - "\n", - " Commodity Qty Unit Code Qty Unit \\\n", - "10 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "11 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "12 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "13 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "14 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "15 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "16 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "17 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "18 Preparations of cereals, flour, starch or milk... 0 NaN \n", - "19 Preparations of cereals, flour, starch or milk... 
0 NaN \n", - "\n", - " Qty Netweight (kg) Trade Value (US$) Flag \n", - "10 NaN 0.0 227222 0 \n", - "11 NaN 0.0 14748 0 \n", - "12 NaN 0.0 1314 0 \n", - "13 NaN 0.0 1825 0 \n", - "14 NaN 0.0 1063627 0 \n", - "15 NaN 0.0 8359327 0 \n", - "16 NaN 0.0 16858 0 \n", - "17 NaN 0.0 29897 0 \n", - "18 NaN 0.0 22235 0 \n", - "19 NaN 0.0 1328 0 \n", - "\n", - "[10 rows x 22 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mock_ca_data" ] @@ -1272,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "15", "metadata": {}, "outputs": [], @@ -1282,21 +163,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "16", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets')" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "datasets_path = manifest.create_public_folder(\"datasets\")\n", "datasets_path" @@ -1304,21 +174,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "17", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/trade_data')" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dataset_path = datasets_path / \"trade_data\"\n", "dataset_path" @@ -1326,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "18", "metadata": {}, "outputs": [], @@ -1338,21 +197,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "19", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/trade_data/trade_mock.csv')" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "csv_file = dataset_path / \"trade_mock.csv\"\n", "csv_file" @@ -1360,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "20", "metadata": {}, "outputs": [], @@ -1370,160 +218,28 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "21", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Trade Data\n", - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    AttributeValue
    0NameTrade Data
    1Syft Link..._data/trade_mock.csv
    2Schema{'Unnamed: 0': 'int64', 'Classification': 'obj...
    3Readme...None
    4Loader...None
    \n", - "
    " - ], - "text/plain": [ - "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=None, loader_link=None, _client_config=None, has_private=True)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "dataset = TabularDataset.from_csv(csv_file, \n", - " name=\"Trade Data\", \n", - " has_private=True)\n", + "dataset = TabularDataset.from_csv(csv_file, name=\"Trade Data\", has_private=True)\n", "dataset" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "22", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Trade Data\n", - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    AttributeValue
    0NameTrade Data
    1Syft Link..._data/trade_mock.csv
    2Schema{'Unnamed: 0': 'int64', 'Classification': 'obj...
    3Readme...None
    4Loader...None
    \n", - "
    " - ], - "text/plain": [ - "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=None, loader_link=None, _client_config=None, has_private=True)" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dataset" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "23", "metadata": {}, "outputs": [], @@ -1533,18 +249,10 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "24", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Dataset Published\n" - ] - } - ], + "outputs": [], "source": [ "dataset.publish(manifest, overwrite=True)" ] @@ -1567,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "27", "metadata": {}, "outputs": [], @@ -1577,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "28", "metadata": {}, "outputs": [], @@ -1588,42 +296,20 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "id": "29", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "SyftVault.link_private(csv_file, private_path)" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "id": "30", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SyftVault(mapping={'andrew@openmined.org/public/datasets/trade_data/trade_mock.csv': '/Users/atrask/Documents/GitHub/syft/notebooks/trade_private.csv'})" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "SyftVault.load_vault()" ] diff --git a/notebooks/02-trade-code.ipynb b/notebooks/02-trade-code.ipynb index db51b561..4916f115 100644 --- a/notebooks/02-trade-code.ipynb +++ b/notebooks/02-trade-code.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1", "metadata": {}, "outputs": [], @@ -22,21 +22,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client_config = config_for_user(\"andrew@openmined.org\")\n", "client_config" @@ -44,39 +33,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "> Setting Sync Dir to: /Users/atrask/Desktop/SyftBox\n" - ] - } - ], + "outputs": [], "source": [ "client_config.use()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "4", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DatasiteManifest(datasite='andrew@openmined.org', file_path='/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/manifest/manifest.json', datasets={'Netflix_TMDB_IMDB': {'name': 'Netflix_TMDB_IMDB', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/netflix_tmdb_imdb/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}, 'Trade Data': {'name': 'Trade Data', 'syft_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/trade_mock.csv', 'port': 80, 'protocol': 'syft', 'query': ''}, 'schema': {'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, 'readme_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/README.md', 'port': 80, 'protocol': 'syft', 'query': ''}, 'loader_link': {'host_or_ip': 'andrew@openmined.org', 'path': '/public/datasets/trade_data/loader.py', 'port': 80, 'protocol': 'syft', 'query': ''}, 'has_private': True}}, code={})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "manifest = client_config.manifest\n", "manifest" @@ -84,80 +54,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    NamePrivateSyft LinkSchemaReadmeLoader
    0Netflix_TMDB_IMDBTrue...istory_TMDB_IMDB.csv['Unnamed: 0', 'netflix_title', 'netflix_date'..._tmdb_imdb/README.md_tmdb_imdb/loader.py
    1Trade DataTrue..._data/trade_mock.csv['Unnamed: 0', 'Classification', 'Year', 'Peri...trade_data/README.mdtrade_data/loader.py
    2Netflix_TMDB_IMDBTrue...istory_TMDB_IMDB.csv['Unnamed: 0', 'netflix_title', 'netflix_date'..._tmdb_imdb/README.md_tmdb_imdb/loader.py
    \n", - "
    " - ], - "text/plain": [ - "DatasetResults(data=[TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True), TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True), TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "datasets = client_config.get_datasets()\n", "datasets" @@ -173,85 +73,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Netflix_TMDB_IMDB\n", - "\n", - "README:\n", - "

    Netflix_TMDB_IMDB

    \n", - "

    Private data: True

    \n", - "

    Schema: {'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}

    \n", - "

    Import Syntax

    \n", - "

    client_config.use()\n", - "from syftbox.lib.andrew.at.openmined.org.datasets import netflix_tmdb_imdb

    \n", - "

    Python Loader Example

    \n", - "

    df = pd.read_csv(sy_path(\"syft://andrew@openmined.org:80/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv\"))

    \n", - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    AttributeValue
    0NameNetflix_TMDB_IMDB
    1Syft Link...istory_TMDB_IMDB.csv
    2Schema{'Unnamed: 0': 'int64', 'netflix_title': 'obje...
    3Readme..._tmdb_imdb/README.md
    4Loader..._tmdb_imdb/loader.py
    \n", - "
    " - ], - "text/plain": [ - "TabularDataset(name='Netflix_TMDB_IMDB', syft_link=, schema={'Unnamed: 0': 'int64', 'netflix_title': 'object', 'netflix_date': 'object', 'tmdb_id': 'int64', 'tmdb_title': 'object', 'tmdb_media_type': 'object', 'tmdb_poster_url': 'object', 'homepage': 'object', 'imdb_id': 'object', 'facebook_id': 'object', 'instagram_id': 'object', 'twitter_id': 'object', 'genre_ids': 'object', 'genre_names': 'object', 'imdb_runtime_minutes': 'int64', 'imdb_rating': 'float64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "trade_data = datasets[0]\n", "trade_data" @@ -259,42 +84,20 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "8", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PosixPath('/Users/atrask/Desktop/SyftBox/andrew@openmined.org/public/datasets/netflix_tmdb_imdb/NetflixViewingHistory_TMDB_IMDB.csv')" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "trade_data.file_path" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'from syftbox.lib.andrew.at.openmined.org.datasets import netflix_tmdb_imdb'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "trade_data.import_string" ] @@ -309,85 +112,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "11", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Trade Data\n", - "\n", - "README:\n", - "

    Trade Data

    \n", - "

    Private data: True

    \n", - "

    Schema: {'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}

    \n", - "

    Import Syntax

    \n", - "

    client_config.use()\n", - "from syftbox.lib.andrew.at.openmined.org.datasets import trade_data

    \n", - "

    Python Loader Example

    \n", - "

    df = pd.read_csv(sy_path(\"syft://andrew@openmined.org:80/public/datasets/trade_data/trade_mock.csv\"))

    \n", - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    AttributeValue
    0NameTrade Data
    1Syft Link..._data/trade_mock.csv
    2Schema{'Unnamed: 0': 'int64', 'Classification': 'obj...
    3Readme...trade_data/README.md
    4Loader...trade_data/loader.py
    \n", - "
    " - ], - "text/plain": [ - "TabularDataset(name='Trade Data', syft_link=, schema={'Unnamed: 0': 'int64', 'Classification': 'object', 'Year': 'int64', 'Period': 'int64', 'Period Desc.': 'object', 'Aggregate Level': 'int64', 'Is Leaf Code': 'int64', 'Trade Flow Code': 'int64', 'Trade Flow': 'object', 'Reporter Code': 'int64', 'Reporter': 'object', 'Reporter ISO': 'float64', 'Partner Code': 'int64', 'Partner': 'object', 'Partner ISO': 'float64', 'Commodity Code': 'int64', 'Commodity': 'object', 'Qty Unit Code': 'int64', 'Qty Unit': 'float64', 'Qty': 'float64', 'Netweight (kg)': 'float64', 'Trade Value (US$)': 'int64', 'Flag': 'int64'}, readme_link=, loader_link=, _client_config=ClientConfig(config_path='/Users/atrask/Documents/GitHub/syft/client_config.json', sync_folder='/Users/atrask/Desktop/SyftBox', port=8080, email='andrew@openmined.org', token=6461387628315936375, server_url='http://20.168.10.234:8080', email_token=None), has_private=True)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from syftbox.lib.andrew.at.openmined.org.datasets import trade_data\n", "\n", @@ -396,50 +124,20 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "c48a659a-3560-47a9-a53f-e0d79664bed1", + "execution_count": null, + "id": "12", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2K\u001b[2mResolved \u001b[1m1 package\u001b[0m \u001b[2min 345ms\u001b[0m\u001b[0m \u001b[0m\n", - "\u001b[2K\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 2.06s\u001b[0m\u001b[0m \n", - "\u001b[2K\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m \u001b[0m\n", - " \u001b[32m+\u001b[39m \u001b[1mopendp\u001b[0m\u001b[2m==0.11.1\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "!uv pip install opendp" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "12", + "execution_count": null, + "id": "13", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(9.738381, -11.181064117927578)\n", - "Got mock\n" - ] - }, - { - "data": { - "text/plain": [ - "(9.738381, -11.181064117927578)" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "@syftbox_code\n", "def myanalysis(trade_data):\n", @@ -471,21 +169,10 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "13", + "execution_count": null, + "id": "14", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/Users/atrask/Desktop/SyftBox/staging/myanalysis'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# TEMP bug where we cant use theirs_with_my_read because the parent write is ignored but allowing the perm file to set its own\n", "# rules wont work either so we need to solve the permissioning of files themselves\n", @@ -498,7 +185,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -518,7 +205,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/03-netflix-code.ipynb b/notebooks/03-netflix-code.ipynb index e8986ea0..681fd0ff 100644 --- a/notebooks/03-netflix-code.ipynb 
+++ b/notebooks/03-netflix-code.ipynb @@ -220,7 +220,6 @@ "\n", " import numpy as np\n", " import pandas as pd\n", - "\n", " from Pyfhel import PyCtxt, Pyfhel\n", "\n", " crypto_folder = \"./crypto\"\n", @@ -319,7 +318,6 @@ "source": [ "def decode_results(HE, stat_keys, path):\n", " import numpy as np\n", - "\n", " from Pyfhel import PyCtxt\n", "\n", " crypto_folder = path + \"/crypto\"\n", diff --git a/projects/netflix_stats/main.py b/projects/netflix_stats/main.py index 8e7c591f..d485ae41 100644 --- a/projects/netflix_stats/main.py +++ b/projects/netflix_stats/main.py @@ -106,7 +106,6 @@ def netflix_stats(datasite, df): import numpy as np import pandas as pd - from Pyfhel import Pyfhel from Pyfhel.PyCtxt import PyCtxt diff --git a/syftbox/client/client.py b/syftbox/client/client.py index 4d38af8e..7fb09f75 100644 --- a/syftbox/client/client.py +++ b/syftbox/client/client.py @@ -1,8 +1,8 @@ import argparse import atexit -import platform import importlib import os +import platform import subprocess import sys import threading @@ -115,7 +115,7 @@ def load_or_create_config(args) -> ClientConfig: client_config = None try: client_config = ClientConfig.load(args.config_path) - except Exception as e: + except Exception: pass if client_config is None and args.config_path: diff --git a/syftbox/client/plugins/create_datasite.py b/syftbox/client/plugins/create_datasite.py index 946e2673..4df42fdf 100644 --- a/syftbox/client/plugins/create_datasite.py +++ b/syftbox/client/plugins/create_datasite.py @@ -24,7 +24,7 @@ def claim_datasite(client_config): perm_file.save(file_path) except Exception as e: print("Failed to create perm file", e) - + public_path = client_config.datasite_path + "/" + "public" os.makedirs(public_path, exist_ok=True) public_file_path = perm_file_path(public_path) diff --git a/syftbox/client/plugins/sync.py b/syftbox/client/plugins/sync.py index d299820e..17056249 100644 --- a/syftbox/client/plugins/sync.py +++ b/syftbox/client/plugins/sync.py @@ -3,9 +3,9 @@ from collections import defaultdict from datetime import datetime from threading import Event -from watchdog.events import DirModifiedEvent import requests +from watchdog.events import DirModifiedEvent from syftbox.lib import ( DirState, @@ -231,7 +231,10 @@ def push_changes(client_config, changes): f"> {client_config.email} FAILED /write {change.kind} {change.internal_path}", ) except Exception as e: - print(f"Failed to call /write on the server for {change.internal_path}", str(e)) + print( + f"Failed to call /write on the server for {change.internal_path}", + str(e), + ) return written_changes @@ -336,14 +339,14 @@ def ascii_for_change(changes) -> str: def handle_empty_folders(client_config, datasite): changes = [] datasite_path = os.path.join(client_config.sync_folder, datasite) - + for root, dirs, files in os.walk(datasite_path): if not files and not dirs: # This is an empty folder relative_path = os.path.relpath(root, datasite_path) - if relative_path == '.': + if relative_path == ".": continue # Skip the root folder - + change = FileChange( kind=FileChangeKind.CREATE, parent_path=datasite, @@ -353,7 +356,7 @@ def handle_empty_folders(client_config, datasite): sync_folder=client_config.sync_folder, ) changes.append(change) - + return changes @@ -398,7 +401,7 @@ def sync_up(client_config): # get the new dir state new_dir_state = hash_dir(client_config.sync_folder, datasite, IGNORE_FOLDERS) changes = diff_dirstate(old_dir_state, new_dir_state) - + # Add handling for empty folders empty_folder_changes = 
handle_empty_folders(client_config, datasite) changes.extend(empty_folder_changes) @@ -470,7 +473,7 @@ def sync_down(client_config) -> int: continue changes = diff_dirstate(new_dir_state, remote_dir_state) - + # Add handling for empty folders empty_folder_changes = handle_empty_folders(client_config, datasite) changes.extend(empty_folder_changes) @@ -602,7 +605,7 @@ def run(shared_state, *args, **kwargs): if CLIENT_CHANGELOG_FOLDER in event.src_path: return - # ignore these events for now on linux + # ignore these events for now on linux # FileOpenedEvent # FileClosedNoWriteEvent # DirModifiedEvent diff --git a/syftbox/lib/lib.py b/syftbox/lib/lib.py index c4f3cf55..56256fae 100644 --- a/syftbox/lib/lib.py +++ b/syftbox/lib/lib.py @@ -1,36 +1,20 @@ from __future__ import annotations -import ast import base64 -import copy import hashlib -import inspect import json import os -import pkgutil import re -import shutil -import subprocess -import sys -import sysconfig -import textwrap import threading -import types import zlib from collections.abc import Callable -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from enum import Enum -from importlib.abc import Loader, MetaPathFinder -from importlib.util import spec_from_loader from pathlib import Path from threading import Lock from typing import Any -from urllib.parse import urlparse -import markdown -import pandas as pd -import pkg_resources import requests from typing_extensions import Self @@ -49,6 +33,7 @@ def is_primitive_json_serializable(obj): return True return False + def pack(obj) -> Any: if is_primitive_json_serializable(obj): return obj @@ -539,6 +524,7 @@ def filter_read_state(user_email: str, dir_state: DirState, perm_tree: Permissio filtered_tree[file_path] = file_info return filtered_tree + class ResettableTimer: def __init__(self, timeout, callback, *args, **kwargs): self.timeout = timeout @@ -659,6 +645,7 @@ def str_to_bool(bool_str: str | None) -> bool: result = True return result + def validate_email(email: str) -> bool: # Define a regex pattern for a valid email email_regex = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$" @@ -765,7 +752,6 @@ def use(self): os.environ["SYFTBOX_SYNC_DIR"] = self.sync_folder print(f"> Setting Sync Dir to: {self.sync_folder}") - @classmethod def create_manifest(cls, path: str, email: str): # make a dir and set the permissions diff --git a/syftbox/server/server.py b/syftbox/server/server.py index 308b9699..d5762389 100644 --- a/syftbox/server/server.py +++ b/syftbox/server/server.py @@ -191,6 +191,7 @@ async def lifespan(app: FastAPI): async def get_ascii_art(): return ascii_art + @app.get("/wheel/{path:path}", response_class=HTMLResponse) async def browse_datasite(request: Request, path: str): if path == "": # Check if path is empty (meaning "/datasites/") From 8442a67147481362976ceff75462a7b1d8bed42b Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Mon, 30 Sep 2024 18:29:33 +1000 Subject: [PATCH 3/3] Fixed netflix url --- default_apps/netflix/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default_apps/netflix/main.py b/default_apps/netflix/main.py index 61315a9a..cdcc4cc5 100644 --- a/default_apps/netflix/main.py +++ b/default_apps/netflix/main.py @@ -24,7 +24,7 @@ def publish_page(output_path): shutil.copy2(output_path, destination_path + "/" + file_name) print( - f"> Netflix app published to: {client_config.server_url}/datasites/apps/netflix/{client_config.email}" + f"> Netflix app published to: 
{client_config.server_url}/datasites/{client_config.email}/apps/netflix/" ) except Exception as e: import traceback