Skip to content

Commit

Permalink
fix usage of gdown and add --no-upsert flag
Browse files Browse the repository at this point in the history
  • Loading branch information
fpgmaas committed Jun 17, 2024
1 parent 36077f4 commit 6e4dcd1
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 15 deletions.
20 changes: 10 additions & 10 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pypi_scout/scripts/download_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def download_dataset():

     logging.info(f"⬇️ Downloading raw dataset from Google Drive to {target_path}...")
     url = f"https://drive.google.com/uc?id={config.GOOGLE_FILE_ID}"
-    gdown.download(url, target_path, quiet=False)
+    gdown.download(url, str(target_path), quiet=False)
     logging.info("✅ Done!")


Expand Down
17 changes: 13 additions & 4 deletions pypi_scout/scripts/setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
+import argparse
 from pypi_scout.scripts.download_dataset import download_dataset
 from pypi_scout.scripts.process_dataset import process_dataset
 from pypi_scout.scripts.setup_pinecone import setup_pinecone
 from pypi_scout.scripts.upsert_data import upsert_data
 from pypi_scout.utils.logging import setup_logging
 
-setup_logging()
-
-if __name__ == "__main__":
+def main(no_upsert):
+    setup_logging()
     setup_pinecone()
     download_dataset()
     process_dataset()
-    upsert_data()
+    if not no_upsert:
+        upsert_data()
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the setup script with optional flags.")
+    parser.add_argument('--no-upsert', action='store_true', help='If set, do not upsert data to the Pinecone database.')
+
+    args = parser.parse_args()
+
+    main(no_upsert=args.no_upsert)

0 comments on commit 6e4dcd1

Please sign in to comment.