-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
florian
committed
Jun 15, 2024
1 parent
e2cdf6e
commit c226b28
Showing
13 changed files
with
174 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,4 +20,4 @@ COPY pypi_scout /code/pypi_scout/ | |
|
||
ENV PYTHONPATH=/code | ||
|
||
CMD [ "python", "pypi_scout/foo.py"] | ||
CMD [ "/bin/bash" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Local stack: the frontend app on port 3000 and the uvicorn-served backend on port 8000.
version: "3.8"

services:
  # Frontend image is built from ./frontend/Dockerfile.
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    ports:
      - "3000:3000"
    volumes:
      - ./frontend:/app
      # The bind mount above hides everything the image placed in /app.
      # These anonymous volumes keep the image's installed dependencies and
      # production build visible, so `npm run start` does not depend on
      # host-side artifacts. After rebuilding the image, start with
      # `docker compose up --renew-anon-volumes` to pick up the fresh copies.
      - /app/node_modules
      - /app/.next
    environment:
      - NODE_ENV=production

  # Backend runs uvicorn with --reload; the repository is mounted at /code so
  # source edits on the host are picked up without rebuilding.
  backend:
    build:
      context: .
      dockerfile: Dockerfile
    command: poetry run uvicorn pypi_scout.api.main:app --host 0.0.0.0 --port 8000 --reload
    ports:
      - "8000:8000"
    volumes:
      - .:/code
    environment:
      - PYTHONPATH=/code
    # NOTE(review): this only orders container start-up (frontend first).
    # If the frontend calls the backend, the dependency direction is
    # probably inverted - confirm.
    depends_on:
      - frontend
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Image for the Next.js frontend: install dependencies, build, then serve on port 3000.
FROM node:18-alpine

# All following instructions run relative to /app inside the container.
WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached until they change.
COPY package.json package-lock.json ./

# `npm ci` installs exactly the versions pinned in package-lock.json and fails
# if the lockfile is out of sync with package.json, giving reproducible builds
# (plain `npm install` may resolve different versions and rewrite the lockfile).
RUN npm ci

# Copy the rest of the application code to the container.
# NOTE(review): without a .dockerignore excluding node_modules, a host-side
# node_modules directory would be copied over the one installed above - confirm.
COPY . .

# Build the Next.js application.
RUN npm run build

# Document the port the application listens on.
EXPOSE 3000

# Start the Next.js application.
CMD ["npm", "run", "start"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
-- Projects with at least 250 downloads over the last 28 days, joined to the
-- metadata row(s) carrying each project's most recent upload_time, ordered by
-- download count (highest first).
WITH download_totals AS (
  -- Download count per project within the 28-day window ending today.
  SELECT
    project,
    COUNT(*) AS download_count
  FROM `bigquery-public-data.pypi.file_downloads`
  WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY) AND CURRENT_DATE()
  GROUP BY project
  HAVING download_count >= 250
),
latest_uploads AS (
  -- Most recent upload_time recorded for each distribution name.
  SELECT
    name,
    MAX(upload_time) AS latest_upload_time
  FROM `bigquery-public-data.pypi.distribution_metadata`
  GROUP BY name
)
SELECT
  totals.project AS name,
  meta.description AS description,
  meta.summary AS summary,
  meta.version AS latest_version,
  totals.download_count AS number_of_downloads
FROM download_totals AS totals
-- NOTE(review): project and name are matched by exact equality; confirm both
-- columns use the same name normalization, otherwise some projects are dropped.
INNER JOIN `bigquery-public-data.pypi.distribution_metadata` AS meta
  ON totals.project = meta.name
-- Keep only the metadata rows whose upload_time is the latest for that name.
INNER JOIN latest_uploads AS latest
  ON latest.name = meta.name
  AND latest.latest_upload_time = meta.upload_time
ORDER BY totals.download_count DESC;
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from pypi_scout.scripts.download_dataset import download_dataset | ||
from pypi_scout.scripts.process_dataset import process_dataset | ||
from pypi_scout.scripts.setup_pinecone import setup_pinecone | ||
from pypi_scout.scripts.upsert_data import upsert_data | ||
from pypi_scout.utils.logging import setup_logging | ||
|
||
# Logging is configured as soon as the module is loaded.
setup_logging()


def main():
    """Run the pipeline steps in order: index setup, download, processing, upsert."""
    pipeline = (setup_pinecone, download_dataset, process_dataset, upsert_data)
    for step in pipeline:
        step()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import logging | ||
|
||
from dotenv import load_dotenv | ||
from pinecone import Pinecone, ServerlessSpec | ||
from pinecone.core.client.exceptions import PineconeApiException | ||
|
||
from pypi_scout.config import Config | ||
from pypi_scout.utils.logging import setup_logging | ||
|
||
|
||
def setup_pinecone():
    """
    Create the Pinecone index used for storing embeddings.

    Loads environment variables from a .env file, builds a Pinecone client
    from ``Config.PINECONE_TOKEN`` and creates a serverless index (AWS,
    us-east-1) with the configured name and dimension, using the dot-product
    metric. An index that already exists (HTTP 409) is treated as success and
    only logged as a warning.

    Raises:
        PineconeApiException: If index creation fails for any reason other
            than the index already existing.
    """
    load_dotenv()
    config = Config()

    logging.info("Connecting to Pinecone..")
    pc = Pinecone(api_key=config.PINECONE_TOKEN)

    logging.info("Creating Pinecone index..")
    try:
        pc.create_index(
            name=config.PINECONE_INDEX_NAME,
            dimension=config.EMBEDDINGS_DIMENSION,
            metric="dotproduct",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
    except PineconeApiException as e:
        if e.status != 409:
            # Log with traceback, then propagate: swallowing the error would
            # let callers continue as if the index existed.
            logging.exception("An error occurred while creating the Pinecone index.")
            raise
        logging.warning(f"Pinecone index '{config.PINECONE_INDEX_NAME}' already exists.")
    else:
        logging.info("Pinecone index created successfully.")


if __name__ == "__main__":
    setup_logging()
    setup_pinecone()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters