Skip to content

Generate Documentation Samples #249

Generate Documentation Samples

Generate Documentation Samples #249

Workflow file for this run

# Runs once a day via the cron schedule below (GitHub evaluates cron
# expressions in UTC, so 05:00 UTC) and can also be started manually through
# workflow_dispatch. It is NOT triggered by pushes.
name: Generate Documentation Samples

on:
  workflow_dispatch:
  schedule:
    - cron: '0 5 * * *'

# Workflow-level concurrency group, disabled in the original file and kept
# here for reference only.
# concurrency: data
jobs:
  # Picks one random entry from each link list, downloads it, runs the two
  # sampling queries over the downloaded parquet files, pretty-prints the JSON
  # results and commits them back to the repository.
  generate_samples:
    timeout-minutes: 120
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents. Declared explicitly so the job does not depend on
    # the repository's default token permissions.
    permissions:
      contents: write
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      # Poetry is configured above to create the virtualenv inside the project
      # (.venv); that directory is cached, keyed on OS, Python version and the
      # hash of poetry.lock.
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      # Third-party dependencies are only installed on a cache miss ...
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      # ... while the project itself is installed on every run.
      - name: Install root
        run: poetry install --only-root

      - name: Download links
        run: |
          # -p: do not fail if a directory already exists.
          mkdir -p dataset/ dataset_python_only/
          # `shuf -n 1` selects a single random line from each link list.
          # With one input line the xargs batching flags (-P/-n) are inert;
          # they are kept so the sample count can be raised without further
          # changes.
          shuf -n 1 links/dataset.txt \
            | xargs -P 5 -n 4 wget --no-verbose -P dataset/
          shuf -n 1 links/only_python_files.txt \
            | xargs -P 5 -n 4 wget --no-verbose -P dataset_python_only/

      - name: Generate sample
        run: |
          poetry run pypi-data run-sql \
            ${{ github.workspace }}/sql/random.prql \
            stats/random_sample.json \
            --output=json dataset/*.parquet
          poetry run pypi-data run-sql \
            ${{ github.workspace }}/sql/random_unique.prql \
            stats/random_sample_python_only.json \
            --output=json dataset_python_only/*.parquet

      # Re-format the generated JSON with jq. The output is written to a
      # scratch file outside the workspace and only moved into place once jq
      # has succeeded, so a jq failure never leaves a truncated sample behind.
      - name: Pretty-print random sample
        run: |
          jq '.' stats/random_sample.json > "$RUNNER_TEMP/random_sample.json"
          mv "$RUNNER_TEMP/random_sample.json" stats/random_sample.json
          # Show the first lines in the job log as a quick sanity check.
          head stats/random_sample.json

      - name: Pretty-print python-only random sample
        run: |
          jq '.' stats/random_sample_python_only.json \
            > "$RUNNER_TEMP/random_sample_python_only.json"
          mv "$RUNNER_TEMP/random_sample_python_only.json" \
            stats/random_sample_python_only.json
          head stats/random_sample_python_only.json

      # Commit only the two sample files and push them; the pull options
      # rebase onto any commits that landed while the job was running.
      - name: Commit samples
        uses: EndBug/add-and-commit@v9
        with:
          add: |
            stats/random_sample.json
            stats/random_sample_python_only.json
          author_email: '41898282+github-actions[bot]@users.noreply.github.com'
          author_name: 'commit-bot'
          message: 'Add random samples'
          push: true
          fetch: true
          pull: '--rebase --autostash'