Scrape #397

Workflow file for this run

name: Scrape
on:
  schedule:
    # Coles and Woolworths update prices every Wednesday
    # Run every day at 6pm UTC (5am AEDT the next day)
    - cron: '0 18 * * *'
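  # workflow_dispatch enables manual runs from the Actions tab; note that the
  # logLevel input is declared here but not referenced by any step below.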
  workflow_dispatch:
    inputs:
      logLevel:
        description: 'Log level'
        required: true
        default: 'debug'
        type: choice
        options:
          - info
          - warning
          - debug
jobs:
  scrape:
    name: Scrape Products
    runs-on: ubuntu-latest
    timeout-minutes: 90
    strategy:
      matrix:
        node-version: [18.x]
    steps:
      - uses: actions/checkout@v3
      - name: Install Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v3
        with:
          node-version: ${{ matrix.node-version }}
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8
          run_install: false
      - name: Install Node.js ${{ matrix.node-version }} modules
        run: pnpm i
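      # The scrape, clean, upload and img scripts are assumed to be defined in
      # the repository's package.json; the secrets below appear to be Cloudflare
      # R2 (S3-compatible) credentials consumed by the upload step.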
      - name: Scrape products
        run: pnpm run scrape && pnpm run clean && pnpm run upload && pnpm run img
        env:
          CLOUDFLARE_ID: ${{ secrets.CLOUDFLARE_ID }}
          ACCESS_KEY_ID: ${{ secrets.ACCESS_KEY_ID }}
          SECRET_ACCESS_KEY: ${{ secrets.SECRET_ACCESS_KEY }}
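      # Record today's date as a step output so the artifact uploaded below can
      # be named per run (scrape-YYYY-MM-DD).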
      - name: Get current date
        id: date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
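      # Print the runner's public IP, presumably to help debug cases where the
      # supermarket sites rate-limit or block GitHub-hosted runners.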
      - name: Get current ip
        run: curl http://whatismyip.akamai.com
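      # Keep the day's scraped product JSON as a downloadable run artifact.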
      - name: Upload scrape result artifact
        uses: actions/upload-artifact@v3
        with:
          name: scrape-${{ steps.date.outputs.date }}
          path: |
            data/productInfo/*.json