Skip to content

Commit

Permalink
add aprfc_qpf_06h
Browse files Browse the repository at this point in the history
  • Loading branch information
adamscarberry committed Jan 17, 2025
1 parent 305b773 commit a1c9a71
Showing 1 changed file with 110 additions and 0 deletions.
110 changes: 110 additions & 0 deletions dags/cumulus/aprfc_qpf_06h.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""
Acquire and Process APRFC QPF 06h
"""

import json
from datetime import datetime, timedelta, timezone
import calendar
from bs4 import BeautifulSoup
import re
import requests

from airflow import DAG
from airflow.decorators import dag, task
from airflow.operators.python import get_current_context
from helpers.downloads import trigger_download

import helpers.cumulus as cumulus

# Default arguments
default_args = {
"owner": "airflow",
"depends_on_past": False,
"start_date": (datetime.now(timezone.utc) - timedelta(hours=72)).replace(
minute=0, second=0
),
"catchup_by_default": False,
"email_on_failure": False,
"email_on_retry": False,
"retries": 6,
"retry_delay": timedelta(minutes=30),
}


# ALR QPF filename generator
def get_filenames(edate, url):
"""
date at end of filename hour and min can not be predicted
scraping data from website and finding all matching filenames
for the sprcified date.
"""
d_t1 = edate.strftime("%Y%m%d")
d_t2 = (edate - timedelta(hours=24)).strftime("%Y%m%d")

page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")
links = [node.get("href") for node in soup.find_all("a")]
filenames = []
for d in [d_t2, d_t1]:
regex = f"^qpf06f_has_.*.awips_{d}\d+.grb.gz$"
filenames = filenames + [link for link in links if re.match(regex, link)]

return filenames


@dag(
default_args=default_args,
schedule="40 14,5 * * *",
tags=["cumulus", "precip", "QPF", "APRFC"],
max_active_runs=2,
max_active_tasks=4,
)
def cumulus_aprfc_qpf_06h():
"""This pipeline handles download, processing, and derivative product creation for \n
APRFC QPE\n
URL Dir - https://cbt.crohms.org/akgrids
Files matching qpf06f_has_6f_20200917_18_awips_202009170949.grb - 6 hour\n
"""
key_prefix = cumulus.S3_ACQUIRABLE_PREFIX
URL_ROOT = f"https://cbt.crohms.org/akgrids"
PRODUCT_SLUG = "aprfc-qpf-06h"

@task()
def download_raw_qpf():
logical_date = get_current_context()["logical_date"]

return_list = list()
filenames = get_filenames(logical_date, URL_ROOT)
for filename in filenames:
url = f"{URL_ROOT}/{filename}"
s3_key = f"{key_prefix}/{PRODUCT_SLUG}/{filename}"
print(f"Downloading file: {filename}")
try:
trigger_download(url=url, s3_bucket=cumulus.S3_BUCKET, s3_key=s3_key)
return_list.append(
{
"execution": logical_date.isoformat(),
"s3_key": s3_key,
"filename": filename,
}
)
except:
print(f"{filename} is not available to download")

return json.dumps(return_list)

@task()
def notify_cumulus(payload):
payload = json.loads(payload)
for item in payload:
print("Notifying Cumulus: " + item["filename"])
cumulus.notify_acquirablefile(
acquirable_id=cumulus.acquirables[PRODUCT_SLUG],
datetime=item["execution"],
s3_key=item["s3_key"],
)

notify_cumulus(download_raw_qpf())


aprfc_qpe_dag = cumulus_aprfc_qpf_06h()

0 comments on commit a1c9a71

Please sign in to comment.