From eef91d47d292cc549e2afefa8b4ff17cd568ceec Mon Sep 17 00:00:00 2001 From: senkenn Date: Fri, 3 Jan 2025 22:31:55 +0900 Subject: [PATCH] Add daily and hourly forecast modules and refactor CSV writer interfaces --- .../usecase/harvest_forecast_data.rs | 14 ++- .../bin/harvest_observation_weather_data.rs | 4 +- .../src/bin/harvest_weather_forecast_data.rs | 20 +++- .../csv_writer/jma_forecast_daily.rs | 105 ++++++++++++++++++ ...jma_forecast.rs => jma_forecast_hourly.rs} | 15 ++- .../csv_writer/jma_observation.rs | 26 +++-- .../src/frameworks_drivers/csv_writer/mod.rs | 3 +- .../scraper/jma_forecast.rs | 6 + .../csv_writer_interface/jma_observation.rs | 6 +- 9 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_daily.rs rename packages/cron_jobs/src/frameworks_drivers/csv_writer/{jma_forecast.rs => jma_forecast_hourly.rs} (91%) diff --git a/packages/cron_jobs/src/application_business_rules/usecase/harvest_forecast_data.rs b/packages/cron_jobs/src/application_business_rules/usecase/harvest_forecast_data.rs index df42503..2bb4a9d 100644 --- a/packages/cron_jobs/src/application_business_rules/usecase/harvest_forecast_data.rs +++ b/packages/cron_jobs/src/application_business_rules/usecase/harvest_forecast_data.rs @@ -4,14 +4,18 @@ use crate::interface_adapters::csv_writer_interface::jma_observation::ICsvWriter use crate::interface_adapters::scraper_interface::jma_observation::IScraper; use anyhow::Result; -pub struct ForecastUsecase { - scrapers: Vec, - csv_writers: Vec, +pub struct ForecastUsecase { + scrapers: Vec>, + csv_writers: Vec>, service: Box, } -impl ForecastUsecase { - pub fn new(scrapers: Vec, csv_writers: Vec, service: Box) -> Self { +impl ForecastUsecase { + pub fn new( + scrapers: Vec>, + csv_writers: Vec>, + service: Box, + ) -> Self { Self { scrapers, csv_writers, diff --git a/packages/cron_jobs/src/bin/harvest_observation_weather_data.rs b/packages/cron_jobs/src/bin/harvest_observation_weather_data.rs index 1d347c9..8e9651d 100644 --- a/packages/cron_jobs/src/bin/harvest_observation_weather_data.rs +++ b/packages/cron_jobs/src/bin/harvest_observation_weather_data.rs @@ -1,4 +1,4 @@ -use cron_jobs::frameworks_drivers::csv_writer::jma_observation::CsvWriter; +use cron_jobs::frameworks_drivers::csv_writer::jma_observation::JmaObservationCsvWriter; use cron_jobs::interface_adapters::s3_service::s3_service::S3Service; use cron_jobs::{ application_business_rules::usecase::harvest_observation_weather_data::WeatherUsecase, @@ -24,7 +24,7 @@ async fn main() { let s3_service = Box::new(S3Service::new()); let scraper = Box::new(Scraper::new()); - let csv_writer = Box::new(CsvWriter::new()); + let csv_writer = Box::new(JmaObservationCsvWriter::new()); let usecase = Box::new(WeatherUsecase::new(scraper, csv_writer, s3_service)); match usecase.harvest_observation_weather_data().await { Ok(_) => { diff --git a/packages/cron_jobs/src/bin/harvest_weather_forecast_data.rs b/packages/cron_jobs/src/bin/harvest_weather_forecast_data.rs index 2bbcc43..08f2d6f 100644 --- a/packages/cron_jobs/src/bin/harvest_weather_forecast_data.rs +++ b/packages/cron_jobs/src/bin/harvest_weather_forecast_data.rs @@ -1,7 +1,13 @@ -use cron_jobs::application_business_rules::usecase::harvest_forecast_data::ForecastUsecase; -use cron_jobs::frameworks_drivers::csv_writer::jma_observation::CsvWriter; +use cron_jobs::frameworks_drivers::csv_writer::jma_forecast_daily::JmaForecastDailyCsvWriter; +use cron_jobs::frameworks_drivers::csv_writer::jma_forecast_hourly::JmaForecastHourlyCsvWriter; use cron_jobs::frameworks_drivers::scraper::jma_forecast::JmaForecastHourlyScraper; +use cron_jobs::interface_adapters::csv_writer_interface::jma_observation::ICsvWriter; use cron_jobs::interface_adapters::s3_service::s3_service::S3Service; +use cron_jobs::interface_adapters::scraper_interface::jma_observation::IScraper; +use cron_jobs::{ + application_business_rules::usecase::harvest_forecast_data::ForecastUsecase, + frameworks_drivers::scraper::jma_forecast::JmaForecastDailyScraper, +}; use dotenvy::dotenv; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; @@ -20,8 +26,14 @@ async fn main() { .init(); let s3_service = Box::new(S3Service::new()); - let scrapers = vec![JmaForecastHourlyScraper::new()]; - let csv_writers = vec![CsvWriter::new()]; + let scrapers: Vec> = vec![ + Box::new(JmaForecastHourlyScraper::new()), + Box::new(JmaForecastDailyScraper::new()), + ]; + let csv_writers: Vec> = vec![ + Box::new(JmaForecastHourlyCsvWriter::new()), + Box::new(JmaForecastDailyCsvWriter::new()), + ]; let usecase = Box::new(ForecastUsecase::new(scrapers, csv_writers, s3_service)); match usecase.harvest_weather_forecast_data().await { Ok(_) => { diff --git a/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_daily.rs b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_daily.rs new file mode 100644 index 0000000..f02dc9a --- /dev/null +++ b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_daily.rs @@ -0,0 +1,105 @@ +use anyhow::Result; +use async_trait::async_trait; +use serde::Deserialize; +use tracing::info; + +use crate::{ + frameworks_drivers::date::date::DateWrapper, + interface_adapters::csv_writer_interface::jma_observation::ICsvWriter, +}; + +#[derive(Deserialize)] +struct TimeSeries { + #[serde(rename = "dateTime")] + date_time: String, +} + +#[derive(Deserialize)] +struct AreaTimeSeries { + #[serde(rename = "timeDefines")] + time_defines: Vec, + weather: Vec, +} + +#[derive(Deserialize)] +struct Forecast { + #[serde(rename = "areaTimeSeries")] + area_time_series: AreaTimeSeries, +} + +pub struct JmaForecastDailyCsvWriter; +impl JmaForecastDailyCsvWriter { + pub fn new() -> Self { + Self {} + } +} + +#[async_trait] +impl ICsvWriter for JmaForecastDailyCsvWriter { + async fn create_csv_file(&self, date: DateWrapper, json_str: String) -> Result { + let file_name = format!( + "jma_forecast_daily_{}_{}_{}.csv", + date.get_year(), + date.get_month(), + date.get_day() + ); + + // TODO: fix + let forecast: Forecast = serde_json::from_str(&json_str)?; + let mut wtr = csv::Writer::from_path(&file_name)?; + + wtr.write_record(&["dateTime", "weather"])?; + + for (i, time_define) in forecast.area_time_series.time_defines.iter().enumerate() { + wtr.write_record(&[ + &time_define.date_time, + &forecast.area_time_series.weather[i], + ])?; + } + + wtr.flush()?; + + info!("Created CSV file: {}", file_name); + + Ok(file_name) + } +} + +#[cfg(test)] +mod tests { + use std::fs; + + use super::*; + use crate::frameworks_drivers::date::date::DateWrapper; + + #[tokio::test] + async fn test_create_csv_file() { + let csv_writer = JmaForecastDailyCsvWriter {}; + let date = DateWrapper::new(2021, 1, 1).unwrap(); + let json_str = fs::read_to_string("src/tests/fixtures/jma_forecast_hourly.json").unwrap(); + let file_name = csv_writer.create_csv_file(date, json_str).await.unwrap(); + assert_eq!(file_name, "jma_forecast_daily_2021_1_1.csv"); + + let csv = fs::read_to_string(file_name).unwrap(); + let rows: Vec<&str> = csv.split('\n').collect(); + let expected_rows = vec![ + "dateTime,weather", + "2025-01-03T12:00:00+09:00,くもり", + "2025-01-03T15:00:00+09:00,くもり", + "2025-01-03T18:00:00+09:00,くもり", + "2025-01-03T21:00:00+09:00,くもり", + "2025-01-04T00:00:00+09:00,くもり", + "2025-01-04T03:00:00+09:00,くもり", + "2025-01-04T06:00:00+09:00,くもり", + "2025-01-04T09:00:00+09:00,晴れ", + "2025-01-04T12:00:00+09:00,晴れ", + "2025-01-04T15:00:00+09:00,晴れ", + "2025-01-04T18:00:00+09:00,くもり", + "2025-01-04T21:00:00+09:00,くもり", + "", + ]; + for (row, expected_row) in rows.iter().zip(expected_rows.iter()) { + assert_eq!(row, expected_row); + } + } +} diff --git a/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast.rs b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_hourly.rs similarity index 91% rename from packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast.rs rename to packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_hourly.rs index fd156f1..77d20b9 100644 --- a/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast.rs +++ b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_forecast_hourly.rs @@ -1,5 +1,7 @@ +use anyhow::Result; use async_trait::async_trait; use serde::Deserialize; +use tracing::info; use crate::{ frameworks_drivers::date::date::DateWrapper, @@ -26,14 +28,15 @@ struct Forecast { } pub struct JmaForecastHourlyCsvWriter; +impl JmaForecastHourlyCsvWriter { + pub fn new() -> Self { + Self {} + } +} #[async_trait] impl ICsvWriter for JmaForecastHourlyCsvWriter { - async fn create_csv_file( - &self, - date: DateWrapper, - json_str: String, - ) -> Result> { + async fn create_csv_file(&self, date: DateWrapper, json_str: String) -> Result { let file_name = format!( "jma_forecast_hourly_{}_{}_{}.csv", date.get_year(), @@ -54,6 +57,8 @@ impl ICsvWriter for JmaForecastHourlyCsvWriter { wtr.flush()?; + info!("Created CSV file: {}", file_name); + Ok(file_name) } } diff --git a/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_observation.rs b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_observation.rs index 5205863..e124d24 100644 --- a/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_observation.rs +++ b/packages/cron_jobs/src/frameworks_drivers/csv_writer/jma_observation.rs @@ -2,31 +2,35 @@ use anyhow::Result; use async_trait::async_trait; use csv::Writer; use scraper::{Html, Selector}; +use tracing::info; use crate::{ frameworks_drivers::date::date::DateWrapper, interface_adapters::csv_writer_interface::jma_observation::ICsvWriter, }; -pub struct CsvWriter {} +pub struct JmaObservationCsvWriter {} -impl CsvWriter { +impl JmaObservationCsvWriter { pub fn new() -> Self { Self {} } } #[async_trait] -impl ICsvWriter for CsvWriter { - async fn create_csv_file( - &self, - date: DateWrapper, - html: String, - ) -> Result> { +impl ICsvWriter for JmaObservationCsvWriter { + async fn create_csv_file(&self, date: DateWrapper, html: String) -> Result { // parse the HTML let document = Html::parse_document(&html); + if document.errors.len() > 0 { + return Err(anyhow::anyhow!( + "Failed to parse the HTML: {:?}", + document.errors + )); + } - let row_selector = Selector::parse("tr.mtx[style='text-align:right;']")?; + let row_selector = Selector::parse("tr.mtx[style='text-align:right;']") + .map_err(|e| anyhow::anyhow!("Failed to parse the selector: {:?}", e))?; let csv_file_name = format!( "jma_observation_{}_{}_{}.csv", date.get_year(), @@ -73,7 +77,7 @@ impl ICsvWriter for CsvWriter { wtr.write_record(&record)?; } wtr.flush()?; - tracing::info!("Created CSV file: {}", csv_file_name); + info!("Created CSV file: {}", csv_file_name); Ok(csv_file_name) } @@ -87,7 +91,7 @@ mod tests { #[tokio::test] async fn test_create_csv_file() { - let csv_writer = CsvWriter::new(); + let csv_writer = JmaObservationCsvWriter::new(); let date = DateWrapper::new(2024, 11, 1).unwrap(); let html = fs::read_to_string("./src/tests/fixtures/jma_observation_2024_11_1.html").unwrap(); diff --git a/packages/cron_jobs/src/frameworks_drivers/csv_writer/mod.rs b/packages/cron_jobs/src/frameworks_drivers/csv_writer/mod.rs index 5ba0859..4f5742e 100644 --- a/packages/cron_jobs/src/frameworks_drivers/csv_writer/mod.rs +++ b/packages/cron_jobs/src/frameworks_drivers/csv_writer/mod.rs @@ -1,2 +1,3 @@ -pub mod jma_forecast; +pub mod jma_forecast_daily; +pub mod jma_forecast_hourly; pub mod jma_observation; diff --git a/packages/cron_jobs/src/frameworks_drivers/scraper/jma_forecast.rs b/packages/cron_jobs/src/frameworks_drivers/scraper/jma_forecast.rs index 6a802c4..397984d 100644 --- a/packages/cron_jobs/src/frameworks_drivers/scraper/jma_forecast.rs +++ b/packages/cron_jobs/src/frameworks_drivers/scraper/jma_forecast.rs @@ -16,6 +16,12 @@ impl JmaForecastHourlyScraper { } } +impl JmaForecastDailyScraper { + pub fn new() -> Self { + Self {} + } +} + #[async_trait] impl IScraper for JmaForecastHourlyScraper { async fn fetch_data(&self, date: Option) -> Result { diff --git a/packages/cron_jobs/src/interface_adapters/csv_writer_interface/jma_observation.rs b/packages/cron_jobs/src/interface_adapters/csv_writer_interface/jma_observation.rs index 5f3b05a..7ecbd56 100644 --- a/packages/cron_jobs/src/interface_adapters/csv_writer_interface/jma_observation.rs +++ b/packages/cron_jobs/src/interface_adapters/csv_writer_interface/jma_observation.rs @@ -6,9 +6,5 @@ use crate::frameworks_drivers::date::date::DateWrapper; type FileName = String; #[async_trait] pub trait ICsvWriter { - async fn create_csv_file( - &self, - date: DateWrapper, - html: String, - ) -> Result>; + async fn create_csv_file(&self, date: DateWrapper, html: String) -> Result; }