diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..94c5422
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.txt text
+*.png binary
\ No newline at end of file
diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml
new file mode 100644
index 0000000..e174c50
--- /dev/null
+++ b/.github/workflows/cleanup.yml
@@ -0,0 +1,103 @@
+# .github/workflows/cleanup.yml
+name: Repository Cleanup
+
+on:
+  workflow_dispatch:
+    inputs:
+      action_type:
+        description: 'Select the action to perform'
+        required: true
+        type: choice
+        options:
+          - 'Cleanup Workflow'
+          - 'Cleanup Deployments'
+      workflow_status:
+        description: 'Workflow status to clean up (only needed when Cleanup Workflow is selected)'
+        required: false
+        type: choice
+        options:
+          - 'disabled'   # disabled workflows
+          - 'active'     # active workflows
+          - 'all'        # all workflows
+      environment:
+        description: 'Deployment environment to clean up (only needed when Cleanup Deployments is selected)'
+        required: false
+        type: choice
+        options:
+          - 'all'
+          - 'github-pages'
+          - 'pypi'
+
+jobs:
+  cleanup-workflows:
+    if: ${{ github.event.inputs.action_type == 'Cleanup Workflow' }}
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write
+    steps:
+      - name: Cleanup workflows
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const status = '${{ github.event.inputs.workflow_status }}';
+            console.log(`Cleaning up workflows with status: ${status}`);
+
+            // Fetch all workflows in the repository
+            const workflows = await github.rest.actions.listRepoWorkflows({
+              owner: context.repo.owner,
+              repo: context.repo.repo
+            });
+
+            for (const workflow of workflows.data.workflows) {
+              // Filter workflows by the selected status
+              if (status === 'all' ||
+                  (status === 'disabled' && workflow.state !== 'active') ||
+                  (status === 'active' && workflow.state === 'active')) {
+
+                console.log(`Processing workflow: ${workflow.name} (${workflow.state})`);
+
+                // Fetch all runs of this workflow
+                const runs = await github.rest.actions.listWorkflowRuns({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  workflow_id: workflow.id,
+                });
+
+                // Delete the runs
+                console.log(`Found ${runs.data.total_count} runs to delete`);
+                for (const run of runs.data.workflow_runs) {
+                  console.log(`Deleting run #${run.run_number} of ${workflow.name}`);
+                  await github.rest.actions.deleteWorkflowRun({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    run_id: run.id
+                  });
+                }
+              }
+            }
+            console.log('Cleanup completed');
+
+  cleanup-deployments:
+    if: ${{ github.event.inputs.action_type == 'Cleanup Deployments' }}
+    runs-on: ubuntu-latest
+    permissions:
+      deployments: write
+      actions: write
+      contents: write
+    steps:
+      - name: Delete github-pages deployments
+        if: ${{ github.event.inputs.environment == 'github-pages' || github.event.inputs.environment == 'all' }}
+        uses: strumwolf/delete-deployment-environment@v3
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: github-pages
+          onlyRemoveDeployments: true
+
+      - name: Delete pypi deployments
+        if: ${{ github.event.inputs.environment == 'pypi' || github.event.inputs.environment == 'all' }}
+        uses: strumwolf/delete-deployment-environment@v3
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          environment: pypi
+          onlyRemoveDeployments: true
\ No newline at end of file
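Note: the run-deletion logic in cleanup.yml can be dry-run locally before trusting it to a workflow. The following is a rough Python sketch against the documented GitHub Actions REST endpoints (the same ones github-script wraps above); OWNER, REPO and TOKEN are placeholders, and pagination is omitted:

import requests

OWNER, REPO, TOKEN = "owner", "repo", "ghp_xxx"  # placeholders
API = f"https://api.github.com/repos/{OWNER}/{REPO}/actions"
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}

# list workflows, then delete the runs of the disabled ones
for wf in requests.get(f"{API}/workflows", headers=HEADERS).json()["workflows"]:
    if wf["state"] != "active":  # same test as status == 'disabled' in the workflow
        runs = requests.get(f"{API}/workflows/{wf['id']}/runs", headers=HEADERS).json()
        for run in runs["workflow_runs"]:
            # DELETE /repos/{owner}/{repo}/actions/runs/{run_id}
            requests.delete(f"{API}/runs/{run['id']}", headers=HEADERS)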
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 0000000..99829c3
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,49 @@
+name: Python Tests
+
+on:
+  push:
+    branches: [ main, master ]
+  pull_request:
+    branches: [ main, master ]
+
+jobs:
+  test:
+    strategy:
+      matrix:
+        python-version: [ "3.11", "3.12" ]
+        os: [ ubuntu-latest ]
+      fail-fast: false
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'  # enable pip caching to speed up installs
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install -e ".[test]"
+
+      - name: Run tests with coverage
+        run: |
+          pytest tests/ -m "not requires_data" \
+            --cov=AeroViz \
+            --cov-report=term-missing \
+            --cov-report=xml \
+            -v
+
+      - name: Upload coverage reports
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report-${{ matrix.python-version }}-${{ github.sha }}
+          path: coverage.xml
+          if-no-files-found: error
diff --git a/.gitignore b/.gitignore
index 2dc53ca..e13a170 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+
+# macOS artifacts
+.DS_Store
+temp/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/Figure/NO2_2021.png b/Figure/NO2_2021.png
deleted file mode 100644
index 41462fc..0000000
Binary files a/Figure/NO2_2021.png and /dev/null differ
diff --git a/Figure/NO2_202101.png b/Figure/NO2_202101.png
deleted file mode 100644
index a345955..0000000
Binary files a/Figure/NO2_202101.png and /dev/null differ
diff --git a/Figure/NO2_202102.png b/Figure/NO2_202102.png
deleted file mode 100644
index 00cbd00..0000000
Binary files a/Figure/NO2_202102.png and /dev/null differ
diff --git a/Figure/NO2_202103.png b/Figure/NO2_202103.png
deleted file mode 100644
index 520cc49..0000000
Binary files a/Figure/NO2_202103.png and /dev/null differ
diff --git a/Figure/NO2_202104.png b/Figure/NO2_202104.png
deleted file mode 100644
index 5fbfd4a..0000000
Binary files a/Figure/NO2_202104.png and /dev/null differ
diff --git a/Figure/NO2_202105.png b/Figure/NO2_202105.png
deleted file mode 100644
index 731cbf1..0000000
Binary files a/Figure/NO2_202105.png and /dev/null differ
diff --git a/Figure/NO2_202106.png b/Figure/NO2_202106.png
deleted file mode 100644
index a8251b0..0000000
Binary files a/Figure/NO2_202106.png and /dev/null differ
diff --git a/Figure/NO2_202107.png b/Figure/NO2_202107.png
deleted file mode 100644
index 8b578b2..0000000
Binary files a/Figure/NO2_202107.png and /dev/null differ
diff --git a/Figure/NO2_202108.png b/Figure/NO2_202108.png
deleted file mode 100644
index cbfb731..0000000
Binary files a/Figure/NO2_202108.png and /dev/null differ
diff --git a/Figure/NO2_202109.png b/Figure/NO2_202109.png
deleted file mode 100644
index e0cc4b1..0000000
Binary files a/Figure/NO2_202109.png and /dev/null differ
diff --git a/Figure/NO2_202110.png b/Figure/NO2_202110.png
deleted file mode 100644
index f6ae762..0000000
Binary files a/Figure/NO2_202110.png and /dev/null differ
diff --git a/Figure/NO2_202111.png b/Figure/NO2_202111.png
deleted file mode 100644
index f5368e6..0000000
Binary files a/Figure/NO2_202111.png and /dev/null differ
diff --git a/Figure/NO2_202112.png b/Figure/NO2_202112.png
deleted file mode 100644
index e1d8523..0000000
Binary files a/Figure/NO2_202112.png and /dev/null differ
diff --git a/Figure/NO2_2022.png b/Figure/NO2_2022.png
deleted file mode 100644
index 315f451..0000000
Binary files a/Figure/NO2_2022.png and /dev/null differ
diff --git a/Figure/NO2_202201.png b/Figure/NO2_202201.png
deleted file mode 100644
index a5979ee..0000000
Binary files a/Figure/NO2_202201.png and /dev/null differ
diff --git a/Figure/NO2_202202.png b/Figure/NO2_202202.png
deleted file mode 100644
index 908a2b5..0000000
Binary files a/Figure/NO2_202202.png and /dev/null differ
diff --git a/Figure/NO2_202203.png b/Figure/NO2_202203.png
deleted file mode 100644
index e2afabc..0000000
Binary files a/Figure/NO2_202203.png and /dev/null differ
diff --git a/Figure/NO2_202204.png b/Figure/NO2_202204.png
deleted file mode 100644
index 6c97c6f..0000000
Binary files a/Figure/NO2_202204.png and /dev/null differ
diff --git a/Figure/NO2_202205.png b/Figure/NO2_202205.png
deleted file mode 100644
index 9653f4f..0000000
Binary files a/Figure/NO2_202205.png and /dev/null differ
diff --git a/Figure/NO2_202206.png b/Figure/NO2_202206.png
deleted file mode 100644
index 842254f..0000000
Binary files a/Figure/NO2_202206.png and /dev/null differ
diff --git a/Figure/NO2_202207.png b/Figure/NO2_202207.png
deleted file mode 100644
index 0d1ffad..0000000
Binary files a/Figure/NO2_202207.png and /dev/null differ
diff --git a/Figure/NO2_202208.png b/Figure/NO2_202208.png
deleted file mode 100644
index 805c719..0000000
Binary files a/Figure/NO2_202208.png and /dev/null differ
diff --git a/Figure/NO2_202209.png b/Figure/NO2_202209.png
deleted file mode 100644
index 5a41ca4..0000000
Binary files a/Figure/NO2_202209.png and /dev/null differ
diff --git a/Figure/NO2_202210.png b/Figure/NO2_202210.png
deleted file mode 100644
index 21fe6f6..0000000
Binary files a/Figure/NO2_202210.png and /dev/null differ
diff --git a/Figure/NO2_202211.png b/Figure/NO2_202211.png
deleted file mode 100644
index 7ef3791..0000000
Binary files a/Figure/NO2_202211.png and /dev/null differ
diff --git a/Figure/NO2_202212.png b/Figure/NO2_202212.png
deleted file mode 100644
index 8464509..0000000
Binary files a/Figure/NO2_202212.png and /dev/null differ
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b35ceb5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Chih-Yu Chan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.dbf" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.dbf" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.dbf" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.dbf" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.prj" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.prj" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.prj" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.prj" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbn" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbn" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbn" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbn" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbx" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbx" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbx" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.sbx" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp" 
"b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp.xml" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp.xml" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp.xml" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shp.xml" diff --git "a/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shx" "b/data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shx" similarity index 100% rename from "\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shx" rename to "data/shapefiles/stations/\347\251\272\346\260\243\345\223\201\350\263\252\347\233\243\346\270\254\347\253\231\344\275\215\347\275\256\345\234\226_121_10704.shx" diff --git a/mapdata202301070205/COUNTY_MOI_1090820.CPG b/data/shapefiles/taiwan/COUNTY_MOI_1090820.CPG similarity index 100% rename from mapdata202301070205/COUNTY_MOI_1090820.CPG rename to data/shapefiles/taiwan/COUNTY_MOI_1090820.CPG diff --git a/mapdata202301070205/COUNTY_MOI_1090820.dbf b/data/shapefiles/taiwan/COUNTY_MOI_1090820.dbf similarity index 100% rename from mapdata202301070205/COUNTY_MOI_1090820.dbf rename to data/shapefiles/taiwan/COUNTY_MOI_1090820.dbf diff --git a/mapdata202301070205/COUNTY_MOI_1090820.prj b/data/shapefiles/taiwan/COUNTY_MOI_1090820.prj similarity index 100% rename from mapdata202301070205/COUNTY_MOI_1090820.prj rename to data/shapefiles/taiwan/COUNTY_MOI_1090820.prj diff --git a/mapdata202301070205/COUNTY_MOI_1090820.shp b/data/shapefiles/taiwan/COUNTY_MOI_1090820.shp similarity index 100% rename from mapdata202301070205/COUNTY_MOI_1090820.shp rename to data/shapefiles/taiwan/COUNTY_MOI_1090820.shp diff --git a/mapdata202301070205/COUNTY_MOI_1090820.shx b/data/shapefiles/taiwan/COUNTY_MOI_1090820.shx similarity index 100% rename from mapdata202301070205/COUNTY_MOI_1090820.shx rename to data/shapefiles/taiwan/COUNTY_MOI_1090820.shx diff --git a/mapdata202301070205/Metadata.xml b/data/shapefiles/taiwan/Metadata.xml similarity index 100% 
diff --git a/mapdata202301070205/Metadata.xml b/data/shapefiles/taiwan/Metadata.xml
similarity index 100%
rename from mapdata202301070205/Metadata.xml
rename to data/shapefiles/taiwan/Metadata.xml
diff --git a/mapdata202301070205/TW-01-301000100G-000017.xml b/data/shapefiles/taiwan/TW-01-301000100G-000017.xml
similarity index 100%
rename from mapdata202301070205/TW-01-301000100G-000017.xml
rename to data/shapefiles/taiwan/TW-01-301000100G-000017.xml
diff --git "a/mapdata202301070205/修正清單_1081113&21.xlsx" "b/data/shapefiles/taiwan/修正清單_1081113&21.xlsx"
similarity index 100%
rename from "mapdata202301070205/修正清單_1081113&21.xlsx"
rename to "data/shapefiles/taiwan/修正清單_1081113&21.xlsx"
diff --git a/docs/Satellite.png b/docs/Satellite.png
new file mode 100755
index 0000000..63209f0
Binary files /dev/null and b/docs/Satellite.png differ
diff --git a/ncData/NO2_202101.nc b/ncData/NO2_202101.nc
deleted file mode 100644
index 7d1c2cd..0000000
Binary files a/ncData/NO2_202101.nc and /dev/null differ
diff --git a/ncData/NO2_202102.nc b/ncData/NO2_202102.nc
deleted file mode 100644
index 1564e30..0000000
Binary files a/ncData/NO2_202102.nc and /dev/null differ
diff --git a/ncData/NO2_202103.nc b/ncData/NO2_202103.nc
deleted file mode 100644
index d35c7fc..0000000
Binary files a/ncData/NO2_202103.nc and /dev/null differ
diff --git a/ncData/NO2_202104.nc b/ncData/NO2_202104.nc
deleted file mode 100644
index d531d67..0000000
Binary files a/ncData/NO2_202104.nc and /dev/null differ
diff --git a/ncData/NO2_202105.nc b/ncData/NO2_202105.nc
deleted file mode 100644
index 7b7d900..0000000
Binary files a/ncData/NO2_202105.nc and /dev/null differ
diff --git a/ncData/NO2_202106.nc b/ncData/NO2_202106.nc
deleted file mode 100644
index ba3db95..0000000
Binary files a/ncData/NO2_202106.nc and /dev/null differ
diff --git a/ncData/NO2_202107.nc b/ncData/NO2_202107.nc
deleted file mode 100644
index bab3366..0000000
Binary files a/ncData/NO2_202107.nc and /dev/null differ
diff --git a/ncData/NO2_202108.nc b/ncData/NO2_202108.nc
deleted file mode 100644
index 1a97098..0000000
Binary files a/ncData/NO2_202108.nc and /dev/null differ
diff --git a/ncData/NO2_202109.nc b/ncData/NO2_202109.nc
deleted file mode 100644
index 758cf3c..0000000
Binary files a/ncData/NO2_202109.nc and /dev/null differ
diff --git a/ncData/NO2_202110.nc b/ncData/NO2_202110.nc
deleted file mode 100644
index 159d065..0000000
Binary files a/ncData/NO2_202110.nc and /dev/null differ
diff --git a/ncData/NO2_202111.nc b/ncData/NO2_202111.nc
deleted file mode 100644
index 9fe3c59..0000000
Binary files a/ncData/NO2_202111.nc and /dev/null differ
diff --git a/ncData/NO2_202112.nc b/ncData/NO2_202112.nc
deleted file mode 100644
index f168e37..0000000
Binary files a/ncData/NO2_202112.nc and /dev/null differ
diff --git a/ncData/NO2_202201.nc b/ncData/NO2_202201.nc
deleted file mode 100644
index 7809585..0000000
Binary files a/ncData/NO2_202201.nc and /dev/null differ
diff --git a/ncData/NO2_202202.nc b/ncData/NO2_202202.nc
deleted file mode 100644
index e822607..0000000
Binary files a/ncData/NO2_202202.nc and /dev/null differ
diff --git a/ncData/NO2_202203.nc b/ncData/NO2_202203.nc
deleted file mode 100644
index 93c1b3b..0000000
Binary files a/ncData/NO2_202203.nc and /dev/null differ
diff --git a/ncData/NO2_202204.nc b/ncData/NO2_202204.nc
deleted file mode 100644
index 2d11789..0000000
Binary files a/ncData/NO2_202204.nc and /dev/null differ
diff --git a/ncData/NO2_202205.nc b/ncData/NO2_202205.nc
deleted file mode 100644
index 3cc599c..0000000
Binary files a/ncData/NO2_202205.nc and /dev/null differ
diff --git a/ncData/NO2_202206.nc b/ncData/NO2_202206.nc
deleted file mode 100644
index 0e96a88..0000000
Binary files a/ncData/NO2_202206.nc and /dev/null differ
diff --git a/ncData/NO2_202207.nc b/ncData/NO2_202207.nc
deleted file mode 100644
index 982f97a..0000000
Binary files a/ncData/NO2_202207.nc and /dev/null differ
diff --git a/ncData/NO2_202208.nc b/ncData/NO2_202208.nc
deleted file mode 100644
index 040bf12..0000000
Binary files a/ncData/NO2_202208.nc and /dev/null differ
diff --git a/ncData/NO2_202209.nc b/ncData/NO2_202209.nc
deleted file mode 100644
index 258a7d2..0000000
Binary files a/ncData/NO2_202209.nc and /dev/null differ
diff --git a/ncData/NO2_202210.nc b/ncData/NO2_202210.nc
deleted file mode 100644
index 083b143..0000000
Binary files a/ncData/NO2_202210.nc and /dev/null differ
diff --git a/ncData/NO2_202211.nc b/ncData/NO2_202211.nc
deleted file mode 100644
index 5af5da1..0000000
Binary files a/ncData/NO2_202211.nc and /dev/null differ
diff --git a/ncData/NO2_202212.nc b/ncData/NO2_202212.nc
deleted file mode 100644
index 36f006c..0000000
Binary files a/ncData/NO2_202212.nc and /dev/null differ
diff --git a/read_nc.py b/read_nc.py
deleted file mode 100644
index a34375d..0000000
--- a/read_nc.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import numpy as np
-import xarray as xr
-import time
-import os
-
-from scipy.ndimage import map_coordinates
-from scipy.spatial import cKDTree
-from netCDF4 import Dataset
-from functools import wraps
-from pathlib import Path
-
-
-def timer(func=None, *, print_args=False):
-    """ Print the elapsed time of the wrapped function.
-
-    :param func:
-    :param print_args:
-    :return:
-    """
-
-    def decorator(_func):
-        @wraps(_func)
-        def wrapper(*args, **kwargs):
-            st = time.perf_counter()
-            result = _func(*args, **kwargs)
-            print(f'{_func.__name__}' + ' time cost: {:.3f} seconds'.format(time.perf_counter() - st))
-            return result
-
-        return wrapper
-
-    if func is None:
-        return decorator
-
-    else:
-        return decorator(func)
-
-
-class TaiwanFrame:
-    def __init__(self, resolution=0.01, lat_Taiwan=(21, 26), lon_Taiwan=(119, 123)):
-        self.lat = np.arange(lat_Taiwan[0], lat_Taiwan[1] + resolution, resolution)
-        self.lon = np.arange(lon_Taiwan[0], lon_Taiwan[1] + resolution, resolution)
-
-    def frame(self):
-        return np.meshgrid(self.lon, self.lat)
-
-    @property
-    def container(self):
-        return np.zeros(shape=(self.lat.size, self.lon.size))
-
-
-@timer
-def extract_data(dataset, *, mask_value=0.75, **kwargs):
-    """ Extract data from the dataset based on a mask.
-
-    :param dataset: (xarray.Dataset): The input dataset containing the data to be extracted.
-    :param mask_value: (float): The threshold value for the mask (default is 0.75).
-    :return: ndarray: three ndarray of longitude, latitude, and NO2
-        This method returns three NumPy arrays: masked_lon, masked_lat, and masked_no2, which represent
-        the longitude, latitude, and nitrogendioxide_tropospheric_column values after applying the specified mask.
- """ - - # set condition - mask_lon = ((dataset.longitude >= 118) & (dataset.longitude <= 124)) - mask_lat = ((dataset.latitude >= 20) & (dataset.latitude <= 27)) - masked_lon_lat_ds = dataset.where((mask_lon & mask_lat), drop=True) - - mask_qa = (masked_lon_lat_ds.qa_value >= mask_value) - masked_ds = masked_lon_lat_ds.where(mask_qa) - - masked_lon = masked_ds.longitude[0].data - masked_lat = masked_ds.latitude[0].data - masked_no2 = masked_ds.nitrogendioxide_tropospheric_column[0].data - - return masked_lon, masked_lat, masked_no2 - - -@timer -def interp_data(nc_lon, nc_lat, nc_no2, lon_coordinate, lat_coordinate): - """ This method is used to interpolate data for the purpose of using the map_coordinates function. - It utilizes a KD-tree to find the nearest neighbors in a 2D array and returns a 2D NO2 array. - - :param nc_lon: 2D array of longitudes from the netCDF dataset. - :param nc_lat: 2D array of latitudes from the netCDF dataset. - :param nc_no2: 2D array of nitrogendioxide_tropospheric_column values from the netCDF dataset. - :param lon_coordinate: 2D array of target longitudes for interpolation. - :param lat_coordinate: 2D array of target longitudes for interpolation. - :return: This method returns a 2D NumPy array (no2_array) that represents interpolated - nitrogendioxide_tropospheric_column values at the specified target longitudes and latitudes. - """ - lon_flat = lon_coordinate.flatten() - lat_flat = lat_coordinate.flatten() - - # 构建 KD 树 - tree = cKDTree(np.column_stack((nc_lon.flatten(), nc_lat.flatten()))) - - # 使用 query 方法查找最近的点 - distances, indices = tree.query(np.column_stack((lon_flat, lat_flat)), k=1) - - x_index, y_index = np.unravel_index(indices, nc_lon.shape) - - interpolated_values = map_coordinates(nc_no2, [x_index, y_index], order=1, mode='nearest') - - return interpolated_values.reshape(lon_coordinate.shape) - - -if __name__ == '__main__': - year = '2022' # 改時間即可使用 - - base_folder = Path("E:/S5P_NO2/data") - sorted_folders = sorted(list(base_folder.glob(f"{year}*"))) - store_folder = Path(__file__).parent / 'ncData' - - lon_coordinate, lat_coordinate = TaiwanFrame().frame() - - for folder_path in sorted_folders: - month = folder_path.name - nc_file_path = store_folder / f"NO2_{month}.nc" - - if nc_file_path.exists(): - print(f"File {nc_file_path} exists, skipping folder {month}") - continue - - container = [] - - for file_path in folder_path.glob("S5P_OFFL_L2__NO2*.nc"): - print('Open: ' + file_path.name) - dataset = xr.open_dataset(file_path, group='PRODUCT') - - try: - extracted_result = extract_data(dataset) - no2_array = interp_data(*extracted_result, lon_coordinate, lat_coordinate) - container.append(no2_array) - - except RuntimeError: - print("RuntimeError: NetCDF: HDF error.") - nan_array = np.full(lon_coordinate.shape, np.nan) - container.append(nan_array) - - no2_average = np.nanmean(container, axis=0) - - original_shape = no2_average.shape - - ds_time = np.array(np.datetime64(f'{month[:4]}-{month[4:]}', 'ns')) - - ds_result = xr.Dataset( - {'nitrogendioxide_tropospheric_column': (['latitude', 'longitude'], no2_average.reshape(*original_shape))}, - coords={'latitude': lat_coordinate[:, 0], - 'longitude': lon_coordinate[0]}) - - ds_result = ds_result.expand_dims(time=[ds_time]) - - ds_result.to_netcdf(nc_file_path) - print(f"{folder_path} mission completed") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7075378 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +Cartopy~=0.24.1 +numpy~=2.1.2 +xarray~=2024.9.0 
+matplotlib~=3.9.2
+geopandas~=1.0.1
+scipy~=1.14.1
+netCDF4~=1.7.1
+pandas~=2.2.3
+shapely~=2.0.6
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/api/__init__.py b/src/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/api/auth.py b/src/api/auth.py
new file mode 100644
index 0000000..bbc63bc
--- /dev/null
+++ b/src/api/auth.py
@@ -0,0 +1,46 @@
+"""Copernicus API authentication handling."""
+import os
+from datetime import datetime, timedelta
+from dotenv import load_dotenv
+import requests
+import logging
+
+from src.config.settings import COPERNICUS_TOKEN_URL
+
+logger = logging.getLogger(__name__)
+
+
+class CopernicusAuth:
+    def __init__(self):
+        load_dotenv()
+        self.username = os.getenv('COPERNICUS_USERNAME')
+        self.password = os.getenv('COPERNICUS_PASSWORD')
+        self.token = None
+        self.token_expiry = None
+
+        if not self.username or not self.password:
+            raise ValueError("Missing Copernicus credentials in .env file")
+
+    def get_token(self):
+        """Fetch a new access token."""
+        data = {
+            'grant_type': 'password',
+            'username': self.username,
+            'password': self.password,
+            'client_id': 'cdse-public'
+        }
+
+        response = requests.post(COPERNICUS_TOKEN_URL, data=data, timeout=30)
+        response.raise_for_status()
+
+        token_data = response.json()
+        self.token = token_data['access_token']
+        # refresh one minute before the server-side expiry to avoid races
+        self.token_expiry = datetime.now() + timedelta(seconds=token_data['expires_in'] - 60)
+        # logger.info("Access token updated")
+        return self.token
+
+    def ensure_valid_token(self):
+        """Return a valid token, refreshing it if missing or expired."""
+        if not self.token or not self.token_expiry or datetime.now() >= self.token_expiry:
+            return self.get_token()
+        return self.token
\ No newline at end of file
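Note: ensure_valid_token() is the only entry point callers need; it re-runs the password grant once the cached token is within the 60-second safety margin. A minimal usage sketch (assumes COPERNICUS_USERNAME/COPERNICUS_PASSWORD are set in .env):

from src.api.auth import CopernicusAuth

auth = CopernicusAuth()
token = auth.ensure_valid_token()   # first call performs the password grant
headers = {'Authorization': f'Bearer {token}'}
# ... later, before each batch of requests:
token = auth.ensure_valid_token()   # no-op until ~60 s before expiry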
logger.error(f"Download error: {str(e)}") + return False + + except Exception as e: + logger.error(f"Unexpected error during download: {str(e)}") + return False diff --git a/src/api/sentinel_api.py b/src/api/sentinel_api.py new file mode 100644 index 0000000..2920d07 --- /dev/null +++ b/src/api/sentinel_api.py @@ -0,0 +1,440 @@ +"""Sentinel-5P API 操作""" +import logging +import time +import requests +import threading +import multiprocessing +from datetime import datetime +from pathlib import Path +from typing import List, Dict, Optional, Tuple + +from rich.progress import ( + Progress, + ProgressColumn, + SpinnerColumn, + TextColumn, + BarColumn, + TimeRemainingColumn, +) +from rich.console import Console +from rich.panel import Panel +from rich.table import Table +from rich.align import Align + +from src.api.auth import CopernicusAuth +from src.api.downloader import Downloader +from src.config.settings import ( + COPERNICUS_BASE_URL, + COPERNICUS_DOWNLOAD_URL, + RAW_DATA_DIR, + DEFAULT_TIMEOUT +) + +console = Console(force_terminal=True, color_system="auto", width=130) # 使用您想要的寬度 +logger = logging.getLogger(__name__) + + +class FileProgressColumn(ProgressColumn): + def render(self, task): + """渲染進度列顯示""" + if task.total is None: + return "" + + # 如果是主進度條 + if "Overall Progress" in task.description: + return f"{task.completed} / {task.total} products" + + # 如果是檔案下載進度條 + completed = task.completed / (1024 * 1024) # 轉換為 MB + total = task.total / (1024 * 1024) + return f"{completed:5.1f} / {total:5.1f} MB" + + +class Sentinel5PDataFetcher: + def __init__(self, max_workers: int = 5): + self.auth = CopernicusAuth() + self.downloader = Downloader() + self.base_url = COPERNICUS_BASE_URL + self.max_workers = max_workers + self._token_lock = threading.Lock() + self.download_stats = { + 'success': 0, + 'failed': 0, + 'skipped': 0, + 'total_size': 0, + } + + def fetch_no2_data(self, start_date: str, end_date: str, + bbox: Optional[Tuple[float, float, float, float]] = None, + limit: Optional[int] = None) -> List[Dict]: + """ + 擷取 NO2 數據 + + Args: + start_date: 開始日期 (YYYY-MM-DD) + end_date: 結束日期 (YYYY-MM-DD) + bbox: 邊界框座標 (min_lon, min_lat, max_lon, max_lat) + limit: 最大結果數量 + + Returns: + List[Dict]: 產品資訊列表 + """ + # logger.info(f"Fetching NO2 data from {start_date} to {end_date}") + + try: + # 取得認證 token + with self._token_lock: + token = self.auth.ensure_valid_token() + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + } + + # 構建基本篩選條件 + base_filter = ( + f"Collection/Name eq 'SENTINEL-5P' " + f"and contains(Name,'NO2') " + f"and ContentDate/Start gt {start_date}T00:00:00.000Z " + f"and ContentDate/Start lt {end_date}T23:59:59.999Z" + ) + + # 如果提供了邊界框,加入空間篩選 + if bbox: + min_lon, min_lat, max_lon, max_lat = bbox + spatial_filter = ( + f" and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((" + f"{min_lon} {min_lat}, " + f"{max_lon} {min_lat}, " + f"{max_lon} {max_lat}, " + f"{min_lon} {max_lat}, " + f"{min_lon} {min_lat}))')" + ) + base_filter += spatial_filter + + # 設置查詢參數 + query_params = { + '$filter': base_filter, + '$orderby': 'ContentDate/Start desc', + '$top': limit if limit is not None else 200, + '$skip': 0 + } + + all_products = [] + + # 使用進度條顯示資料擷取進度 + with Progress( + SpinnerColumn(), + TextColumn("[bold blue]{task.description}"), + BarColumn(bar_width=106), + FileProgressColumn(), + TimeRemainingColumn(), + console=console, + transient=True, + expand=True + ) as progress: + fetch_task = progress.add_task( + "[cyan]Fetching products...", + 
diff --git a/src/api/sentinel_api.py b/src/api/sentinel_api.py
new file mode 100644
index 0000000..2920d07
--- /dev/null
+++ b/src/api/sentinel_api.py
@@ -0,0 +1,440 @@
+"""Sentinel-5P API operations."""
+import logging
+import time
+import requests
+import threading
+import multiprocessing
+import queue
+from datetime import datetime
+from pathlib import Path
+from typing import List, Dict, Optional, Tuple
+
+from rich.progress import (
+    Progress,
+    ProgressColumn,
+    SpinnerColumn,
+    TextColumn,
+    BarColumn,
+    TimeRemainingColumn,
+)
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from rich.align import Align
+
+from src.api.auth import CopernicusAuth
+from src.api.downloader import Downloader
+from src.config.settings import (
+    COPERNICUS_BASE_URL,
+    COPERNICUS_DOWNLOAD_URL,
+    RAW_DATA_DIR,
+    DEFAULT_TIMEOUT
+)
+
+console = Console(force_terminal=True, color_system="auto", width=130)  # fixed console width
+logger = logging.getLogger(__name__)
+
+
+class FileProgressColumn(ProgressColumn):
+    def render(self, task):
+        """Render the progress column."""
+        if task.total is None:
+            return ""
+
+        # overall progress bar
+        if "Overall Progress" in task.description:
+            return f"{task.completed} / {task.total} products"
+
+        # per-file download bar: convert to MB
+        completed = task.completed / (1024 * 1024)
+        total = task.total / (1024 * 1024)
+        return f"{completed:5.1f} / {total:5.1f} MB"
+
+
+class Sentinel5PDataFetcher:
+    def __init__(self, max_workers: int = 5):
+        self.auth = CopernicusAuth()
+        self.downloader = Downloader()
+        self.base_url = COPERNICUS_BASE_URL
+        self.max_workers = max_workers
+        self._token_lock = threading.Lock()
+        self.download_stats = {
+            'success': 0,
+            'failed': 0,
+            'skipped': 0,
+            'total_size': 0,
+        }
+
+    def fetch_no2_data(self, start_date: str, end_date: str,
+                       bbox: Optional[Tuple[float, float, float, float]] = None,
+                       limit: Optional[int] = None) -> List[Dict]:
+        """
+        Fetch NO2 products.
+
+        Args:
+            start_date: start date (YYYY-MM-DD)
+            end_date: end date (YYYY-MM-DD)
+            bbox: bounding box (min_lon, min_lat, max_lon, max_lat)
+            limit: maximum number of results
+
+        Returns:
+            List[Dict]: list of product metadata
+        """
+        # logger.info(f"Fetching NO2 data from {start_date} to {end_date}")
+
+        try:
+            # obtain an auth token
+            with self._token_lock:
+                token = self.auth.ensure_valid_token()
+
+            headers = {
+                'Authorization': f'Bearer {token}',
+                'Content-Type': 'application/json'
+            }
+
+            # build the base filter
+            base_filter = (
+                f"Collection/Name eq 'SENTINEL-5P' "
+                f"and contains(Name,'NO2') "
+                f"and ContentDate/Start gt {start_date}T00:00:00.000Z "
+                f"and ContentDate/Start lt {end_date}T23:59:59.999Z"
+            )
+
+            # add a spatial filter if a bounding box was provided
+            if bbox:
+                min_lon, min_lat, max_lon, max_lat = bbox
+                spatial_filter = (
+                    f" and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON(("
+                    f"{min_lon} {min_lat}, "
+                    f"{max_lon} {min_lat}, "
+                    f"{max_lon} {max_lat}, "
+                    f"{min_lon} {max_lat}, "
+                    f"{min_lon} {min_lat}))')"
+                )
+                base_filter += spatial_filter
+
+            # query parameters
+            query_params = {
+                '$filter': base_filter,
+                '$orderby': 'ContentDate/Start desc',
+                '$top': limit if limit is not None else 200,
+                '$skip': 0
+            }
+
+            all_products = []
+
+            # show fetch progress with a progress bar
+            with Progress(
+                    SpinnerColumn(),
+                    TextColumn("[bold blue]{task.description}"),
+                    BarColumn(bar_width=106),
+                    FileProgressColumn(),
+                    TimeRemainingColumn(),
+                    console=console,
+                    transient=True,
+                    expand=True
+            ) as progress:
+                fetch_task = progress.add_task(
+                    "[cyan]Fetching products...",
+                    total=None
+                )
+
+                while True:
+                    try:
+                        response = requests.get(
+                            f"{self.base_url}/Products",
+                            headers=headers,
+                            params=query_params,
+                            timeout=DEFAULT_TIMEOUT
+                        )
+                        response.raise_for_status()
+
+                        products = response.json().get('value', [])
+                        if not products:
+                            break
+
+                        all_products.extend(products)
+                        progress.update(
+                            fetch_task,
+                            description=f"[cyan]Found {len(all_products)} products..."
+                        )
+
+                        if limit and len(all_products) >= limit:
+                            all_products = all_products[:limit]
+                            break
+
+                        query_params['$skip'] += len(products)
+
+                    except requests.exceptions.RequestException as e:
+                        logger.error(f"Error fetching products: {str(e)}")
+                        if len(all_products) > 0:
+                            logger.info("Returning partially fetched products")
+                            break
+                        raise
+
+            # display product details
+            if all_products:
+                table = Table(title=f"Found {len(all_products)} Products")
+                table.add_column("No.", justify="right", style="cyan")
+                table.add_column("Time", style="magenta")
+                table.add_column("Name", style="blue")
+                table.add_column("Size", justify="right", style="green")
+
+                for i, product in enumerate(all_products, 1):
+                    time_str = product.get('ContentDate', {}).get('Start', 'N/A')[:19]
+                    name = product.get('Name', 'N/A')
+                    size = product.get('ContentLength', 0)
+                    size_str = f"{size / 1024 / 1024:.2f} MB"
+                    table.add_row(str(i), time_str, name, size_str)
+
+                console.print(table)
+
+            return all_products
+
+        except Exception as e:
+            logger.error(f"Error in fetch_no2_data: {str(e)}")
+            raise
+
+    # TODO: main_task count is wrong
+    def parallel_download(self, products: list):
+        """Download multiple products in parallel."""
+        if not products:
+            logger.warning("No products to download")
+            return
+
+        # manage the download tasks with a queue
+        task_queue = queue.Queue()
+        for product in products:
+            task_queue.put(product)
+
+        # shared progress counters
+        completed_files = multiprocessing.Value('i', 0)
+        active_threads = multiprocessing.Value('i', 0)
+
+        # initialise the download statistics
+        self.download_stats.update({
+            'success': 0,
+            'failed': 0,
+            'skipped': 0,
+            'total_size': sum(p.get('ContentLength', 0) for p in products),
+            'start_time': time.time()
+        })
+
+        # locks for progress updates and statistics
+        progress_lock = threading.Lock()
+        stats_lock = threading.Lock()
+
+        def download_files(progress, task_id, thread_index, completed_files, task_queue):
+            try:
+                with active_threads.get_lock():
+                    active_threads.value += 1
+
+                while True:
+                    try:
+                        # grab the next task without blocking
+                        product = task_queue.get_nowait()
+                    except queue.Empty:
+                        break
+
+                    file_size = product.get('ContentLength', 0)
+                    file_name = product.get('Name')
+
+                    # point this thread's progress bar at the current file
+                    with progress_lock:
+                        progress.update(
+                            task_id,
+                            description=f"[cyan]Thread {thread_index + 1}: {file_name}",
+                            total=file_size,
+                            completed=0,
+                            visible=True,
+                            refresh=True
+                        )
+
+                    try:
+                        # obtain a valid auth token
+                        with self._token_lock:
+                            token = self.auth.ensure_valid_token()
+                        headers = {'Authorization': f'Bearer {token}'}
+
+                        start_time = product.get('ContentDate', {}).get('Start')
+                        date_obj = datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%S.%fZ')
+                        output_dir = Path(RAW_DATA_DIR) / date_obj.strftime('%Y') / date_obj.strftime('%m')
+                        output_path = output_dir / file_name
+
+                        # skip files that already exist with the expected size
+                        if output_path.exists() and output_path.stat().st_size == file_size:
+                            with progress_lock:
+                                progress.update(task_id, completed=file_size)
+                            with stats_lock:
+                                self.download_stats['skipped'] += 1
+                            with completed_files.get_lock():
+                                completed_files.value += 1
+                            continue
+
+                        output_dir.mkdir(parents=True, exist_ok=True)
+                        product_id = product.get('Id')
+                        download_url = f"{COPERNICUS_DOWNLOAD_URL}({product_id})/$value"
+
+                        def update_progress(downloaded_bytes):
+                            current_progress = min(downloaded_bytes, file_size)
+                            with progress_lock:
+                                progress.update(task_id, completed=current_progress, refresh=True)
+
+                        # perform the download, retrying with a fresh token
+                        download_success = False
+                        for attempt in range(3):
+                            try:
+                                if self.downloader.download_file(
+                                        download_url,
+                                        headers,
+                                        output_path,
+                                        progress_callback=update_progress
+                                ):
+                                    download_success = True
+                                    break
+
+                                if not download_success and attempt < 2:
+                                    time.sleep(5)
+                                    with self._token_lock:
+                                        token = self.auth.ensure_valid_token()
+                                        headers = {'Authorization': f'Bearer {token}'}
+
+                            except Exception as e:
+                                logger.error(f"Download attempt {attempt + 1} failed for {file_name}: {str(e)}")
+                                if attempt < 2:
+                                    time.sleep(5)
+                                continue
+
+                        # record the result
+                        with stats_lock:
+                            if download_success:
+                                self.download_stats['success'] += 1
+                            else:
+                                self.download_stats['failed'] += 1
+                                if output_path.exists():
+                                    output_path.unlink()
+
+                        with completed_files.get_lock():
+                            completed_files.value += 1
+
+                    except Exception as e:
+                        logger.error(f"Error downloading {file_name}: {str(e)}")
+                        with stats_lock:
+                            self.download_stats['failed'] += 1
+                        with completed_files.get_lock():
+                            completed_files.value += 1
+
+                        if 'output_path' in locals() and output_path.exists():
+                            output_path.unlink()
+                    finally:
+                        with progress_lock:
+                            progress.update(task_id, visible=False, refresh=True)
+                        # each dequeued task is marked done exactly once, on every exit path
+                        task_queue.task_done()
+
+            finally:
+                with active_threads.get_lock():
+                    active_threads.value -= 1
+                with progress_lock:
+                    progress.update(task_id, visible=False, refresh=True)
+
+        with Progress(
+                SpinnerColumn(),
+                TextColumn("[bold blue]{task.description}"),
+                BarColumn(complete_style="green"),
+                FileProgressColumn(),
+                TimeRemainingColumn(),
+                console=console,
+                expand=False,
+                transient=False
+        ) as progress:
+            # create the main progress bar
+            main_task = progress.add_task("[green]Overall Progress", total=len(products))
+
+            # create one progress bar per worker thread
+            sub_tasks = []
+            for i in range(self.max_workers):
+                task_id = progress.add_task(
+                    f"[cyan]Thread {i + 1}: Waiting for download...",
+                    total=100,
+                    visible=True
+                )
+                sub_tasks.append(task_id)
+
+            # start the download threads
+            threads = []
+            for i, task_id in enumerate(sub_tasks):
+                thread = threading.Thread(
+                    target=download_files,
+                    args=(progress, task_id, i, completed_files, task_queue)
+                )
+                thread.daemon = True
+                threads.append(thread)
+                thread.start()
+
+            # monitor overall progress
+            while True:
+                current_completed = completed_files.value
+                progress.update(main_task, completed=current_completed)
+
+                if (task_queue.empty() and
+                        current_completed >= len(products) and
+                        active_threads.value == 0):
+                    break
+
+                time.sleep(0.1)
+
+            # hide all per-thread progress bars
+            for task_id in sub_tasks:
+                progress.update(task_id, visible=False)
+
+            # ensure the main bar shows completion
+            # progress.update(main_task, completed=len(products), refresh=True)
+
+        # show the download summary
+        self._display_download_summary()
+
+    def _display_download_summary(self):
+        """Display a summary of the download statistics."""
+        elapsed_time = time.time() - self.download_stats['start_time']
+        total_files = (
+                self.download_stats['success'] +
+                self.download_stats['failed'] +
+                self.download_stats['skipped']
+        )
+
+        table = Table(title="Download Summary", width=60, padding=(0, 2), expand=False)
+        table.add_column("Metric", style="cyan")
+        table.add_column("Value", justify="right", style="green")
+
+        table.add_row("Total Files", str(total_files))
+        table.add_row("Successfully Downloaded", str(self.download_stats['success']))
+        table.add_row("Failed Downloads", str(self.download_stats['failed']))
+        table.add_row("Skipped Files", str(self.download_stats['skipped']))
+
+        total_size = self.download_stats['total_size']
+        size_str = f"{total_size / 1024 / 1024:.2f} MB"
+        table.add_row("Total Size", size_str)
+        table.add_row("Total Time", f"{elapsed_time:.2f}s")
+
+        if elapsed_time > 0:
+            avg_speed = total_size / elapsed_time
+            speed_str = f"{avg_speed / 1024 / 1024:.2f} MB/s"
+            table.add_row("Average Speed", speed_str)
+
+        # centre the table with Align
+        centered_table = Align.center(table)
+
+        console.print("\n", Panel(
+            centered_table,
+            title="Download Results",
+            width=130,
+            expand=True,
+            border_style="bright_blue",
+            padding=(1, 0)
+        ))
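Note: the OData filter assembled in fetch_no2_data can be exercised directly against the catalogue (in practice the search endpoint works without the bearer token; only the zipper download endpoint requires it). A condensed one-day query over the same Taiwan bbox, as a standalone sketch with a hypothetical date:

import requests

BASE = "https://catalogue.dataspace.copernicus.eu/odata/v1"
FLT = (
    "Collection/Name eq 'SENTINEL-5P' and contains(Name,'NO2') "
    "and ContentDate/Start gt 2024-11-01T00:00:00.000Z "
    "and ContentDate/Start lt 2024-11-01T23:59:59.999Z "
    "and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON(("
    "118 20, 124 20, 124 27, 118 27, 118 20))')"
)
resp = requests.get(f"{BASE}/Products",
                    params={'$filter': FLT, '$orderby': 'ContentDate/Start desc', '$top': 20},
                    timeout=60)
for p in resp.json().get('value', []):
    print(p['Name'], f"{p.get('ContentLength', 0) / 1024 / 1024:.1f} MB")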
diff --git a/src/config/settings.py b/src/config/settings.py
new file mode 100644
index 0000000..0fb2ba1
--- /dev/null
+++ b/src/config/settings.py
@@ -0,0 +1,67 @@
+"""API settings and constants."""
+from pathlib import Path
+from datetime import datetime
+import logging
+
+logger = logging.getLogger(__name__)
+
+# API URLs
+COPERNICUS_TOKEN_URL = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
+COPERNICUS_BASE_URL = "https://catalogue.dataspace.copernicus.eu/odata/v1"
+COPERNICUS_DOWNLOAD_URL = "https://zipper.dataspace.copernicus.eu/odata/v1/Products"
+
+# HTTP settings
+RETRY_SETTINGS = {
+    'total': 5,
+    'backoff_factor': 2,
+    'status_forcelist': [429, 500, 502, 503, 504]
+}
+
+CHUNK_SIZE = 8192
+DEFAULT_TIMEOUT = 60
+DOWNLOAD_TIMEOUT = 180
+
+# storage paths
+BASE_DIR = Path("/Users/chanchihyu/Sentinel_data")
+RAW_DATA_DIR = BASE_DIR / "raw"
+PROCESSED_DATA_DIR = BASE_DIR / "processed"
+FIGURE_DIR = BASE_DIR / "figure"
+LOGS_DIR = BASE_DIR / "logs"
+
+
+def setup_directory_structure(start_date: str, end_date: str):
+    """Create the month-by-month folder structure for the given date range."""
+    start = datetime.strptime(start_date, '%Y-%m-%d')
+    end = datetime.strptime(end_date, '%Y-%m-%d')
+
+    # iterate over every month in the range
+    current_date = start
+    while current_date <= end:
+        year = current_date.strftime('%Y')
+        month = current_date.strftime('%m')
+
+        # create the figure, processed and raw paths for this month
+        for base_dir in [FIGURE_DIR, PROCESSED_DATA_DIR, RAW_DATA_DIR]:
+            month_dir = base_dir / year / month
+            month_dir.mkdir(parents=True, exist_ok=True)
+
+        # advance to the next month
+        if month == "12":
+            current_date = current_date.replace(year=current_date.year + 1, month=1, day=1)
+        else:
+            current_date = current_date.replace(month=current_date.month + 1, day=1)
+
+
+def ensure_directories():
+    """Make sure all required directories exist."""
+    directories = [RAW_DATA_DIR, PROCESSED_DATA_DIR, FIGURE_DIR, LOGS_DIR]
+    for directory in directories:
+        try:
+            directory.mkdir(parents=True, exist_ok=True)
+            logger.info(f"Ensured directory exists: {directory}")
+        except Exception as e:
+            logger.error(f"Failed to create directory {directory}: {str(e)}")
+            raise
+
+
+ensure_directories()
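Note: the month-stepping idiom above (replace day with 1, roll the year over in December) also reappears in NO2Processor.process_each_data. A short worked example of what it produces, using plain datetime values:

from datetime import datetime

current, end = datetime(2024, 11, 1), datetime(2025, 2, 1)
while current <= end:
    print(current.strftime('%Y/%m'))   # prints 2024/11, 2024/12, 2025/01, 2025/02
    if current.month == 12:
        current = current.replace(year=current.year + 1, month=1, day=1)
    else:
        current = current.replace(month=current.month + 1, day=1)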
"""統一的訊息顯示函數""" + if confirm: + return Confirm.ask( + f"[bold cyan]{message}[/bold cyan]", # 在訊息中加入樣式 + default=True, + show_default=True + ) + + console.print(Panel( + Align.center(f"[bold cyan]{message}[/bold cyan]"), + width=width, + expand=True, + border_style="bright_blue", + padding=(0, 1) + )) + + +def fetch_data(start_date: str, end_date: str): + """下載數據的工作流程""" + setup_logging() + + try: + fetcher = Sentinel5PDataFetcher(max_workers=3) + + rich_print(f"正在獲取 sentinel-5p 衛星數據 (NO\u2082) from {start_date} to {end_date} ...") + products = fetcher.fetch_no2_data( + start_date=start_date, + end_date=end_date, + bbox=(118, 20, 124, 27), + limit=None + ) + + if products: + if rich_print("是否要下載數據?", confirm=True): + rich_print(f"開始下載 sentinel-5p 衛星數據 (NO\u2082) from {start_date} to {end_date} ...") + fetcher.parallel_download(products) + rich_print("數據下載完成!") + else: + rich_print("已取消下載操作") + else: + rich_print("找不到符合條件的數據") + + except Exception as e: + error_message = f"下載數據失敗: {str(e)}" + rich_print(error_message) + logger.error(error_message) + + +def process_data(start_date: str, end_date: str): + """處理數據的工作流程""" + setup_logging() + + try: + processor = NO2Processor( + interpolation_method='griddata', + resolution=0.02, + mask_value=0.5 + ) + + # 改用 rich style 的輸入提示 + if rich_print("是否要處理數據?", confirm=True): + logger.info(f"Start processing data from {start_date} to {end_date}") + processor.process_each_data(start_date, end_date, use_taiwan_mask=False) + rich_print("數據完成處理") + + except Exception as e: + error_message = f"處理數據失敗: {str(e)}" + rich_print(error_message) + logger.error(error_message) + + +if __name__ == "__main__": + # 設定參數 + start, end = '2024-11-01', '2024-11-10' + + # 設定輸入輸出配置 + setup_directory_structure(start, end) + + # 下載數據 + fetch_data(start, end) + + # 處理數據 + # process_data(start, end) diff --git a/src/processing/__init__.py b/src/processing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/processing/data_processor.py b/src/processing/data_processor.py new file mode 100644 index 0000000..4645289 --- /dev/null +++ b/src/processing/data_processor.py @@ -0,0 +1,243 @@ +"""src/processing/no2_processor.py""" +import numpy as np +import xarray as xr +import logging +import zipfile +import tempfile +from pathlib import Path +from datetime import datetime + +from src.processing.interpolators import DataInterpolator +from src.processing.taiwan_frame import TaiwanFrame +from src.config.settings import RAW_DATA_DIR, PROCESSED_DATA_DIR, FIGURE_DIR +from src.visualization.sample_plot_nc import plot_global_no2 + +logger = logging.getLogger(__name__) + + +class NO2Processor: + def __init__(self, interpolation_method='kdtree', resolution=0.02, mask_value=0.50): + """初始化 NO2 處理器 + + Parameters: + ----------- + interpolation_method : str + 插值方法,可選 'griddata' 或 'kdtree' + resolution : float + 網格解析度(度) + mask_value : float + QA 值的閾值 + """ + self.interpolation_method = interpolation_method + self.resolution = resolution + self.mask_value = mask_value + self.taiwan_frame = TaiwanFrame() + + @staticmethod + def process_zipped_nc(zip_path: Path): + """處理壓縮的 NC 檔案""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir_path = Path(temp_dir) + + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(temp_dir) + + nc_files = list(temp_dir_path.rglob("*.nc")) + if nc_files: + return xr.open_dataset(nc_files[0], group='PRODUCT') + return None + + def create_grid(self, lon: np.ndarray, lat: np.ndarray): + """根據數據的經緯度範圍創建網格""" + # 取得經緯度的範圍 + lon_min, 
+        lon_min, lon_max = np.nanmin(lon), np.nanmax(lon)
+        lat_min, lat_max = np.nanmin(lat), np.nanmax(lat)
+
+        # grid points
+        grid_lon = np.arange(lon_min, lon_max + self.resolution, self.resolution)
+        grid_lat = np.arange(lat_min, lat_max + self.resolution, self.resolution)
+
+        # grid matrices
+        return np.meshgrid(grid_lon, grid_lat)
+
+    def extract_data(self, dataset: xr.Dataset, use_taiwan_mask: bool = False):
+        """Extract data from the dataset.
+
+        Parameters:
+        -----------
+        dataset : xr.Dataset
+            input dataset
+        use_taiwan_mask : bool
+            whether to restrict extraction to the Taiwan region
+        """
+        if use_taiwan_mask:
+            return self._extract_data_taiwan(dataset)
+        return self._extract_data_global(dataset)
+
+    def _extract_data_global(self, dataset: xr.Dataset):
+        """Extract data over the full swath."""
+        time = dataset.time.values[0]
+        lat = dataset.latitude.values[0]
+        lon = dataset.longitude.values[0]
+        no2 = dataset.nitrogendioxide_tropospheric_column.values[0]
+        qa = dataset.qa_value.values[0]
+
+        # filter the data by QA value
+        mask = qa < self.mask_value
+        no2[mask] = np.nan
+
+        logger.info(f"\t{'data time':15}: {np.datetime64(time, 'D').astype(str)}")
+        logger.info(f"\t{'lon range':15}: {lon.min():.2f} to {lon.max():.2f}")
+        logger.info(f"\t{'lat range':15}: {lat.min():.2f} to {lat.max():.2f}")
+        logger.info(f"\t{'data shape':15}: {no2.shape}")
+
+        return lon, lat, no2
+
+    def _extract_data_taiwan(self, dataset: xr.Dataset):
+        """Extract data over the Taiwan region."""
+        # spatial conditions
+        mask_lon = ((dataset.longitude >= 118) & (dataset.longitude <= 124))
+        mask_lat = ((dataset.latitude >= 20) & (dataset.latitude <= 27))
+        masked_lon_lat_ds = dataset.where((mask_lon & mask_lat), drop=True)
+
+        if masked_lon_lat_ds.sizes['scanline'] == 0 or masked_lon_lat_ds.sizes['ground_pixel'] == 0:
+            raise ValueError("No data points within Taiwan region")
+
+        mask_qa = (masked_lon_lat_ds.qa_value >= self.mask_value)
+        masked_ds = masked_lon_lat_ds.where(mask_qa)
+
+        if np.all(np.isnan(masked_ds.nitrogendioxide_tropospheric_column)):
+            raise ValueError("No valid data points after QA filtering")
+
+        return (
+            masked_ds.longitude[0].data,
+            masked_ds.latitude[0].data,
+            masked_ds.nitrogendioxide_tropospheric_column[0].data
+        )
+
+    def process_each_data(self, start_date: str, end_date: str, use_taiwan_mask: bool = False):
+        """Process every file in the date range, month by month."""
+        # parse the date strings
+        start = datetime.strptime(start_date, '%Y-%m-%d')
+        end = datetime.strptime(end_date, '%Y-%m-%d')
+
+        # walk through the months in the range
+        current_date = start
+        while current_date <= end:
+            year = current_date.strftime('%Y')
+            month = current_date.strftime('%m')
+
+            # build the input and output paths for this month
+            input_dir = RAW_DATA_DIR / year / month
+            output_dir = PROCESSED_DATA_DIR / year / month
+            figure_output_file = FIGURE_DIR / year / month
+
+            # create the directories
+            input_dir.mkdir(parents=True, exist_ok=True)
+            output_dir.mkdir(parents=True, exist_ok=True)
+            figure_output_file.mkdir(parents=True, exist_ok=True)
+
+            # for monthly data
+            # output_file = output_dir / f"NO2_{year}{month}.nc"
+            #
+            # if output_file.exists():
+            #     logger.info(f"File {output_file} exists, skipping")
+
+            # container = []
+
+            # currently only NRTI files are plotted
+            for file_path in input_dir.glob("*NRTI_L2__NO2*.nc"):
+                logger.info(f'Processing: {file_path.name}')
+
+                try:
+                    dataset = self.process_zipped_nc(file_path)
+
+                    if dataset is not None:
+                        try:
+                            # extract the data
+                            lon, lat, no2 = self.extract_data(dataset, use_taiwan_mask)
+
+                            # create a fresh grid for each file
+                            lon_grid, lat_grid = self.create_grid(lon, lat)
+
+                            # interpolate
+                            no2_grid = DataInterpolator.interpolate(
+                                lon, lat, no2,
+                                lon_grid, lat_grid,
+                                method=self.interpolation_method
+                            )
+
+                            # build a temporary Dataset holding the interpolated result
+                            interpolated_ds = xr.Dataset(
+                                {
+                                    'nitrogendioxide_tropospheric_column': (
+                                        ['time', 'latitude', 'longitude'],
+                                        no2_grid[np.newaxis, :, :]
+                                    )
+                                },
+                                coords={
+                                    'time': dataset.time.values[0:1],  # keep the original timestamp
+                                    'latitude': np.squeeze(lat_grid[:, 0]),
+                                    'longitude': np.squeeze(lon_grid[0, :])
+                                }
+                            )
+
+                            # plot the interpolated data
+                            logger.info("Plotting the interpolated data...")
+                            plot_global_no2(interpolated_ds, figure_output_file / file_path.stem, close_after=True, map_scale='Taiwan')
+
+                            # container.append(no2_grid)
+                        finally:
+                            dataset.close()
+
+                except Exception as e:
+                    logger.error(f"Error processing {file_path.name}: {e}")
+                    continue
+
+            # advance to the next month, once per iteration of the while loop
+            if month == "12":
+                current_date = current_date.replace(year=current_date.year + 1, month=1, day=1)
+            else:
+                current_date = current_date.replace(month=current_date.month + 1, day=1)
+
+    def _save_monthly_average(self, container, grid, year, month, output_file):
+        """Save the monthly average."""
+        # average across all files
+        no2_stack = np.stack(container)
+        no2_average = np.nanmean(no2_stack, axis=0)
+
+        # make sure year/month are zero-padded
+        year = str(year).zfill(4)
+        month = str(month).zfill(2)
+        # full timestamp (date and time)
+        time_str = f"{year}-{month}-01T00:00:00.000000000"
+
+        # build the dataset
+        ds_result = xr.Dataset(
+            {
+                'nitrogendioxide_tropospheric_column': (
+                    ['time', 'latitude', 'longitude'],
+                    no2_average[np.newaxis, :, :]
+                )
+            },
+            coords={
+                'time': [np.datetime64(time_str, 'ns')],  # nanosecond precision
+                'latitude': np.squeeze(grid[1][:, 0]),
+                'longitude': np.squeeze(grid[0][0, :])
+            }
+        )
+
+        # time attributes
+        ds_result.time.attrs['long_name'] = 'time'
+        ds_result.time.attrs['standard_name'] = 'time'
+
+        # make sure the output directory exists
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        ds_result.to_netcdf(output_file)
+
+        logger.info(f"Saved monthly average to {output_file}")
+        logger.info(f"Final grid shape: {no2_average.shape}")
+        logger.info(f"Time: {ds_result.time.values}")
+        logger.info(f"Longitude range: {grid[0][0].min():.2f} to {grid[0][0].max():.2f}")
+        logger.info(f"Latitude range: {grid[1][:, 0].min():.2f} to {grid[1][:, 0].max():.2f}")
\ No newline at end of file
diff --git a/src/processing/interpolators.py b/src/processing/interpolators.py
new file mode 100644
index 0000000..d9b4406
--- /dev/null
+++ b/src/processing/interpolators.py
@@ -0,0 +1,71 @@
+"""src/processing/interpolators.py"""
+from scipy.interpolate import griddata
+from scipy.ndimage import map_coordinates
+from scipy.spatial import cKDTree
+import numpy as np
+
+
+class DataInterpolator:
+    """Data interpolator supporting multiple interpolation methods."""
+
+    @staticmethod
+    def griddata_interpolation(lon, lat, data, lon_grid, lat_grid):
+        """Interpolate with scipy.interpolate.griddata."""
+        points = np.column_stack((lon.flatten(), lat.flatten()))
+        values = data.flatten()
+
+        # drop invalid values
+        valid = ~np.isnan(values)
+        points = points[valid]
+        values = values[valid]
+
+        # flatten the grid points into the shape griddata expects
+        grid_points = np.column_stack((lon_grid.flatten(), lat_grid.flatten()))
+
+        # interpolate
+        grid_values = griddata(points, values, grid_points, method='linear')
+        return grid_values.reshape(lon_grid.shape)
+
+    @staticmethod
+    def kdtree_interpolation(lon, lat, data, lon_grid, lat_grid):
+        """Interpolate with a KD-tree nearest-neighbour lookup."""
+        lon_flat = lon_grid.flatten()
+        lat_flat = lat_grid.flatten()
+
+        # build the KD-tree over the source points
+        tree = cKDTree(np.column_stack((lon.flatten(), lat.flatten())))
+
+        # query the nearest source point for each target point
+        distances, indices = tree.query(np.column_stack((lon_flat, lat_flat)), k=1)
+
+        x_index, y_index = np.unravel_index(indices, lon.shape)
+        interpolated_values = map_coordinates(data, [x_index, y_index], order=1, mode='nearest')
+
+        return interpolated_values.reshape(lon_grid.shape)
+
+    @classmethod
+    def interpolate(cls, lon, lat, data, lon_grid, lat_grid, method='griddata'):
+        """Unified interpolation interface.
+
+        Parameters:
+        -----------
+        lon, lat : ndarray
+            source coordinates
+        data : ndarray
+            data to interpolate
+        lon_grid, lat_grid : ndarray
+            target grid coordinates
+        method : str
+            interpolation method, either 'griddata' or 'kdtree'
+
+        Returns:
+        --------
+        ndarray
+            interpolated data
+        """
+        if method == 'griddata':
+            return cls.griddata_interpolation(lon, lat, data, lon_grid, lat_grid)
+        elif method == 'kdtree':
+            return cls.kdtree_interpolation(lon, lat, data, lon_grid, lat_grid)
+        else:
+            raise ValueError(f"Unsupported interpolation method: {method}")
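Note: a quick synthetic round-trip through DataInterpolator, checking that both methods accept 2-D swath arrays and return arrays shaped like the target grid (entirely synthetic data, not part of the PR):

import numpy as np
from src.processing.interpolators import DataInterpolator

rng = np.random.default_rng(0)
lon = np.linspace(118, 124, 60) + rng.normal(0, 0.01, (50, 60))        # jittered swath lons
lat = np.linspace(20, 27, 50)[:, None] + rng.normal(0, 0.01, (50, 60))  # jittered swath lats
no2 = np.hypot(lon - 121, lat - 23.5)                                   # smooth test field

lon_grid, lat_grid = np.meshgrid(np.arange(118, 124, 0.1), np.arange(20, 27, 0.1))
for method in ('griddata', 'kdtree'):
    out = DataInterpolator.interpolate(lon, lat, no2, lon_grid, lat_grid, method=method)
    assert out.shape == lon_grid.shape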
diff --git a/src/processing/taiwan_frame.py b/src/processing/taiwan_frame.py
new file mode 100644
index 0000000..0f782f1
--- /dev/null
+++ b/src/processing/taiwan_frame.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+
+class TaiwanFrame:
+    def __init__(self, resolution=0.01, lat_Taiwan=(21, 26), lon_Taiwan=(119, 123)):
+        self.lat = np.arange(lat_Taiwan[0], lat_Taiwan[1] + resolution, resolution)
+        self.lon = np.arange(lon_Taiwan[0], lon_Taiwan[1] + resolution, resolution)
+
+    def frame(self):
+        return np.meshgrid(self.lon, self.lat)
+
+    @property
+    def container(self):
+        return np.zeros(shape=(self.lat.size, self.lon.size))
\ No newline at end of file
diff --git a/src/utils/logger.py b/src/utils/logger.py
new file mode 100644
index 0000000..4909918
--- /dev/null
+++ b/src/utils/logger.py
@@ -0,0 +1,26 @@
+"""Logging configuration."""
+import logging
+from datetime import datetime
+from pathlib import Path
+from src.config.settings import LOGS_DIR
+
+
+def setup_logging():
+    """Set up the logging configuration."""
+    # make sure the log directory exists
+    log_dir = Path(LOGS_DIR)
+    log_dir.mkdir(parents=True, exist_ok=True)
+
+    # log file path
+    log_file = log_dir / f"Satellite_S5p_{datetime.now().strftime('%Y%m')}.log"
+
+    # basic configuration
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name).10s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S',
+        handlers=[
+            logging.FileHandler(log_file, encoding='utf-8'),
+            logging.StreamHandler()  # also log to the console
+        ]
+    )
diff --git a/src/visualization/__init__.py b/src/visualization/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/plot_nc.py b/src/visualization/plot_nc.py
similarity index 86%
rename from plot_nc.py
rename to src/visualization/plot_nc.py
index c4a7d30..add2996 100644
--- a/plot_nc.py
+++ b/src/visualization/plot_nc.py
@@ -8,24 +8,13 @@
 from scipy.signal import convolve2d
 from netCDF4 import Dataset
-from functools import wraps
 
-# from figure_set import setFigure
 from matplotlib.ticker import ScalarFormatter
 from pathlib import Path
+from src.processing.taiwan_frame import TaiwanFrame
 
-taiwan_counties = gpd.read_file(Path(__file__).parent / "mapdata202301070205/COUNTY_MOI_1090820.shp")
-station = gpd.read_file(Path(__file__).parent / "空氣品質監測站/空氣品質監測站位置圖_121_10704.shp")
-class TaiwanFrame:
-    def __init__(self, resolution=0.01, lat_Taiwan=(21, 26), lon_Taiwan=(119, 123)):
-        self.lat = np.arange(lat_Taiwan[0], lat_Taiwan[1] + resolution, resolution)
-        self.lon = np.arange(lon_Taiwan[0], lon_Taiwan[1] + resolution, resolution)
-
-    def frame(self):
-        return np.meshgrid(self.lon, self.lat)
-
-    def container(self):
-        return np.zeros(shape=(self.lat.size, self.lon.size))
+taiwan_counties = gpd.read_file(Path(__file__).parents[2] / "data/shapefiles/taiwan/COUNTY_MOI_1090820.shp")
+station = gpd.read_file(Path(__file__).parents[2] / "data/shapefiles/stations/空氣品質監測站位置圖_121_10704.shp")
 
 # @setFigure
diff --git a/read_shp.py b/src/visualization/read_shp.py
similarity index 69%
rename from read_shp.py
rename to src/visualization/read_shp.py
index 93e6b7e..3b5446d 100644
--- a/read_shp.py
+++ b/src/visualization/read_shp.py
@@ -5,8 +5,8 @@
 from pathlib import Path
 from shapely.geometry import Point, Polygon
 
-taiwan_counties = gpd.read_file(Path(__file__).parent / "mapdata202301070205/COUNTY_MOI_1090820.shp")
-station = gpd.read_file(Path(__file__).parent / "空氣品質監測站/空氣品質監測站位置圖_121_10704.shp")
+taiwan_counties = gpd.read_file(Path(__file__).parents[2] / "data/shapefiles/taiwan/COUNTY_MOI_1090820.shp")
+station = gpd.read_file(Path(__file__).parents[2] / "data/shapefiles/stations/空氣品質監測站位置圖_121_10704.shp")
 
 geometry = [Point(xy) for xy in zip(station['TWD97Lon'], station['TWD97Lat'])]
 geodata = gpd.GeoDataFrame(station, crs=ccrs.PlateCarree(), geometry=geometry)
@@ -17,8 +17,7 @@
 # add county boundaries
 ax.add_geometries(taiwan_counties['geometry'], crs=ccrs.PlateCarree(), edgecolor='black', facecolor='none')
-# ax.add_features(station['geometry'], crs=ccrs.PlateCarree(), edgecolor='Red', facecolor='none')
+ax.add_geometries(station['geometry'], crs=ccrs.PlateCarree(), edgecolor='Red', facecolor='none')
 
 geodata.plot(ax=ax, color='red', markersize=5)
-
-plt.show()
\ No newline at end of file
+plt.show()
diff --git a/src/visualization/sample_plot_nc.py b/src/visualization/sample_plot_nc.py
new file mode 100644
index 0000000..342294e
--- /dev/null
+++ b/src/visualization/sample_plot_nc.py
@@ -0,0 +1,122 @@
+import xarray as xr
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+import numpy as np
+import warnings
+from typing import Literal
+from cartopy.io import DownloadWarning
+from pathlib import Path
+import logging
+
+plt.rcParams['mathtext.fontset'] = 'custom'
+plt.rcParams['mathtext.rm'] = 'Times New Roman'
+plt.rcParams['mathtext.it'] = 'Times New Roman: italic'
+plt.rcParams['mathtext.bf'] = 'Times New Roman: bold'
+plt.rcParams['mathtext.default'] = 'regular'
+plt.rcParams['font.family'] = 'Times New Roman'
+plt.rcParams['font.weight'] = 'normal'
+plt.rcParams['font.size'] = 16
+
+plt.rcParams['axes.titlesize'] = 'large'
+plt.rcParams['axes.titleweight'] = 'bold'
+plt.rcParams['axes.labelweight'] = 'bold'
+
+
+# warnings.filterwarnings('ignore', category=DownloadWarning)
+logger = logging.getLogger(__name__)
+
+
+def plot_global_no2(data,
+                    savefig_path=None,
+                    close_after=True,
+                    map_scale: Literal['global', 'Taiwan'] = 'global'
+                    ):
+    """
+    Plot the NO2 distribution on a map.
+    """
+    try:
+        # accept either a file path or an already-open dataset
+        if isinstance(data, (str, Path)):
+            ds = xr.open_dataset(data)
+            should_close = close_after  # we opened it, so close it when done
+        else:
+            ds = data
+            should_close = False  # the caller owns the dataset
+
+        # figure and projection
+        fig = plt.figure(figsize=(15, 8) if map_scale == 'global' else (10, 8))
+        ax = plt.axes(projection=ccrs.PlateCarree())
+
+        # map extent
+        if map_scale == 'global':
+            ax.set_global()
+        else:
+            ax.set_extent([100, 145, 0, 45], crs=ccrs.PlateCarree())
+
+        # plot the NO2 data
+        data = ds.nitrogendioxide_tropospheric_column[0]
+
+        im = data.plot(
+            ax=ax,
+            cmap='RdBu_r',
+            transform=ccrs.PlateCarree(),
+            robust=True,  # clip extreme values automatically
+            cbar_kwargs={
+                'label': 'NO$_2$ Tropospheric Column (mol/m$^2$)',
+                'fraction': 0.046,  # colorbar width (default is 0.15)
+                'pad': 0.04,        # gap between the colorbar and the axes
+                'aspect': 20,       # aspect ratio; larger values lengthen the bar
+                'shrink': 0.8 if map_scale == 'global' else 0.9
+            }
+        )
+
+        # map features
+        ax.add_feature(cfeature.BORDERS.with_scale('50m'), linestyle=':')
+        ax.add_feature(cfeature.COASTLINE.with_scale('50m'))
+        ax.add_feature(cfeature.LAND.with_scale('50m'), alpha=0.1)
+        ax.add_feature(cfeature.OCEAN.with_scale('50m'), alpha=0.1)
+
+        # gridlines
+        gl = ax.gridlines(draw_labels=True, linestyle='--', alpha=0.5)
+        gl.top_labels = False
+        gl.right_labels = False
+
+        # mark the data extent with a rectangle
+        lon_min, lon_max = float(ds.longitude.min()), float(ds.longitude.max())
+        lat_min, lat_max = float(ds.latitude.min()), float(ds.latitude.max())
+        rect = plt.Rectangle(
+            (lon_min, lat_min),
+            lon_max - lon_min,
+            lat_max - lat_min,
+            fill=False,
+            color='red',
+            transform=ccrs.PlateCarree(),
+            linewidth=2
+        )
+        ax.add_patch(rect)
+
+        # title
+        time_str = np.datetime64(data.time.values).astype(str)
+        plt.title(f'NO$_2$ Tropospheric Column {time_str}', pad=20, fontdict={'weight': 'bold', 'fontsize': 24})
+
+        plt.tight_layout()
+        plt.show()
+
+        if savefig_path is not None:
+            fig.savefig(savefig_path, dpi=600)
+
+        if should_close:
+            ds.close()
+
+    except Exception as e:
+        logger.error(f"Error while plotting: {str(e)}")
+        raise
+
+
+# main program
+if __name__ == "__main__":
+    file_list = ["/Users/chanchihyu/Sentinel_data/raw/2024/04/S5P_OFFL_L2__NO2____20240409T051555_20240409T065725_33622_03_020600_20240410T213619.nc"]
+
+    for file in file_list:
+        plot_global_no2(file)