Skip to content

Commit

Permalink
Index 5 bulk load
Browse files: browse the repository at this point in the history
  • Loading branch information
emgeier authored Nov 18, 2024
1 parent b72278c commit 6e74582
Showing 1 changed file with 32 additions and 32 deletions.
64 changes: 32 additions & 32 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,37 +64,37 @@ jobs:

- name: Specify specific XML files and 25 per category
run: |
# find ./data/persons/tei -name '*.xml' | head -n 25 > xml_files.txt
# find ./data/places/tei -name '*.xml' | head -n 25 >> xml_files.txt
# find ./data/works/tei -name '*.xml' | head -n 25 >> xml_files.txt
find ./data/persons/tei -name '*.xml' | head -n 25 > xml_files.txt
find ./data/places/tei -name '*.xml' | head -n 25 >> xml_files.txt
find ./data/works/tei -name '*.xml' | head -n 25 >> xml_files.txt
find ./data/bibl/tei -name '*.xml' | head -n 25 >> xml_files.txt
# find ./data/subjects/tei -name '*.xml' | head -n 25 >> xml_files.txt
# echo "./data/persons/tei/25.xml" > xml_files.txt
# echo "./data/persons/tei/110.xml" >> xml_files.txt
# echo "./data/persons/tei/106.xml" >> xml_files.txt
# echo "./data/persons/tei/109.xml" >> xml_files.txt
# echo "./data/persons/tei/101.xml" >> xml_files.txt
# echo "./data/persons/tei/100.xml" >> xml_files.txt
# echo "./data/persons/tei/102.xml" >> xml_files.txt
# echo "./data/persons/tei/1021.xml" >> xml_files.txt
# echo "./data/persons/tei/320.xml" >> xml_files.txt
# echo "./data/persons/tei/67.xml" >> xml_files.txt
# echo "./data/persons/tei/544.xml" >> xml_files.txt
# echo "./data/persons/tei/732.xml" >> xml_files.txt
# echo "./data/places/tei/10.xml" >> xml_files.txt
# echo "./data/places/tei/78.xml" >> xml_files.txt
# echo "./data/places/tei/1507.xml" >> xml_files.txt
# echo "./data/places/tei/1486.xml" >> xml_files.txt
# echo "./data/places/tei/104.xml" >> xml_files.txt
# echo "./data/places/tei/602.xml" >> xml_files.txt
# echo "./data/works/tei/315.xml" >> xml_files.txt
# echo "./data/works/tei/9501.xml" >> xml_files.txt
# echo "./data/works/tei/nhsl/tei/9723.xml" >> xml_files.txt
# echo "./data/works/tei/nhsl/tei/9724.xml" >> xml_files.txt
# echo "./data/works/tei/10510.xml" >> xml_files.txt
# echo "./data/works/tei/nhsl/tei/10511.xml" >> xml_files.txt
# echo "Processing specified XML files:"
find ./data/subjects/tei -name '*.xml' | head -n 25 >> xml_files.txt
echo "./data/persons/tei/25.xml" >> xml_files.txt
echo "./data/persons/tei/110.xml" >> xml_files.txt
echo "./data/persons/tei/106.xml" >> xml_files.txt
echo "./data/persons/tei/109.xml" >> xml_files.txt
echo "./data/persons/tei/101.xml" >> xml_files.txt
echo "./data/persons/tei/100.xml" >> xml_files.txt
echo "./data/persons/tei/102.xml" >> xml_files.txt
echo "./data/persons/tei/1021.xml" >> xml_files.txt
echo "./data/persons/tei/320.xml" >> xml_files.txt
echo "./data/persons/tei/67.xml" >> xml_files.txt
echo "./data/persons/tei/544.xml" >> xml_files.txt
echo "./data/persons/tei/732.xml" >> xml_files.txt
echo "./data/places/tei/10.xml" >> xml_files.txt
echo "./data/places/tei/78.xml" >> xml_files.txt
echo "./data/places/tei/1507.xml" >> xml_files.txt
echo "./data/places/tei/1486.xml" >> xml_files.txt
echo "./data/places/tei/104.xml" >> xml_files.txt
echo "./data/places/tei/602.xml" >> xml_files.txt
echo "./data/works/tei/315.xml" >> xml_files.txt
echo "./data/works/tei/9501.xml" >> xml_files.txt
echo "./data/works/tei/nhsl/tei/9723.xml" >> xml_files.txt
echo "./data/works/tei/nhsl/tei/9724.xml" >> xml_files.txt
echo "./data/works/tei/10510.xml" >> xml_files.txt
echo "./data/works/tei/nhsl/tei/10511.xml" >> xml_files.txt
echo "Processing specified XML files:"
cat xml_files.txt
Expand All @@ -119,7 +119,7 @@ jobs:
# Extract the filename and create the index header for OpenSearch bulk format
filename=$(basename ${file%.xml})
echo "Processing $filename for JSON"
printf "{\"index\":{\"_index\":\"syriaca-index-4\",\"_id\":\"$type-$filename\"}}\n" >> bulk_data.json
printf "{\"index\":{\"_index\":\"syriaca-index-5\",\"_id\":\"$type-$filename\"}}\n" >> bulk_data.json

# Apply XSLT for JSON conversion and append it to bulk_data.json directly
java -jar saxon.jar -s:$file -xsl:json-stylesheet.xsl docType="$type" | tr -d '\n' >> bulk_data.json
Expand Down Expand Up @@ -166,7 +166,7 @@ jobs:
# Step 7: Upload files to S3
- name: Upload JSON file to S3
run: |
aws s3 cp bulk_data.json s3://srophe-syriaca-front-end/json-data/advancedsearchfields/bulk_data_bibl_index_4.json
aws s3 cp bulk_data.json s3://srophe-syriaca-front-end/json-data/advancedsearchfields/bulk_data_index_5.json
env:
AWS_REGION: ${{ secrets.AWS_REGION }}
AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
Expand Down

0 comments on commit 6e74582

Please sign in to comment.