Skip to content

Commit

Permalink
Pull vacancies from unem csv
Browse files Browse the repository at this point in the history
  • Loading branch information
gilesdring committed Oct 17, 2023
1 parent 51b3741 commit c4cc244
Show file tree
Hide file tree
Showing 9 changed files with 1,044 additions and 789 deletions.
756 changes: 378 additions & 378 deletions data/vacancies/vacancies_by_date.csv

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions scripts/vacancies/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ stages:
deps:
- path: data/vacancies/vacancies_by_date.csv
hash: md5
md5: 67ddc10dc25e809c28324e7a8eaa4407
size: 10506
md5: c46743c18e82087bb902fd660b2d5ada
size: 9102
- path: scripts/util/metadata.py
hash: md5
md5: cfc702d76a3920af615dbbf1a9408cfd
size: 697
- path: scripts/vacancies/prepare.py
hash: md5
md5: 3cf43cb424b9c3af8c5b362f36b36325
size: 5823
md5: 2a696208a31565f6efa28a7379646b55
size: 5728
- path: working/upstream/metadata.csv
hash: md5
md5: e8f918d67dad983a618a80d2185527dc
Expand All @@ -26,8 +26,8 @@ stages:
outs:
- path: src/_data/sources/vacancies/
hash: md5
md5: 9c94b63389a90d514b7bb98333dc2832.dir
size: 20314
md5: 6a30a4c5286c32ba322f297e711d02fa.dir
size: 19087
nfiles: 7
extract:
cmd: PYTHONPATH=. python3 scripts/vacancies/extract.py
Expand All @@ -41,22 +41,22 @@ stages:
md5: d87aa1a94f87db2274f2b87c9ee0d9ab
size: 848
transform:
cmd: PYTHONPATH=. python3 scripts/vacancies/transform.py
cmd: papermill --no-progress-bar transform.ipynb ../../working/output/vacancies-transform.ipynb
deps:
- path: scripts/vacancies/transform.py
- path: ../../working/upstream/unem-codes.csv
hash: md5
md5: 6888ce0bde0789ebac0ea8f0fd849d53
size: 788
- path: working/upstream/lms-codes.csv
md5: b08d4dba89fdbe5271e40f25178cd7e3
size: 35427
- path: ../../working/upstream/unem.csv
hash: md5
md5: 1ab28b2730305c82dcdb124ce296a66b
size: 181882
- path: working/upstream/lms.csv
md5: 985ee5bc4ee9d4458b321d5790ccac5c
size: 3641836
- path: transform.ipynb
hash: md5
md5: 72d52598a0ba79effd075f9f66180a4d
size: 17934704
md5: 9ad3d7e68c06102c0196aa068faabfc2
size: 2209
outs:
- path: data/vacancies/vacancies_by_date.csv
- path: ../../data/vacancies/vacancies_by_date.csv
hash: md5
md5: 67ddc10dc25e809c28324e7a8eaa4407
size: 10506
md5: c46743c18e82087bb902fd660b2d5ada
size: 9102
13 changes: 7 additions & 6 deletions scripts/vacancies/dvc.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
vars:
- top: ../..
stages:
transform:
cmd: PYTHONPATH=. python3 scripts/vacancies/transform.py
wdir: ../..
cmd: papermill --no-progress-bar transform.ipynb ${top}/working/output/vacancies-transform.ipynb
deps:
- scripts/vacancies/transform.py
- working/upstream/lms.csv
- working/upstream/lms-codes.csv
- transform.ipynb
- ${top}/working/upstream/unem.csv
- ${top}/working/upstream/unem-codes.csv
outs:
- data/vacancies/vacancies_by_date.csv:
- ${top}/data/vacancies/vacancies_by_date.csv:
cache: false
persist: false
prepare:
Expand Down
10 changes: 5 additions & 5 deletions scripts/vacancies/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@

def prepare_vacancies():
vacancies = pd.read_csv(os.path.join(
RAW_DATA_DIR, 'vacancies_by_date.csv'), index_col=['index'])
RAW_DATA_DIR, 'vacancies_by_date.csv'))
vacancies.sort_values(by='date', ascending=True, inplace=True)
vacancies['quarter_label'] = pd.to_datetime(
vacancies.date).map(lms_period_to_quarter_label)
vacancies['quarter_axis_label'] = vacancies.quarter_label.str.replace(
' ', '\\n')

quarterly = vacancies.loc[vacancies['freq'] == 'q'].reset_index()
quarterly = quarterly.drop(columns=['index', 'freq'], axis=1).reset_index()
quarterly = vacancies.loc[vacancies['freq'] == 'q']
quarterly = quarterly.drop(columns=['freq'], axis=1)
quarterly.to_csv(os.path.join(
DATA_DIR, 'quarterly_vacancies.csv'), index=False)

monthly = vacancies.loc[vacancies['freq'] == 'm'].reset_index()
monthly = monthly.drop(columns=['index', 'freq'], axis=1).reset_index()
monthly = vacancies.loc[vacancies['freq'] == 'm']
monthly = monthly.drop(columns=['freq'], axis=1)
monthly.to_csv(os.path.join(
DATA_DIR, 'monthly_vacancies.csv'), index=False)

Expand Down
91 changes: 91 additions & 0 deletions scripts/vacancies/transform.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Duplicating VACS02"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"levels = [\n",
" 'AP2Y', 'JP9H', 'JP9I', 'JP9J', 'JP9K', 'JP9L', 'JP9M', 'JP9N', 'JP9O', 'JP9P', 'JP9Q', 'JP9R', 'JP9S', 'JP9T', 'JP9U', 'JP9V', 'JP9W', 'JP9X', 'JP9Y', 'JP9Z'\n",
"]\n",
"ratios = [\n",
" 'AP2Z', 'JPA2', 'JPA3', 'JPA4', 'JPA5', 'JPA6', 'JPA7', 'JPA8', 'JPA9', 'JPB2', 'JPB3', 'JPB4', 'JPB5', 'JPB6', 'JPB7', 'JPB8', 'JPB9', 'JPC2', 'JPC3', 'JPC4'\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('../../working/upstream/unem.csv')\n",
"codes = pd.read_csv('../../working/upstream/unem-codes.csv', index_col=['CDID'], parse_dates=['Release Date', 'Next release'])\n",
"data = data.loc[data.variable.isin(levels + ratios)]\n",
"data = data.merge(codes, left_on='variable', right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"vacancies_by_date = data.loc[\n",
" data.variable=='AP2Y',\n",
" ['variable', 'value', 'date', 'freq']\n",
"].rename(\n",
" columns={'variable': 'code'}\n",
")\n",
"\n",
"vacancies_by_date.to_csv('../../data/vacancies/vacancies_by_date.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "yff-data-ansOWYcy",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
23 changes: 0 additions & 23 deletions scripts/vacancies/transform.py

This file was deleted.

Loading

0 comments on commit c4cc244

Please sign in to comment.