Skip to content

Commit

Permalink
debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
India Kerle committed May 10, 2024
1 parent 0c87545 commit ec208b9
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
5 changes: 4 additions & 1 deletion ojd_daps_skills/extract_skills/extract_skills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,10 @@
from ojd_daps_skills import setup_spacy_extensions
from ojd_daps_skills.extract_skills.extract_skills_utils import ExtractConfig
from ojd_daps_skills.extract_skills.multiskill_rules import (
_split_duplicate_object, _split_duplicate_verb, _split_skill_mentions)
_split_duplicate_object,
_split_duplicate_verb,
_split_skill_mentions,
)
from ojd_daps_skills.map_skills.skill_mapper import SkillsMapper
from ojd_daps_skills.map_skills.skill_mapper_utils import MapConfig
from ojd_daps_skills.utils.text_cleaning import clean_text, short_hash
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,8 +18,7 @@
from wasabi import msg

from ojd_daps_skills import bucket_name
from ojd_daps_skills.utils.data_getters import (get_s3_resource, load_s3_data,
save_to_s3)
from ojd_daps_skills.utils.data_getters import get_s3_resource, load_s3_data, save_to_s3


def find_lev_0(code):
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,8 +3,7 @@
"""

from ojd_daps_skills import bucket_name
from ojd_daps_skills.utils.data_getters import (get_s3_resource, load_s3_data,
save_to_s3)
from ojd_daps_skills.utils.data_getters import get_s3_resource, load_s3_data, save_to_s3
from ojd_daps_skills.utils.text_cleaning import short_hash

if __name__ == "__main__":
Expand Down
17 changes: 9 additions & 8 deletions ojd_daps_skills/map_skills/skill_mapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,10 @@

from ojd_daps_skills import setup_spacy_extensions
from ojd_daps_skills.map_skills.skill_mapper_utils import (
MapConfig, get_most_common_code, get_top_comparisons)
MapConfig,
get_most_common_code,
get_top_comparisons,
)
from ojd_daps_skills.utils.text_cleaning import clean_text, short_hash

setup_spacy_extensions()
Expand Down Expand Up @@ -126,17 +129,17 @@ def get_embeddings(
"""
all_skills = list(chain.from_iterable([doc._.skill_spans for doc in job_ads]))
all_skills_unique = list(set(all_skills))

if not isinstance(self.config.hard_coded_taxonomy, dict):
self.config.hard_coded_taxonomy = {}

self.all_skills_unique_dict = {}
for skill in all_skills_unique:
skill_clean = clean_text(skill)
skill_hash = short_hash(skill_clean)
if not self.config.hard_coded_taxonomy.get(skill_hash):
self.all_skills_unique_dict[skill_hash] = skill_clean

skill_embeddings = self.config.bert_model.transform(
list(self.all_skills_unique_dict.values())
)
Expand Down Expand Up @@ -169,17 +172,15 @@ def map_skills(self, job_ads: List[Doc]) -> List[Dict[str, Any]]:

skill_embeddings, taxonomy_embeddings_dict = self.get_embeddings(job_ads)


(
top_skill_indxs,
top_skill_scores,
tax_skills_ix,
) = self.get_top_taxonomy_skills(skill_embeddings, taxonomy_embeddings_dict)

print("top_skill_indxs", top_skill_indxs)
print("top_skill_scores", top_skill_scores)
print("tax_skills_ix", tax_skills_ix)


if self.config.taxonomy_config.get("skill_hier_info_col"):
top_hier_skills, hier_types = self.get_top_hierarchy_skills(
Expand Down Expand Up @@ -247,7 +248,7 @@ def map_skills(self, job_ads: List[Doc]) -> List[Dict[str, Any]]:
)

skill_mapper_list.append(match_results)

return skill_mapper_list

def match_skills(self, job_ads: List[Doc]) -> Dict[int, dict]:
Expand Down

0 comments on commit ec208b9

Please sign in to comment.