diff --git a/English_model.py b/English_model.py index 21b7d7b..b74e4a6 100644 --- a/English_model.py +++ b/English_model.py @@ -2,7 +2,7 @@ import re -from language_utils import file_scheme, term_seperator, build_sepereted_term, \ +from language_utils import file_scheme, term_seperator, build_separated_terms, \ regex_to_big_query, generate_bq_function, SCHEMA_NAME diff --git a/abstraction_model.py b/abstraction_model.py index efe65fa..e38b8f7 100644 --- a/abstraction_model.py +++ b/abstraction_model.py @@ -6,7 +6,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH\ + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH\ , programming_languges, software_goals from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier @@ -114,13 +114,13 @@ # Corrective def build_core_abstraction_regex(): - return build_sepereted_term(core_abstraction_terms) + return build_separated_terms(core_abstraction_terms) def build_excluded_abstraction_regex(): - return build_sepereted_term(excluded_abstraction_terms) + return build_separated_terms(excluded_abstraction_terms) def build_not_abstraction_regex(): diff --git a/adaptive_model.py b/adaptive_model.py index 7d8d422..efc9f4b 100644 --- a/adaptive_model.py +++ b/adaptive_model.py @@ -6,7 +6,7 @@ from conventional_commits import build_cc_adaptive_regex from labeling_util import get_false_positives, get_false_negatives -from language_utils import file_scheme, term_seperator, build_sepereted_term, negation_terms, modals\ +from language_utils import file_scheme, term_seperator, build_separated_terms, negation_terms, modals\ , regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, documentation_entities, prefective_entities\ , software_terms, build_non_positive_linguistic, software_goals_modification, software_goals, unnedded_terms\ , code_review_fixes, no_message, NEAR_ENOUGH @@ -125,7 +125,7 @@ def build_adaptive_action_regex(): def build_adaptive_regex(use_conventional_commits=True): - adaptive_context_re = build_sepereted_term(adaptive_context, just_before=True) + adaptive_context_re = build_separated_terms(adaptive_context, just_before=True) base_re = "((%s)\s[\s\S]{0,50}(%s)%s)" % (adaptive_context_re @@ -162,10 +162,10 @@ def build_non_adaptive_context(): non_adaptive_entities = documentation_entities + software_terms + unnedded_terms + [file_scheme] - return '(%s)' % "|".join(['(?:%s)\s[\s\S]{0,50}(?:%s)' % (build_sepereted_term(adaptive_context, just_before=True) + return '(%s)' % "|".join(['(?:%s)\s[\s\S]{0,50}(?:%s)' % (build_separated_terms(adaptive_context, just_before=True) , "|".join(entities)) , non_adaptive_header - , '(?:%s)\s[\s\S]{0,50}(?:%s)' % (build_sepereted_term(adaptive_actions, just_before=True) + , '(?:%s)\s[\s\S]{0,50}(?:%s)' % (build_separated_terms(adaptive_actions, just_before=True) , "|".join(non_adaptive_entities)) ]) @@ -185,7 +185,7 @@ def is_adaptive(text): def build_core_adaptive_regex(): - return '(%s)' % build_sepereted_term(core_adaptive_terms) + return '(%s)' % build_separated_terms(core_adaptive_terms) def is_core_adaptive(text): @@ -282,7 +282,7 @@ def evaluate_adaptive_classifier(): if __name__ == '__main__': print_adaptive_functions(commit='4b76d8e76af938824f91f4b99247731c21e37ff9') - evaluate_adaptive_classifier() + #evaluate_adaptive_classifier() text = """ "Leverage pip to access installed packages diff --git a/corrective_model.py b/corrective_model.py index bbb4241..3ffa600 100644 --- a/corrective_model.py +++ b/corrective_model.py @@ -22,7 +22,7 @@ from conventional_commits import build_cc_corrective_regex from labeling_util import get_false_positives, get_false_negatives -from language_utils import file_scheme, term_seperator, build_sepereted_term, negation_terms, modals\ +from language_utils import file_scheme, term_seperator, build_separated_terms, negation_terms, modals\ , regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, documentation_entities, prefective_entities\ , static_analyzers, NEAR_ENOUGH, software_entities, code_review_fixes, normalize from model_evaluation import classifiy_commits_df, evaluate_performance @@ -169,9 +169,9 @@ '(if|would)[\s\S]{0,40}go wrong', 'line(?:s)? break(?:s)?', 'typo(s)?\sfix(es)?', - 'fix(ed|es|ing)?' + build_sepereted_term(software_entities) + 'name(s)?', - build_sepereted_term(static_analyzers) + 'fix(es|ed)?', - 'fix(es|ed)?' + build_sepereted_term(static_analyzers) , + 'fix(ed|es|ing)?' + build_separated_terms(software_entities) + 'name(s)?', + build_separated_terms(static_analyzers) + 'fix(es|ed)?', + 'fix(es|ed)?' + build_separated_terms(static_analyzers) , '^### Bug Fix', # tends to be a title, later stating if the commit is a bug fix 'edit the jira link to the correct issue', # Another occurring title 'page(?:s)? break(?:s)?', @@ -214,7 +214,7 @@ def build_valid_find_regex(): # (valid_fix_object) + ")" + sepertion + fix_re + term_seperator #other_valid_re = "(%s)" % "|".join(valid_terms) - other_valid_re = build_sepereted_term(valid_terms) + other_valid_re = build_separated_terms(valid_terms) return "((%s)|(%s)|(%s))" % (prefix, suffix, other_valid_re) @@ -225,7 +225,7 @@ def build_bug_fix_regex(use_conventional_commits=True): # strict_header = "^(?:%s)%s" % ( "|".join([ "do not" ,"don't"]) # , term_seperator) - bug_fix_re = build_sepereted_term(bug_terms) + bug_fix_re = build_separated_terms(bug_terms) if use_conventional_commits: @@ -237,7 +237,7 @@ def build_bug_fix_regex(use_conventional_commits=True): def build_negeted_bug_fix_regex(): bug_fix_re = build_bug_fix_regex(use_conventional_commits=False) - negation_re = build_sepereted_term(negation_terms) + negation_re = build_separated_terms(negation_terms) return "%s[\s\S]{0,20}%s" % (negation_re, bug_fix_re) @@ -245,7 +245,7 @@ def build_negeted_bug_fix_regex(): def build_core_bug_regex(): - return '(%s)' % build_sepereted_term(core_bug_terms) + return '(%s)' % build_separated_terms(core_bug_terms) def is_core_bug(commit_text): text = commit_text.lower() diff --git a/good_model.py b/good_model.py index e4ab3c2..f95c05e 100644 --- a/good_model.py +++ b/good_model.py @@ -49,7 +49,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic + , build_separated_terms, build_non_positive_linguistic from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier # Not sure list @@ -154,13 +154,13 @@ def build_positive_regex(): - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): diff --git a/language_utils.py b/language_utils.py index 7abe573..89376e4 100644 --- a/language_utils.py +++ b/language_utils.py @@ -206,7 +206,7 @@ 'Open Policy Agent', 'RobotFramework']] -def build_sepereted_term(term_list : List, just_before =False): +def build_separated_terms(term_list : List, just_before =False): if just_before: sep = "%s(%s)" % (term_seperator, "|".join(term_list)) else: @@ -222,14 +222,14 @@ def build_non_positive_linguistic(positive_re return '(?:%s)' % "|".join([ - ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_sepereted_term(modals, just_before=True) + ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_separated_terms(modals, just_before=True) , positive_re) - , ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_sepereted_term(neg, just_before=True) + , ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_separated_terms(neg, just_before=True) , positive_re) - , ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_sepereted_term(non_actionable_context, just_before=True) + , ('(?:%s)' + NEAR_ENOUGH + '(?:%s)') % (build_separated_terms(non_actionable_context, just_before=True) , positive_re) # TODO - take care of documentation entities spereatly - #, '(?:%s)[\s\S]{0,10}(?:%s)' % (build_sepereted_term(documentation_entities, just_before=True) + #, '(?:%s)[\s\S]{0,10}(?:%s)' % (build_separated_terms(documentation_entities, just_before=True) # ,positive_re) ]) diff --git a/performance_model.py b/performance_model.py index a624f40..2603fa6 100644 --- a/performance_model.py +++ b/performance_model.py @@ -10,7 +10,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX, NEAR_ENOUGH, VERB_E_SUFFIX + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX, NEAR_ENOUGH, VERB_E_SUFFIX from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier # Not sure list @@ -51,13 +51,13 @@ def build_positive_regex(): - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): diff --git a/queries/bq_refactor_functions.sql b/queries/bq_refactor_functions.sql index 99e581a..fd99537 100644 --- a/queries/bq_refactor_functions.sql +++ b/queries/bq_refactor_functions.sql @@ -20,7 +20,7 @@ AS ( # Model language based on commit: 4b76d8e76af938824f91f4b99247731c21e37ff9 # Refactor :build_refactor_regex() (LENGTH(REGEXP_REPLACE(lower(message),'((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(clean(ing)?(-| )?up(s)?|call(s|ed|ing)?[\\s\\S]{1,50}instead|collaps(e|es|ed|ing)|consolidat(e|es|ed|ing)|decompos(e|es|ed|ing)|drop(ed|s|ing)?( back)|encapsulat(e|es|ed|ing)|gereneliz(e|es|ed|ing)|optimiz(e|es|ed|ing|ation|ations)|pull(ed|s|ing)? (up|down)|re(-)?(write|wrote)|re(-| )?factor(ed|s|ing|ings)?|re(-)?implement(ed|s|ing)?|renam(e|es|ed|ing|ings)|better nam(e|es|ing)|re(-)?organiz(e|es|ed|ing)|re(-)?organization|re(-)?work(ed|s|ing|ings)?|reorg|simplif(y|es|ied|ying|ication)|suppress(es|ed|ing)? warning(s)?|unif(y|ies|ied|ing)|uninline|beef(ed|s|ing)? up|refactor(ing)?(s)?|code improvement(s)?|revis(e|es|ed|ing)|re(-)?construct(s|ed|ing)?|re(-)?(write|write|wrote|writing)|re(-)?cod(e|ed|es|ing)|factor(ed|s|ing)? out|re(-| )?packag(e|es|ed|ing))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)((get|got|getting) rid|(make|makes|made|making)|convert(ed|s|ing)?|dead|drop(ed|s|ing)?|duplicat(e|es|ed|ing)|extract(ed|s|ing)?|hide(e|es|ed|ing)|improv(e|es|ed|ing)|increas(e|es|ed|ing)|mov(e|es|ed|ing)|parameteriz(e|es|ed|ing)|redundant|replac(e|es|ed|ing)|separat(e|e s|ed|ing)|short(:?en|er|ing|s)?|split(s|ing)?|subsitut(e|es|ed|ing)|substitut(e|es|ed|ing)|un(-| )?hid(e|es|ed|ing)|clean(ing)?(-| )?up(s)?|clean(ing|s|ed)?|combin(e|es|ed|ing)|compos(e|es|ed|ing)|de(-| )?compos(e|es|ed|ing)|deprecat(e|es|ed|ing)|encapsulat(e|es|ed|ing)|polish(ed|es|ing)?|re(-| )?factor(ed|s|ing|ings)?|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|rebuil(d|ds|ding|t)|tid(y|ying|ied)|unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)|(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)[\\s\\S]{0,50}(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(assertion(s)?|assignment(s)?|code|conditional(s)?|control|definition(s)?|delegate|delegation|design pattern(s)?|error(-| )?code(s)?|exception(s)?|flag(s)?|getter(s)?|guard clause(s)?|hierarch(y|ies)|implementation(s)?|inheritance|inline|internal|macro(s)?|magic number(s)?|modifier(s)?|null object(s)?|object(s)?|patch(es)?|pointer(s)?|polymorphism|quer(y|ies)|reference(s)?|ref(s)?|return type|setter(s)?|static|sub(-| )?class(es)?|super(-| )?class(es)?|(sub)?(-| )?system(s)?|uninline|variable(s)?|handler|plugin|contravariant|covariant|action(s)?|queue(s)?|stack(s)?|driver(s)?|storage|tool(s)?|log(s)?|setting(s)?|fall( |-)back(s)?|memory|param(s)?|volatile|file(s)?|generic(s)?|initialization(s)?|public|protected|private|framework|singelton|declaration(s)?|init|destructor(s)?|instances(s)?|primitive(s)?|algorithm(s)?|class(es)?|collection(s)?|constant(s)?|constructor(s)?|field(s)?|function(s)?|interface(s)?|member(s)?|method(s)?|module(s)?|parameter(s)?|procedure(s)?|routine(s)?`|structure(s)?|template(s)?|type(s)?|unit(s)?|(helper|utility|auxiliary) function(s)?)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)))|((^|^[\\s\\S]{0,25}(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(clean(ing|s|ed)?(-| )?up(s)?|cleaner|deprecat(e|es|ed|ing)|extract(ed|s|ing)?|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|tid(y|ying|ied) up|improv(e|ed|es|ing|ement|ements)|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|(helper|utility|auxiliary) function(s)?|(move|moved|moves|moving) to|separat(e|es|ed|ing)|split(s|ing)?|->|(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(lint|pylint|tslint|jlint|jslint|eslint|klint|xlint|linter)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)fix(es|ed)?|fix(es|ed)?(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(lint|pylint|tslint|jlint|jslint|eslint|klint|xlint|linter)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|(^(refactor)(\\(.*\\))?(!)?:)', '@'))-LENGTH(REGEXP_REPLACE(lower(message),'((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(clean(ing)?(-| )?up(s)?|call(s|ed|ing)?[\\s\\S]{1,50}instead|collaps(e|es|ed|ing)|consolidat(e|es|ed|ing)|decompos(e|es|ed|ing)|drop(ed|s|ing)?( back)|encapsulat(e|es|ed|ing)|gereneliz(e|es|ed|ing)|optimiz(e|es|ed|ing|ation|ations)|pull(ed|s|ing)? (up|down)|re(-)?(write|wrote)|re(-| )?factor(ed|s|ing|ings)?|re(-)?implement(ed|s|ing)?|renam(e|es|ed|ing|ings)|better nam(e|es|ing)|re(-)?organiz(e|es|ed|ing)|re(-)?organization|re(-)?work(ed|s|ing|ings)?|reorg|simplif(y|es|ied|ying|ication)|suppress(es|ed|ing)? warning(s)?|unif(y|ies|ied|ing)|uninline|beef(ed|s|ing)? up|refactor(ing)?(s)?|code improvement(s)?|revis(e|es|ed|ing)|re(-)?construct(s|ed|ing)?|re(-)?(write|write|wrote|writing)|re(-)?cod(e|ed|es|ing)|factor(ed|s|ing)? out|re(-| )?packag(e|es|ed|ing))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)((get|got|getting) rid|(make|makes|made|making)|convert(ed|s|ing)?|dead|drop(ed|s|ing)?|duplicat(e|es|ed|ing)|extract(ed|s|ing)?|hide(e|es|ed|ing)|improv(e|es|ed|ing)|increas(e|es|ed|ing)|mov(e|es|ed|ing)|parameteriz(e|es|ed|ing)|redundant|replac(e|es|ed|ing)|separat(e|e s|ed|ing)|short(:?en|er|ing|s)?|split(s|ing)?|subsitut(e|es|ed|ing)|substitut(e|es|ed|ing)|un(-| )?hid(e|es|ed|ing)|clean(ing)?(-| )?up(s)?|clean(ing|s|ed)?|combin(e|es|ed|ing)|compos(e|es|ed|ing)|de(-| )?compos(e|es|ed|ing)|deprecat(e|es|ed|ing)|encapsulat(e|es|ed|ing)|polish(ed|es|ing)?|re(-| )?factor(ed|s|ing|ings)?|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|rebuil(d|ds|ding|t)|tid(y|ying|ied)|unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)|(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)[\\s\\S]{0,50}(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(assertion(s)?|assignment(s)?|code|conditional(s)?|control|definition(s)?|delegate|delegation|design pattern(s)?|error(-| )?code(s)?|exception(s)?|flag(s)?|getter(s)?|guard clause(s)?|hierarch(y|ies)|implementation(s)?|inheritance|inline|internal|macro(s)?|magic number(s)?|modifier(s)?|null object(s)?|object(s)?|patch(es)?|pointer(s)?|polymorphism|quer(y|ies)|reference(s)?|ref(s)?|return type|setter(s)?|static|sub(-| )?class(es)?|super(-| )?class(es)?|(sub)?(-| )?system(s)?|uninline|variable(s)?|handler|plugin|contravariant|covariant|action(s)?|queue(s)?|stack(s)?|driver(s)?|storage|tool(s)?|log(s)?|setting(s)?|fall( |-)back(s)?|memory|param(s)?|volatile|file(s)?|generic(s)?|initialization(s)?|public|protected|private|framework|singelton|declaration(s)?|init|destructor(s)?|instances(s)?|primitive(s)?|algorithm(s)?|class(es)?|collection(s)?|constant(s)?|constructor(s)?|field(s)?|function(s)?|interface(s)?|member(s)?|method(s)?|module(s)?|parameter(s)?|procedure(s)?|routine(s)?`|structure(s)?|template(s)?|type(s)?|unit(s)?|(helper|utility|auxiliary) function(s)?)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)))|((^|^[\\s\\S]{0,25}(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(clean(ing|s|ed)?(-| )?up(s)?|cleaner|deprecat(e|es|ed|ing)|extract(ed|s|ing)?|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|tid(y|ying|ied) up|improv(e|ed|es|ing|ement|ements)|re(-|)?organiz(e|es|ed|ing)|re(-|)?structur(e|es|ed|ing)|(helper|utility|auxiliary) function(s)?|(move|moved|moves|moving) to|separat(e|es|ed|ing)|split(s|ing)?|->|(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(lint|pylint|tslint|jlint|jslint|eslint|klint|xlint|linter)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)fix(es|ed)?|fix(es|ed)?(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(lint|pylint|tslint|jlint|jslint|eslint|klint|xlint|linter)(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|(^(refactor)(\\(.*\\))?(!)?:)', ''))) -# Refactor :build_sepereted_term(removal) +# Refactor :build_separated_terms(removal) + (LENGTH(REGEXP_REPLACE(lower(message),'(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)', '@'))-LENGTH(REGEXP_REPLACE(lower(message),'(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)', ''))) # Refactor :build_refactor_goals_regex() @@ -73,7 +73,7 @@ general.bq_non_positive_linguistic_refactor_removal RETURNS int64 AS ( # Model language based on commit: 4b76d8e76af938824f91f4b99247731c21e37ff9 -# Refactor :build_non_positive_linguistic(build_sepereted_term(removal)) +# Refactor :build_non_positive_linguistic(build_separated_terms(removal)) (LENGTH(REGEXP_REPLACE(lower(message),'(((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(can|could|ha(ve|s|d)|may|might|must|need|ought|shall|should|will|would))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(aren\'t|arent|didn\'t|didnt|don\'t|dont|doesn\'t|doesnt|isn\'t|isnt|lack|n\'t|never|no|nobody|none|not|nothing|weren\'t|werent|without|won\'t|wont))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(for(get|gets|got|geting)|allow(s|ed|ing)?))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)))', '@'))-LENGTH(REGEXP_REPLACE(lower(message),'(((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(can|could|ha(ve|s|d)|may|might|must|need|ought|shall|should|will|would))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(aren\'t|arent|didn\'t|didnt|don\'t|dont|doesn\'t|doesnt|isn\'t|isnt|lack|n\'t|never|no|nobody|none|not|nothing|weren\'t|werent|without|won\'t|wont))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|))|((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(for(get|gets|got|geting)|allow(s|ed|ing)?))[\\s\\S]{0,10}((\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)(add(s|ed|ing)?[\\s\\S]{1,50}helper(s)?|us(e|es|ed|ing)[\\s\\S]{1,50}instead|split(s|ing)?[\\s\\S]{1,50}into|break(s|ing)?[\\s\\S]{1,50}into|separat(e|e s|ed|ing)[\\s\\S]{1,50}into|replac(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|remov(e|es|ed|ing)[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|kill(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|drop(s|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented)|mov(e|es|ed|ing)?[\\s\\S]{1,50}(unnecessary|unneeded|unused|(not|never|no longer) used|no longer needed|redundant|useless|duplicate(d)?|deprecated|obsolete(d)?|commented))(\\s|\\.|\\?|\\!|\\[|\\]|\\(|\\)|\\:|^|$|\\,|\'|"|/|#|\\$|\\%|&|\\*|\\+|=|`|;|<|>|@|~|{|}|_|\\|)))', ''))) ) ; diff --git a/refactor_model.py b/refactor_model.py index c0ef161..328066a 100644 --- a/refactor_model.py +++ b/refactor_model.py @@ -1,7 +1,7 @@ import re from conventional_commits import build_cc_refactor_regex -from language_utils import file_scheme, term_seperator, build_sepereted_term, negation_terms, modals\ +from language_utils import file_scheme, term_seperator, build_separated_terms, negation_terms, modals\ , regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, documentation_entities, prefective_entities\ , software_terms, build_non_positive_linguistic, software_goals_modification, software_goals, static_analyzers @@ -89,8 +89,8 @@ , '(?:move|moved|moves|moving) to' , 'separat(?:e|es|ed|ing)' , 'split(?:s|ing)?', '->' - , build_sepereted_term(static_analyzers) + 'fix(es|ed)?' - , 'fix(es|ed)?' + build_sepereted_term(static_analyzers) + , build_separated_terms(static_analyzers) + 'fix(es|ed)?' + , 'fix(es|ed)?' + build_separated_terms(static_analyzers) #, '(private|public|protected|static)' ] @@ -132,7 +132,7 @@ , 're(?:-| )?packag(?:e|es|ed|ing)' #, 'code review' #, 'collapse' - #, "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_sepereted_term(feedback_action + #, "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_separated_terms(feedback_action # , just_before=True) # , term_seperator # , term_seperator @@ -183,7 +183,7 @@ def build_core_refactor_regex(): - return '(%s)' % build_sepereted_term(core_refactor_terms) + return '(%s)' % build_separated_terms(core_refactor_terms) def is_core_refactor(text): return match(text, build_core_refactor_regex()) @@ -194,7 +194,7 @@ def build_refactor_regex(use_conventional_commits=True): , "|".join(perfective_header_action) , term_seperator) - activity_regerx = "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_sepereted_term(modification_activity + activity_regerx = "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_separated_terms(modification_activity , just_before=True) , term_seperator , term_seperator @@ -202,12 +202,12 @@ def build_refactor_regex(use_conventional_commits=True): , "|".join(refactor_entities) , term_seperator) if use_conventional_commits: - agg_re = "(%s)|(%s)|(%s)|(%s)" % (build_sepereted_term(refactor_context) + agg_re = "(%s)|(%s)|(%s)|(%s)" % (build_separated_terms(refactor_context) , activity_regerx , header_regex , build_cc_refactor_regex()) else: - agg_re = "(%s)|(%s)|(%s)" % (build_sepereted_term(refactor_context) + agg_re = "(%s)|(%s)|(%s)" % (build_separated_terms(refactor_context) , activity_regerx , header_regex) return agg_re @@ -215,7 +215,7 @@ def build_refactor_regex(use_conventional_commits=True): def build_refactor_goals_regex(): - goals_regerx = "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_sepereted_term(software_goals_modification + goals_regerx = "(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (build_separated_terms(software_goals_modification , just_before=True) , term_seperator , term_seperator @@ -239,14 +239,14 @@ def build_non_code_perfective_regex(): , 'support(?:s|ed|ing)?' ] modifiers = modification_activity + non_perfective_context - activity_regerx = "((?:%s)(?:\s|%s[\s\S]{0,50}%s)(?:%s))" % (build_sepereted_term(modifiers, just_before=True) + activity_regerx = "((?:%s)(?:\s|%s[\s\S]{0,50}%s)(?:%s))" % (build_separated_terms(modifiers, just_before=True) , term_seperator , term_seperator , "|".join(prefective_entities + non_perfective_entities)) doc_header_regex = '(?:^|^[\s\S]{0,25}%s)(?:%s)[\s\S]{0,25}(?:%s)' % (term_seperator , "|".join(perfective_header_action) - , build_sepereted_term(documentation_entities)) + , build_separated_terms(documentation_entities)) no_prefective_action = "|".join([ @@ -256,7 +256,7 @@ def build_non_code_perfective_regex(): , '(?:make|made|making|makes)(?:%s|%s[\s\S]{0,50}%s)sense' % (term_seperator, term_seperator, term_seperator) , 'improv(?:e|es|ed|ing) handling' , 'need(?:s|ing)?\srefactor(?:ing)?' - , '(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)' %(build_sepereted_term(non_perfective_entities,just_before=True) + , '(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)' %(build_separated_terms(non_perfective_entities,just_before=True) ,term_seperator , term_seperator , term_seperator @@ -275,7 +275,7 @@ def build_documentation_entities_context(positive_re): return '(?:%s)' % "|".join([ # TODO - take care of documentation entities spereatly - '(?:%s)[\s\S]{0,10}(?:%s)' % (build_sepereted_term(documentation_entities, just_before=True) + '(?:%s)[\s\S]{0,10}(?:%s)' % (build_separated_terms(documentation_entities, just_before=True) ,positive_re) ]) @@ -301,9 +301,9 @@ def positive_refactor_to_bq(): print( "# Refactor :build_refactor_regex()") #print( ",") print( regex_to_big_query(build_refactor_regex())) - print( "# Refactor :build_sepereted_term(removal)") + print( "# Refactor :build_separated_terms(removal)") print( "+") - print( regex_to_big_query(build_sepereted_term(removal))) + print( regex_to_big_query(build_separated_terms(removal))) print( "# Refactor :build_refactor_goals_regex()") print( "+") @@ -324,8 +324,8 @@ def non_positive_linguistic_refactor_goals_to_bq(): print( regex_to_big_query(build_non_positive_linguistic(build_refactor_goals_regex()))) def non_positive_linguistic_removal_to_bq(): - print("# Refactor :build_non_positive_linguistic(build_sepereted_term(removal))") - print(regex_to_big_query(build_non_positive_linguistic(build_sepereted_term(removal)))) + print("# Refactor :build_non_positive_linguistic(build_separated_terms(removal))") + print(regex_to_big_query(build_non_positive_linguistic(build_separated_terms(removal)))) def documentation_entities_context_refactor_to_bq(): @@ -350,7 +350,7 @@ def refactor_to_bq(): def built_is_refactor(commit_text): - removal_re = build_sepereted_term(removal) + removal_re = build_separated_terms(removal) return (match(commit_text, build_refactor_regex()) + match(commit_text, removal_re) @@ -360,12 +360,12 @@ def built_is_refactor(commit_text): , build_documentation_entities_context(build_refactor_regex(use_conventional_commits=False))) - match(commit_text , build_non_positive_linguistic(build_refactor_regex(use_conventional_commits=False))) - - match(commit_text, build_non_positive_linguistic(build_sepereted_term(removal))) + - match(commit_text, build_non_positive_linguistic(build_separated_terms(removal))) - match(commit_text, build_non_positive_linguistic(build_refactor_goals_regex())) ) > 0 def build_perfective_regex(): - non_code = build_sepereted_term (prefective_entities) + non_code = build_separated_terms (prefective_entities) perfective = "(%s)" % non_code diff --git a/satd_model.py b/satd_model.py index 0b709d4..1523d82 100644 --- a/satd_model.py +++ b/satd_model.py @@ -9,7 +9,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH, term_seperator + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH, term_seperator from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier @@ -129,13 +129,13 @@ def build_positive_regex(): #return "(%s)" % ("|".join(positive_terms)) - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): #return "(%s)" % ("|".join(excluded_terms)) - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): diff --git a/security_model.py b/security_model.py index e632dda..1fce9df 100644 --- a/security_model.py +++ b/security_model.py @@ -12,7 +12,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier # Not sure list @@ -112,13 +112,13 @@ def build_positive_regex(): - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): diff --git a/sentiment_model.py b/sentiment_model.py index eec26a4..5f78dce 100644 --- a/sentiment_model.py +++ b/sentiment_model.py @@ -9,7 +9,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH\ + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX, VERB_E_SUFFIX, NEAR_ENOUGH\ , programming_languges from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier @@ -594,13 +594,13 @@ def build_positive_sentiment_regex(): - return build_sepereted_term(positive_sentiment) + return build_separated_terms(positive_sentiment) def build_positive_sentiment_excluded_regex(): - return build_sepereted_term(excluded_positive_sentiment) + return build_separated_terms(excluded_positive_sentiment) def build_not_positive_sentiment_regex(): @@ -635,13 +635,13 @@ def positive_sentiment_to_bq(): def build_negative_sentiment_regex(): - return build_sepereted_term(negative_sentiment) + return build_separated_terms(negative_sentiment) def build_negative_sentiment_excluded_regex(): - return build_sepereted_term(excluded_negative_sentiment) + return build_separated_terms(excluded_negative_sentiment) def build_not_negative_sentiment_regex(): diff --git a/swearing_model.py b/swearing_model.py index 3ee6f89..dd04b95 100644 --- a/swearing_model.py +++ b/swearing_model.py @@ -20,7 +20,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic + , build_separated_terms, build_non_positive_linguistic from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier # Not sure list @@ -141,13 +141,13 @@ def build_positive_regex(): - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): diff --git a/typos_model.py b/typos_model.py index abce75c..7d74238 100644 --- a/typos_model.py +++ b/typos_model.py @@ -9,7 +9,7 @@ from configuration import DATA_PATH from language_utils import regex_to_big_query, generate_bq_function, match, SCHEMA_NAME, print_logic_to_bq\ - , build_sepereted_term, build_non_positive_linguistic, REGULAR_SUFFIX + , build_separated_terms, build_non_positive_linguistic, REGULAR_SUFFIX from model_evaluation import classifiy_commits_df, evaluate_performance, evaluate_concept_classifier @@ -23,7 +23,7 @@ ,'accomodate' # mistake of accommodate ,'acommodate' # mistake of accommodate ,'acheive' # mistake of achieve -#,'agrieve' # mistake of aggrieved - Alos a name +#,'agrieve' # mistake of aggrieved - Also a name ,'acknowlege' # mistake of acknowledge ,'aknowledge' # mistake of acknowledge ,'acquaintence' # mistake of acquaintance @@ -295,13 +295,13 @@ def build_positive_regex(): - return build_sepereted_term(positive_terms) + return build_separated_terms(positive_terms) def build_excluded_regex(): - return build_sepereted_term(excluded_terms) + return build_separated_terms(excluded_terms) def build_not_positive_regex(): @@ -423,4 +423,4 @@ def evaluate_typo_classifier(): print("typo in text", re.findall(build_positive_regex(), text)) -print(build_sepereted_term(positive_terms)) \ No newline at end of file +print(build_separated_terms(positive_terms)) \ No newline at end of file