From 24acc130574d603277c50d548da7e5dcbf49d0f7 Mon Sep 17 00:00:00 2001 From: Jacob Levy <129657918+jlevypaloalto@users.noreply.github.com> Date: Thu, 11 Jul 2024 12:17:26 +0300 Subject: [PATCH] Update docker ml (#35081) * updated docker * added the rest * devdemisto/ml:1.0.0.100486 * fix tpb * return on no incidents * remove runonce * remove space * fixed * fix create incidents script * new docker * revert: fix create incidents script * add outputs to DBotFindSimilarIncidents * new tpb DBotFindSimilarIncidents-test * new docker * bump transformers * Empty-Commit * fix conf.json * more fixes * more fixes * new docker * RN * new docker * revert dockers * more stuff * redirect stderr * docker * format * format * RN * more stuff * build fixes * build fixes * fix unit-tests * more docker changes * more docker changes * build fixes * suppress logger * build fixes * build fixes --- Packs/Base/ReleaseNotes/1_34_28.md | 24 + .../DBotBuildPhishingClassifier.py | 33 +- .../DBotBuildPhishingClassifier.yml | 9 +- .../DBotBuildPhishingClassifier_test.py | 7 +- .../DBotFindSimilarIncidents.yml | 24 +- .../DBotFindSimilarIncidentsByIndicators.yml | 2 +- .../DBotPredictPhishingWords.py | 67 +-- .../DBotPredictPhishingWords.yml | 3 +- ...st.py => DBotPredictPhishingWords_test.py} | 26 +- .../DBotPreprocessTextData.py | 15 +- .../DBotPreprocessTextData.yml | 3 +- .../DBotTrainTextClassifierV2.py | 34 +- .../DBotTrainTextClassifierV2.yml | 3 +- .../DBotTrainTextClassifierV2_test.py | 4 +- .../GetMLModelEvaluation.py | 69 +-- .../GetMLModelEvaluation.yml | 6 +- .../GetMLModelEvaluation_test.py | 41 +- .../Scripts/GetMLModelEvaluation/README.md | 7 +- ...playbook-DBotFindSimilarIncidents-test.yml | 340 ++++++++++++++ Packs/Base/pack_metadata.json | 2 +- .../script-TestCreateIncidentsFile.yml | 4 +- ...stCreateIncidentsForPhishingClassifier.yml | 10 +- Packs/ML/ReleaseNotes/1_4_11.md | 14 + .../DBotPredictIncidentsBatch.py | 12 +- .../DBotPredictIncidentsBatch.yml | 8 +- 
.../DBotPredictIncidentsBatch_test.py | 0 .../DBotPredictIncidentsBatch/README.md | 51 +++ .../DBotPredictOutOfTheBoxV2.py | 3 +- .../DBotPredictOutOfTheBoxV2.yml | 3 +- .../EvaluateMLModllAtProduction.py | 77 ++-- .../EvaluateMLModllAtProduction.yml | 4 +- .../EvaluateMLModllAtProduction/README.md | 52 +++ ...-Create_Phishing_Classifier_V2_ML_Test.yml | 418 +++++------------- ...playbook-DBotPredictOutOfTheBoxV2-test.yml | 333 +++++++++----- ...Phishing_Classifier_V2_From_File-_Test.yml | 327 +++----------- ...areEnvPredictionsToExpectedPredictions.yml | 7 +- ...ncidentsForEvaluateMLModllAtProduction.yml | 12 +- .../script-CreateIncidentsOutOfTheBoxV2.yml | 1 - Packs/ML/pack_metadata.json | 2 +- Tests/conf.json | 4 +- 40 files changed, 1144 insertions(+), 917 deletions(-) create mode 100644 Packs/Base/ReleaseNotes/1_34_28.md rename Packs/Base/Scripts/DBotPredictPhishingWords/{dbot_predict_phishing_words_test.py => DBotPredictPhishingWords_test.py} (94%) create mode 100644 Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml create mode 100644 Packs/ML/ReleaseNotes/1_4_11.md create mode 100644 Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py create mode 100644 Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md create mode 100644 Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md diff --git a/Packs/Base/ReleaseNotes/1_34_28.md b/Packs/Base/ReleaseNotes/1_34_28.md new file mode 100644 index 000000000000..b601d697befc --- /dev/null +++ b/Packs/Base/ReleaseNotes/1_34_28.md @@ -0,0 +1,24 @@ + +#### Scripts + +##### DBotTrainTextClassifierV2 + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. +##### DBotBuildPhishingClassifier + +- Changed the Docker image to: *demisto/python3:3.11.9.101916*. +##### DBotPreProcessTextData + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. +##### DBotPredictPhishingWords + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. 
+##### DBotFindSimilarIncidents + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. +##### GetMLModelEvaluation + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. +##### DBotFindSimilarIncidentsByIndicators + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py index 463a366e73ba..15c00554f906 100644 --- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py +++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py @@ -1,19 +1,12 @@ +from CommonServerPython import * import base64 -import copy import gc -from CommonServerPython import * - -PREFIXES_TO_REMOVE = ['incident.'] ALL_LABELS = "*" def preprocess_incidents_field(incidents_field): - incidents_field = incidents_field.strip() - for prefix in PREFIXES_TO_REMOVE: - if incidents_field.startswith(prefix): - incidents_field = incidents_field[len(prefix):] - return incidents_field + return incidents_field.strip().removeprefix('incident.') def get_phishing_map_labels(comma_values): @@ -28,7 +21,7 @@ def get_phishing_map_labels(comma_values): labels_dict[splited[0].strip()] = splited[1].strip() else: labels_dict[v] = v - return {k: v for k, v in labels_dict.items()} + return dict(labels_dict.items()) def build_query_in_reepect_to_phishing_labels(args): @@ -38,17 +31,17 @@ def build_query_in_reepect_to_phishing_labels(args): return args mapping_dict = get_phishing_map_labels(mapping) tag_field = args['tagField'] - tags_union = ' '.join(['"{}"'.format(label) for label in mapping_dict]) - mapping_query = '{}:({})'.format(tag_field, tags_union) + tags_union = ' '.join([f'"{label}"' for label in mapping_dict]) + mapping_query = f'{tag_field}:({tags_union})' if 'query' not in args or args['query'].strip() == '': args['query'] = mapping_query else: - args['query'] = '({}) and 
({})'.format(query, mapping_query) + args['query'] = f'({query}) and ({mapping_query})' return args def get_incidents(d_args): - get_incidents_by_query_args = copy.deepcopy(d_args) + get_incidents_by_query_args = d_args.copy() get_incidents_by_query_args['NonEmptyFields'] = d_args['tagField'] fields_names_to_populate = ['tagField', 'emailsubject', 'emailbody', "emailbodyhtml"] fields_to_populate = [get_incidents_by_query_args.get(x, None) for x in fields_names_to_populate] @@ -63,15 +56,15 @@ def get_incidents(d_args): def preprocess_incidents(incidents, d_args): - text_pre_process_args = copy.deepcopy(d_args) + text_pre_process_args = d_args.copy() text_pre_process_args['inputType'] = 'json_b64_string' text_pre_process_args['input'] = base64.b64encode(incidents.encode('utf-8')).decode('ascii') text_pre_process_args['preProcessType'] = 'nlp' email_body_fields = [text_pre_process_args.get("emailbody"), text_pre_process_args.get("emailbodyhtml")] email_body = "|".join([x for x in email_body_fields if x]) - text_pre_process_args['textFields'] = "%s,%s" % (text_pre_process_args['emailsubject'], email_body) - text_pre_process_args['whitelistFields'] = "{0},{1}".format('dbot_processed_text', - text_pre_process_args['tagField']) + text_pre_process_args['textFields'] = "{},{}".format(text_pre_process_args['emailsubject'], email_body) + text_pre_process_args['whitelistFields'] = "{},{}".format('dbot_processed_text', + text_pre_process_args['tagField']) res = demisto.executeCommand("DBotPreProcessTextData", text_pre_process_args) if is_error(res): return_error(get_error(res)) @@ -81,7 +74,7 @@ def preprocess_incidents(incidents, d_args): def train_model(processed_text_data, d_args): - train_model_args = copy.deepcopy(d_args) + train_model_args = d_args.copy() train_model_args['inputType'] = 'json_b64_string' train_model_args['input'] = base64.b64encode(processed_text_data.encode('utf-8')).decode('ascii') train_model_args['overrideExistingModel'] = 'true' @@ -90,7 +83,7 @@ 
def train_model(processed_text_data, d_args): def main(): - d_args = dict(demisto.args()) + d_args = demisto.args() for arg in ['tagField', 'emailbody', 'emailbodyhtml', 'emailsubject', 'timeField']: d_args[arg] = preprocess_incidents_field(d_args.get(arg, '')) diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml index 8f04d84a19c8..82289f4eb072 100644 --- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml +++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml @@ -4,9 +4,9 @@ args: - defaultValue: Phishing description: A comma-separated list of incident types by which to filter. name: incidentTypes -- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200")' +- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200").' name: fromDate -- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200")' +- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200").' name: toDate - defaultValue: '3000' description: The maximum number of incidents to fetch. @@ -39,7 +39,7 @@ args: - description: The model name to store in the system. name: modelName - defaultValue: '*' - description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. 
For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious' + description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious.' name: phishingLabels - defaultValue: emailsubject description: Incident field name with the email subject. 
@@ -83,8 +83,7 @@ tags: - ml timeout: 12µs type: python -dockerimage: demisto/ml:1.0.0.45981 -runonce: true +dockerimage: demisto/python3:3.11.9.101916 tests: - Create Phishing Classifier V2 ML Test - DBotCreatePhishingClassifierV2FromFile-Test diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py index 44a4660752c0..08e291edd12b 100644 --- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py +++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py @@ -13,7 +13,8 @@ def test_no_mapping_no_query(): def test_no_mapping_with_query(): args = {'phishingLabels': '*', 'query': QUERY} args = build_query_in_reepect_to_phishing_labels(args) - assert 'query' in args and args['query'] == QUERY + assert 'query' in args + assert args['query'] == QUERY def test_mapping_no_query(): @@ -27,6 +28,6 @@ def test_mapping_with_query(): args = {'phishingLabels': MAPPING, 'tagField': 'closeReason', 'query': QUERY} args = build_query_in_reepect_to_phishing_labels(args) assert 'query' in args - opt1 = args['query'] == '({}) and (closeReason:("spam" "legit"))'.format(QUERY) - opt2 = args['query'] == '({}) and (closeReason:("legit" "spam"))'.format(QUERY) + opt1 = args['query'] == f'({QUERY}) and (closeReason:("spam" "legit"))' + opt2 = args['query'] == f'({QUERY}) and (closeReason:("legit" "spam"))' assert opt1 or opt2 diff --git a/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml b/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml index 60fef5c54f27..05d67d6f9fde 100644 --- a/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml +++ b/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml @@ -86,9 +86,27 @@ script: '-' subtype: python3 timeout: '0' type: python -dockerimage: demisto/ml:1.0.0.94241 +dockerimage: 
demisto/ml:1.0.0.101889 runas: DBotWeakRole -runonce: true tests: -- No tests (auto formatted) +- DBotFindSimilarIncidents-test fromversion: 5.0.0 +outputs: +- contextPath: DBotFindSimilarIncidents.isSimilarIncidentFound + description: Indicates whether similar incidents have been found. + type: boolean +- contextPath: DBotFindSimilarIncidents.similarIncident.created + description: The creation date of the linked incident. + type: date +- contextPath: DBotFindSimilarIncidents.similarIncident.id + description: The ID of the linked incident. + type: string +- contextPath: DBotFindSimilarIncidents.similarIncident.name + description: The name of the linked incident. + type: string +- contextPath: DBotFindSimilarIncidents.similarIncident.similarity incident + description: The similarity of the linked incident represented as a float in the range 0-1. + type: number +- contextPath: DBotFindSimilarIncidents.similarIncident.details + description: The details of the linked incident. + type: string diff --git a/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml b/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml index a13bce442cf4..cd7351ea300d 100644 --- a/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml +++ b/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml @@ -42,7 +42,7 @@ script: '-' subtype: python3 timeout: '0' type: python -dockerimage: demisto/ml:1.0.0.88591 +dockerimage: demisto/ml:1.0.0.101889 runas: DBotWeakRole tests: - DBotFindSimilarIncidentsByIndicators - Test diff --git a/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py b/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py index c8c668af7b07..a86b1dea6cf6 100644 --- a/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py +++ 
b/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py @@ -1,9 +1,12 @@ # pylint: disable=no-member - from CommonServerPython import * from string import punctuation import demisto_ml import numpy as np +import logging + +# Suppress logging for a specific library +logging.getLogger('transformers').setLevel(logging.ERROR) FASTTEXT_MODEL_TYPE = 'FASTTEXT_MODEL_TYPE' TORCH_TYPE = 'torch' @@ -14,27 +17,30 @@ def OrderedSet(iterable): return list(dict.fromkeys(iterable)) -def get_model_data(model_name, store_type, is_return_error): - res_model_list = demisto.executeCommand("getList", {"listName": model_name})[0] - res_model = demisto.executeCommand("getMLModel", {"modelName": model_name})[0] - if is_error(res_model_list) and not is_error(res_model): - model_data = res_model['Contents']['modelData'] - try: - model_type = res_model['Contents']['model']["type"]["type"] - return model_data, model_type - except Exception: - return model_data, UNKNOWN_MODEL_TYPE - elif not is_error(res_model_list) and is_error(res_model): - return res_model_list["Contents"], UNKNOWN_MODEL_TYPE - elif not is_error(res_model_list) and not is_error(res_model): - if store_type == "list": - return res_model_list["Contents"], UNKNOWN_MODEL_TYPE - elif store_type == "mlModel": - model_data = res_model['Contents']['modelData'] - model_type = res_model['Contents']['model']["type"]["type"] - return model_data, model_type - else: - handle_error("error reading model %s from Demisto" % model_name, is_return_error) +def get_model_data(model_name: str, store_type: str, is_return_error: bool) -> tuple[dict, str]: + + def load_from_models(model_name: str) -> None | tuple[dict, str]: + res_model = demisto.executeCommand("getMLModel", {"modelName": model_name}) + if is_error(res_model): + demisto.debug(get_error(res_model)) + return None + model_data = res_model[0]['Contents']['modelData'] + model_type = dict_safe_get(res_model, [0, 'Contents', 'model', "type", "type"], UNKNOWN_MODEL_TYPE) + 
return model_data, model_type + + def load_from_list(model_name): + res_model = demisto.executeCommand("getList", {"listName": model_name}) + if is_error(res_model): + demisto.debug(get_error(res_model)) + return None + return res_model[0]["Contents"], UNKNOWN_MODEL_TYPE + + if store_type == "mlModel": + res = load_from_models(model_name) or load_from_list(model_name) + elif store_type == "list": + res = load_from_list(model_name) or load_from_models(model_name) + + return res or handle_error(f"error reading model {model_name} from Demisto", is_return_error) # type: ignore def handle_error(message, is_return_error): @@ -88,6 +94,7 @@ def preprocess_text(text, model_type, is_return_error): else: words_to_token_maps = tokenized_text_result['originalWordsToTokens'] return input_text, words_to_token_maps + return None def predict_phishing_words(model_name, model_store_type, email_subject, email_body, min_text_length, label_threshold, @@ -97,7 +104,9 @@ def predict_phishing_words(model_name, model_store_type, email_subject, email_bo model_type = FASTTEXT_MODEL_TYPE if model_type not in [FASTTEXT_MODEL_TYPE, TORCH_TYPE, UNKNOWN_MODEL_TYPE]: model_type = UNKNOWN_MODEL_TYPE + phishing_model = demisto_ml.phishing_model_loads_handler(model_data, model_type) + is_model_applied_on_a_single_incidents = isinstance(email_subject, str) and isinstance(email_body, str) if is_model_applied_on_a_single_incidents: return predict_single_incident_full_output(email_subject, email_body, is_return_error, label_threshold, @@ -110,7 +119,7 @@ def predict_phishing_words(model_name, model_store_type, email_subject, email_bo def predict_batch_incidents_light_output(email_subject, email_body, phishing_model, model_type, min_text_length): - text_list = [{'text': "%s \n%s" % (subject, body)} for subject, body in zip(email_subject, email_body)] + text_list = [{'text': f"{subject} \n{body}"} for subject, body in zip(email_subject, email_body)] preprocessed_text_list = preprocess_text(text_list, 
model_type, is_return_error=False) batch_predictions = [] for input_text in preprocessed_text_list: @@ -132,14 +141,14 @@ def predict_batch_incidents_light_output(email_subject, email_body, phishing_mod 'Type': entryTypes['note'], 'Contents': batch_predictions, 'ContentsFormat': formats['json'], - 'HumanReadable': 'Applied predictions on {} incidents.'.format(len(batch_predictions)), + 'HumanReadable': f'Applied predictions on {len(batch_predictions)} incidents.', } def predict_single_incident_full_output(email_subject, email_body, is_return_error, label_threshold, min_text_length, model_type, phishing_model, set_incidents_fields, top_word_limit, word_threshold): - text = "%s \n%s" % (email_subject, email_body) + text = f"{email_subject} \n{email_body}" input_text, words_to_token_maps = preprocess_text(text, model_type, is_return_error) filtered_text, filtered_text_number_of_words = phishing_model.filter_model_words(input_text) if filtered_text_number_of_words == 0: @@ -163,14 +172,14 @@ def predict_single_incident_full_output(email_subject, email_body, is_return_err negative_tokens = OrderedSet(explain_result['NegativeWords']) positive_words = find_words_contain_tokens(positive_tokens, words_to_token_maps) negative_words = find_words_contain_tokens(negative_tokens, words_to_token_maps) - positive_words = list(OrderedSet([s.strip(punctuation) for s in positive_words])) - negative_words = list(OrderedSet([s.strip(punctuation) for s in negative_words])) + positive_words = OrderedSet([s.strip(punctuation) for s in positive_words]) + negative_words = OrderedSet([s.strip(punctuation) for s in negative_words]) positive_words = [w for w in positive_words if w.isalnum()] negative_words = [w for w in negative_words if w.isalnum()] highlighted_text_markdown = text.strip() for word in positive_words: for cased_word in [word.lower(), word.title(), word.upper()]: - highlighted_text_markdown = re.sub(r'(? 
0: lang_counter = Counter(inc[LANGUAGE_KEY] for inc in data).most_common() description += "Dropped %d sample(s) that were detected as being in foreign languages. " % dropped_count - description += 'Found language counts: {}'.format(', '.join(['{}:{}'.format(lang, count) for lang, count + description += 'Found language counts: {}'.format(', '.join([f'{lang}:{count}' for lang, count in lang_counter])) description += "\n" return filtered_data, description @@ -441,7 +442,7 @@ def main(): # clean text if pre_process_type not in PRE_PROCESS_TYPES: - return_error('Pre-process type {} is not supported'.format(pre_process_type)) + return_error(f'Pre-process type {pre_process_type} is not supported') # clean html and new lines data = clean_text_of_incidents_list(data, DBOT_TEXT_FIELD, remove_html_tags) diff --git a/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml b/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml index 853640116f5a..06a9b3809ec2 100644 --- a/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml +++ b/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml @@ -104,8 +104,7 @@ tags: - ml timeout: 120µs type: python -dockerimage: demisto/ml:1.0.0.30541 -runonce: true +dockerimage: demisto/ml:1.0.0.101889 tests: - Create Phishing Classifier V2 ML Test fromversion: 5.0.0 diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py index a557dd0921f5..0408487579bb 100644 --- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py +++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py @@ -1,12 +1,10 @@ +from CommonServerPython import * # pylint: disable=no-member import gc - +import demisto_ml import pandas as pd -from typing import List, Dict from collections import defaultdict, Counter from sklearn.model_selection import StratifiedKFold -from 
CommonServerPython import * -import demisto_ml ALL_LABELS = "*" GENERAL_SCORES = { @@ -63,10 +61,10 @@ def read_file(input_data, input_type): else: res = demisto.getFilePath(input_data) if not res: - return_error("Entry {} not found".format(input_data)) + return_error(f"Entry {input_data} not found") file_path = res['path'] if input_type.startswith('json'): - with open(file_path, 'r') as f: + with open(file_path) as f: file_content = f.read() if input_type.startswith('csv'): return pd.read_csv(file_path).fillna('').to_dict(orient='records') @@ -76,6 +74,7 @@ def read_file(input_data, input_type): return pd.read_pickle(file_path, compression=None) else: return_error("Unsupported file type %s" % input_type) + return None def get_file_entry_id(file_name): @@ -156,7 +155,7 @@ def find_keywords(data, tag_field, text_field, min_score): human_readable = "# Keywords per category\n" for category, scores in keywords.items(): sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True) - table_items = [{"Word": word, "Score": '{:.2f}'.format(score)} for + table_items = [{"Word": word, "Score": f'{score:.2f}'} for word, score in sorted_scores if score >= min_score] human_readable += tableToMarkdown(category, table_items, ["Word", "Score"]) demisto.results({ @@ -238,13 +237,13 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing labels_counter = Counter([x[DBOT_TAG_FIELD] for x in data]) labels_below_thresh = [label for label, count in labels_counter.items() if count < MIN_INCIDENTS_THRESHOLD] if len(labels_below_thresh) > 0: - err = ['Minimum number of incidents per label required for training is {}.'.format(MIN_INCIDENTS_THRESHOLD)] - err += ['The following labels have less than {} incidents: '.format(MIN_INCIDENTS_THRESHOLD)] + err = [f'Minimum number of incidents per label required for training is {MIN_INCIDENTS_THRESHOLD}.'] + err += [f'The following labels have less than {MIN_INCIDENTS_THRESHOLD} incidents: '] for x in 
labels_below_thresh: - err += ['- {}: {}'.format(x, str(labels_counter[x]))] + err += [f'- {x}: {str(labels_counter[x])}'] err += ['Make sure that enough incidents exist in the environment per each of these labels.'] missing_labels = ', '.join(missing_labels_counter.keys()) - err += ['The following labels were not mapped to any label in the labels mapping: {}.'.format(missing_labels)] + err += [f'The following labels were not mapped to any label in the labels mapping: {missing_labels}.'] if labels_mapping != ALL_LABELS: err += ['The given mapped labels are: {}.'.format(', '.join(labels_mapping.keys()))] return_error('\n'.join(err)) @@ -269,7 +268,7 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing for label, count in exist_labels_counter.items(): mapped_label = labels_mapping[label] if isinstance(labels_mapping, dict) else label if mapped_label != label: - label = "%s -> %s" % (label, mapped_label) + label = f"{label} -> {mapped_label}" exist_labels_counter_mapped[label] = count human_readable = tableToMarkdown("Found labels", exist_labels_counter_mapped) entry = { @@ -280,23 +279,23 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing 'HumanReadableFormat': formats['markdown'], } demisto.results(entry) - if len(set([x[DBOT_TAG_FIELD] for x in data])) == 1: + if len({x[DBOT_TAG_FIELD] for x in data}) == 1: single_label = [x[DBOT_TAG_FIELD] for x in data][0] if labels_mapping == ALL_LABELS: - err = ['All received incidents have the same label: {}.'.format(single_label)] + err = [f'All received incidents have the same label: {single_label}.'] else: - err = ['All received incidents mapped to the same label: {}.'.format(single_label)] + err = [f'All received incidents mapped to the same label: {single_label}.'] err += ['At least 2 different labels are required to train a classifier.'] if labels_mapping == ALL_LABELS: err += ['Please make sure that incidents of at least 2 labels exist in the environment.'] 
else: err += ['The following labels were not mapped to any label in the labels mapping:'] - err += [', '.join([x for x in missing_labels_counter])] + err += [', '.join(list(missing_labels_counter))] not_found_mapped_label = [x for x in labels_mapping if x not in exist_labels_counter or exist_labels_counter[x] == 0] if len(not_found_mapped_label) > 0: miss = ', '.join(not_found_mapped_label) - err += ['Notice that the following mapped labels were not found among all incidents: {}.'.format(miss)] + err += [f'Notice that the following mapped labels were not found among all incidents: {miss}.'] return_error('\n'.join(err)) @@ -354,6 +353,7 @@ def validate_labels_and_decide_algorithm(y, algorithm): error += ['The following labels/verdicts need to be mapped to one of those values: '] error += [', '.join(illegal_labels_for_fine_tune) + '.'] return_error('\n'.join(error)) + return None elif algorithm == AUTO_TRAINING_ALGO: return FASTTEXT_TRAINING_ALGO else: diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml index f57578960181..4bb5b1d03e35 100644 --- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml +++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml @@ -121,8 +121,7 @@ tags: - ml timeout: 12µs type: python -dockerimage: demisto/ml:1.0.0.93129 -runonce: true +dockerimage: demisto/ml:1.0.0.101889 tests: - Create Phishing Classifier V2 ML Test fromversion: 5.0.0 diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py index 9793f0c45994..fd7072e30f04 100644 --- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py +++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py @@ -22,10 +22,10 @@ def test_read_file(mocker): 
mocker.patch.object(demisto, 'getFilePath', return_value={'path': './TestData/input_json_file_test'}) obj = read_file('231342@343', 'json') assert len(obj) >= 1 - with open('./TestData/input_json_file_test', 'r') as f: + with open('./TestData/input_json_file_test') as f: obj = read_file(f.read(), 'json_string') assert len(obj) >= 1 - with open('./TestData/input_json_file_test', 'r') as f: + with open('./TestData/input_json_file_test') as f: b64_input = base64.b64encode(f.read().encode('utf-8')) # base64.b64encode(f.read()) obj = read_file(b64_input, 'json_b64_string') assert len(obj) >= 1 diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py index 31713f61858e..814b35692286 100644 --- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py +++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py @@ -2,7 +2,6 @@ import pandas as pd from sklearn.metrics import precision_score, recall_score, precision_recall_curve from tabulate import tabulate -from typing import Dict from CommonServerPython import * # pylint: disable=no-member @@ -20,7 +19,7 @@ def bold_hr(s): - return '**{}:**'.format(s) + return f'**{s}:**' def binarize(arr, threshold): @@ -61,8 +60,8 @@ def generate_metrics_df(y_true, y_true_per_class, y_pred, y_pred_per_class, thre ], ignore_index=True) df = df[['Class', 'Precision', 'TP', 'FP', 'Coverage', 'Total']] explained_metrics = ['Precision', 'TP (true positive)', 'FP (false positive)', 'Coverage', 'Total'] - explanation = ['{} {}'.format(bold_hr(metric), METRICS[metric]) for metric in explained_metrics] - df.set_index('Class', inplace=True) + explanation = [f'{bold_hr(metric)} {METRICS[metric]}' for metric in explained_metrics] + df = df.set_index('Class') return df, explanation @@ -153,7 +152,7 @@ def output_report(y_true, y_true_per_class, y_pred, y_pred_per_class, found_thre if detailed_output: human_readable += human_readable_threshold + 
['\n'] else: - human_readable += ['## Results for confidence threshold = {:.2f}'.format(found_threshold)] + ['\n'] + human_readable += [f'## Results for confidence threshold = {found_threshold:.2f}'] + ['\n'] human_readable += class_metrics_human_readable + ['\n'] human_readable += class_metrics_explanation_human_readable human_readable += csr_matrix_readable @@ -193,9 +192,8 @@ def merge_entries(entry, per_class_entry): return entry -def find_threshold(y_true_str, y_pred_str, customer_target_precision, target_recall, detailed_output=True): - y_true = convert_str_to_json(y_true_str, 'yTrue') - y_pred_all_classes = convert_str_to_json(y_pred_str, 'yPred') +def find_threshold(y_true, y_pred_all_classes, customer_target_precision, target_recall, detailed_output=True): + labels = sorted(set(y_true + list(y_pred_all_classes[0].keys()))) n_instances = len(y_true) y_true_per_class = {class_: np.zeros(n_instances) for class_ in labels} @@ -248,7 +246,7 @@ def find_best_threshold_for_target_precision(class_to_arrs, customer_target_prec precision_per_class[class_] = precision break if len(threshold_per_class) == len(labels): - threshold_candidates = sorted(list(threshold_per_class.values())) + threshold_candidates = sorted(threshold_per_class.values()) for threshold in threshold_candidates: legal_threshold_for_all_classes = True threshold_precision = sys.maxsize @@ -276,7 +274,7 @@ def calculate_per_class_report_entry(class_to_arrs, labels, y_pred_per_class, y_ 'The following tables present evlauation of the model per class at different confidence thresholds:'] class_to_thresholds = {} for class_ in labels: - class_to_thresholds[class_] = set([0.001]) # using no threshold + class_to_thresholds[class_] = {0.001} # using no threshold for target_precision in np.arange(0.95, 0.5, -0.05): # indexing is done by purpose - the ith precision corresponds with threshold i-1. 
Last precision is 1 for i, precision in enumerate(class_to_arrs[class_]['precisions'][:-1]): @@ -296,15 +294,15 @@ def calculate_per_class_report_entry(class_to_arrs, labels, y_pred_per_class, y_ row['Threshold'] = threshold class_threshold_df = pd.concat([class_threshold_df, pd.DataFrame([row])], ignore_index=True) class_threshold_df = reformat_df_fractions_to_percentage(class_threshold_df) - class_threshold_df['Threshold'] = class_threshold_df['Threshold'].apply(lambda p: '{:.2f}'.format(p)) + class_threshold_df['Threshold'] = class_threshold_df['Threshold'].apply(lambda p: f'{p:.2f}') class_threshold_df = class_threshold_df[['Threshold', 'Precision', 'TP', 'FP', 'Coverage', 'Total']] - class_threshold_df.sort_values(by='Coverage', ascending=False, inplace=True) - class_threshold_df.drop_duplicates(subset='Threshold', inplace=True, keep='first') - class_threshold_df.drop_duplicates(subset='Precision', inplace=True, keep='first') - class_threshold_df.set_index('Threshold', inplace=True) + class_threshold_df = class_threshold_df.sort_values(by='Coverage', ascending=False) + class_threshold_df = class_threshold_df.drop_duplicates(subset='Threshold', keep='first') + class_threshold_df = class_threshold_df.drop_duplicates(subset='Precision', keep='first') + class_threshold_df = class_threshold_df.set_index('Threshold') per_class_context[class_] = class_threshold_df.to_json() tabulated_class_df = tabulate(class_threshold_df, tablefmt="pipe", headers="keys") - per_class_hr += ['### {}'.format(class_), tabulated_class_df] + per_class_hr += [f'### {class_}', tabulated_class_df] per_class_entry = { 'Type': entryTypes['note'], 'ContentsFormat': formats['json'], @@ -321,31 +319,40 @@ def convert_str_to_json(str_json, var_name): y_true = json.loads(str_json) return y_true except Exception as e: - return_error('Exception while reading {} :{}'.format(var_name, e)) + return_error(f'Exception while reading {var_name} :{e}') def main(): - y_pred_all_classes = 
demisto.args()["yPred"] - y_true = demisto.args()["yTrue"] - target_precision = calculate_and_validate_float_parameter("targetPrecision") - target_recall = calculate_and_validate_float_parameter("targetRecall") - detailed_output = 'detailedOutput' in demisto.args() and demisto.args()['detailedOutput'] == 'true' - entries = find_threshold(y_true_str=y_true, - y_pred_str=y_pred_all_classes, - customer_target_precision=target_precision, - target_recall=target_recall, - detailed_output=detailed_output) - - demisto.results(entries) + try: + y_pred_all_classes = demisto.args()["yPred"] + y_true = demisto.args()["yTrue"] + target_precision = calculate_and_validate_float_parameter("targetPrecision") + target_recall = calculate_and_validate_float_parameter("targetRecall") + detailed_output = 'detailedOutput' in demisto.args() and demisto.args()['detailedOutput'] == 'true' + y_true = convert_str_to_json(y_true, 'yTrue') + y_pred_all_classes = convert_str_to_json(y_pred_all_classes, 'yPred') + + if not (y_true and y_pred_all_classes): + raise DemistoException('Either "yPred" or "yTrue" are empty.') + + entries = find_threshold(y_true=y_true, + y_pred_all_classes=y_pred_all_classes, + customer_target_precision=target_precision, + target_recall=target_recall, + detailed_output=detailed_output) + + demisto.results(entries) + except Exception as e: + return_error(f'Error in GetMLModelEvaluation:\n{e}') def calculate_and_validate_float_parameter(var_name): try: res = float(demisto.args()[var_name]) if var_name in demisto.args() else 0 except Exception: - return_error('{} must be a float between 0-1 or left empty'.format(var_name)) + return_error(f'{var_name} must be a float between 0-1 or left empty') if res < 0 or res > 1: - return_error('{} must be a float between 0-1 or left empty'.format(var_name)) + return_error(f'{var_name} must be a float between 0-1 or left empty') return res diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml 
b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml index 4d59a291421a..651f08b8e424 100644 --- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml +++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml @@ -15,7 +15,7 @@ args: isArray: true name: targetRecall - defaultValue: 'true' - description: if set to 'true', the output will include a full exaplanation of the confidence threshold meaning. + description: if set to 'true', the output will include a full explanation of the confidence threshold meaning. isArray: true name: detailedOutput predefined: @@ -32,7 +32,7 @@ outputs: description: The found thresholds which meets the conditions of precision and recall. type: String - contextPath: GetMLModelEvaluation.ConfusionMatrixAtThreshold - description: The model evaluation confusion matrix for mails above the threhsold. + description: The model evaluation confusion matrix for mails above the threshold. type: Unknown - contextPath: GetMLModelEvaluation.Metrics description: Metrics per each class (includes precision, true positive, coverage, etc.) 
@@ -43,7 +43,7 @@ tags: - ml timeout: 60µs type: python -dockerimage: demisto/ml:1.0.0.88591 +dockerimage: demisto/ml:1.0.0.101889 tests: - Create Phishing Classifier V2 ML Test fromversion: 5.0.0 diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py index e69aa533abe2..cc6835394e28 100644 --- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py +++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py @@ -1,4 +1,3 @@ -import json from GetMLModelEvaluation import find_threshold @@ -48,8 +47,8 @@ class 2 precision per threshold: def test_threshold_found_0(mocker): global y_true, y_pred - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.7) < 10 ** -2 @@ -57,8 +56,8 @@ def test_threshold_found_0(mocker): def test_threshold_found_1(mocker): global y_true, y_pred - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.63, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.7) < 10 ** -2 @@ -66,8 +65,8 @@ def test_threshold_found_1(mocker): def test_threshold_found_2(mocker): global y_true, y_pred - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.7, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2 @@ -75,16 +74,16 @@ def test_threshold_found_2(mocker): def test_threshold_found_3(mocker): global y_true, y_pred - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = 
find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.875, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2 def test_no_existing_threshold(mocker): - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.9, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2 @@ -93,8 +92,8 @@ def test_no_existing_threshold(mocker): def test_predictions_are_correct_and_all_equals_one_prob(mocker): y_true = ['class1'] * 7 + ['class2'] * 7 y_pred = [{'class1': 0.95}] * 7 + [{'class2': 0.95}] * 7 - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.6, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.95) < 10 ** -2 @@ -103,8 +102,8 @@ def test_predictions_are_correct_and_all_equals_one_prob(mocker): def test_predictions_are_correct_and_almost_all_equals_one_prob(mocker): y_true = ['class1'] * 7 + ['class2'] * 7 y_pred = [{'class1': 1}] * 6 + [{'class1': 0.95}] + [{'class2': 1}] * 7 - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.6, target_recall=0) assert abs(entry['Contents']['threshold'] - 0.95) < 10 ** -2 @@ -113,8 +112,8 @@ def test_predictions_are_correct_and_almost_all_equals_one_prob(mocker): def test_plabook_test_simulation(mocker): y_pred = [{"spam": 0.9987042546272278}, {"ham": 0.9987037777900696}] y_true = ["spam", "ham"] - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.7, target_recall=0) assert abs(entry['Contents']['threshold'] - 
0.9987037777900696) < 10 ** -2 @@ -123,8 +122,8 @@ def test_plabook_test_simulation(mocker): def test_all_wrong_predictions(mocker): y_true = ['class1'] * 7 + ['class2'] * 7 y_pred = [{'class2': 0.5}] * 7 + [{'class1': 0.5}] * 7 - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0.6, target_recall=0) assert entry['Contents']['threshold'] >= 0.5 @@ -133,8 +132,8 @@ def test_all_wrong_predictions(mocker): def test_all_wrong_predictions_2(mocker): y_true = ['class1'] * 7 + ['class2'] * 7 y_pred = [{'class2': 0.5}] * 7 + [{'class1': 0.5}] * 7 - entry = find_threshold(y_pred_str=json.dumps(y_pred), - y_true_str=json.dumps(y_true), + entry = find_threshold(y_pred_all_classes=y_pred, + y_true=y_true, customer_target_precision=0, target_recall=0) assert entry['Contents']['threshold'] >= 0.5 diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/README.md b/Packs/Base/Scripts/GetMLModelEvaluation/README.md index 13cc64e0b779..395213a9d484 100644 --- a/Packs/Base/Scripts/GetMLModelEvaluation/README.md +++ b/Packs/Base/Scripts/GetMLModelEvaluation/README.md @@ -1,6 +1,7 @@ Finds a threshold for ML model, and performs an evaluation based on it ## Script Data + --- | **Name** | **Description** | @@ -10,6 +11,7 @@ Finds a threshold for ML model, and performs an evaluation based on it | Cortex XSOAR Version | 5.0.0 | ## Inputs + --- | **Argument Name** | **Description** | @@ -18,13 +20,14 @@ Finds a threshold for ML model, and performs an evaluation based on it | yPred | A list of dictionaries contain probability predictions for all classes | | targetPrecision | minimum precision of all classes, ranges 0-1 | | targetRecall | minimum recall of all classes, ranges 0-1 | -| detailedOutput | if set to 'true', the output will include a full exaplanation of the confidence threshold meaning | +| detailedOutput | if set to 'true', the output will include a 
full explanation of the confidence threshold meaning | ## Outputs + --- | **Path** | **Description** | **Type** | | --- | --- | --- | | GetMLModelEvaluation.Threshold | The found thresholds which meets the conditions of precision and recall | String | -| GetMLModelEvaluation.ConfusionMatrixAtThreshold | The model evaluation confusion matrix for mails above the threhsold. | Unknown | +| GetMLModelEvaluation.ConfusionMatrixAtThreshold | The model evaluation confusion matrix for mails above the threshold. | Unknown | | GetMLModelEvaluation.Metrics | Metrics per each class \(includes precision, true positive, coverage, etc.\) | Unknown | diff --git a/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml b/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml new file mode 100644 index 000000000000..ace3806aea00 --- /dev/null +++ b/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml @@ -0,0 +1,340 @@ +id: DBotFindSimilarIncidents-test +version: -1 +contentitemexportablefields: + contentitemfields: {} +name: DBotFindSimilarIncidents-test +starttaskid: "0" +tasks: + "0": + id: "0" + taskid: 53859bf6-0ad5-48e8-83ea-e56e86b07a82 + type: start + task: + id: 53859bf6-0ad5-48e8-83ea-e56e86b07a82 + version: -1 + name: "" + iscommand: false + brand: "" + description: '' + nexttasks: + '#none#': + - "5" + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 50 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "2": + id: "2" + taskid: ab3d08f1-8bc2-4c87-857b-2e29bb3f5f38 + type: regular + task: + id: ab3d08f1-8bc2-4c87-857b-2e29bb3f5f38 + version: -1 + name: DBotFindSimilarIncidents + description: Find past similar incidents based on incident fields' similarity. Includes an option to also display indicators similarity. 
+ scriptName: DBotFindSimilarIncidents + type: regular + iscommand: false + brand: "" + nexttasks: + '#none#': + - "3" + scriptarguments: + fieldExactMatch: + simple: accountname + fromDate: + simple: 1 hour + incidentId: + complex: + root: CreatedIncidentID + transformers: + - operator: atIndex + args: + index: + value: + simple: "0" + similarTextField: + simple: details + toDate: + simple: tomorrow + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 720 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "3": + id: "3" + taskid: bdd0da3d-8aad-4682-8aff-d2c6a3321690 + type: condition + task: + id: bdd0da3d-8aad-4682-8aff-d2c6a3321690 + version: -1 + name: Check results + type: condition + iscommand: false + brand: "" + nexttasks: + "Yes": + - "4" + separatecontext: false + conditions: + - label: "Yes" + condition: + - - operator: isTrue + left: + value: + simple: DBotFindSimilarIncidents.isSimilarIncidentFound + iscontext: true + - - operator: isEqualString + left: + value: + simple: DBotFindSimilarIncidents.similarIncident.id + iscontext: true + right: + value: + complex: + root: CreatedIncidentID + transformers: + - operator: atIndex + args: + index: + value: + simple: "1" + iscontext: true + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 895 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "4": + id: "4" + taskid: bd930f7e-c2f1-4b46-8efa-6562c60105fe + type: title + task: + id: bd930f7e-c2f1-4b46-8efa-6562c60105fe + version: -1 + name: Done + type: title + iscommand: false + brand: "" + description: '' + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 1070 + } + } + note: false + timertriggers: [] + ignoreworker: false 
+ skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "5": + id: "5" + taskid: c2bb7962-e995-45e1-8636-0ecf2b3ff45c + type: regular + task: + id: c2bb7962-e995-45e1-8636-0ecf2b3ff45c + version: -1 + name: Clear context + description: |- + Delete field from context. + + This automation runs using the default Limited User role, unless you explicitly change the permissions. + For more information, see the section about permissions here: + https://docs-cortex.paloaltonetworks.com/r/Cortex-XSOAR/6.10/Cortex-XSOAR-Administrator-Guide/Automations + scriptName: DeleteContext + type: regular + iscommand: false + brand: "" + nexttasks: + '#none#': + - "6" + - "7" + scriptarguments: + all: + simple: "yes" + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 195 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "6": + id: "6" + taskid: 58890a1b-198a-4f21-87d6-a73d27b24075 + type: regular + task: + id: 58890a1b-198a-4f21-87d6-a73d27b24075 + version: -1 + name: Create incident 1 + description: commands.local.cmd.create.inc + script: Builtin|||createNewIncident + type: regular + iscommand: true + brand: Builtin + nexttasks: + '#none#': + - "8" + scriptarguments: + accountname: + simple: SimilarAccountName + details: + simple: this is a test incident and should match up with TestIncident_2 + name: + simple: TestIncident_1 + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 50, + "y": 370 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "7": + id: "7" + taskid: 89bf5bb4-77e5-4462-819e-656511050e55 + type: regular + task: + id: 89bf5bb4-77e5-4462-819e-656511050e55 + version: -1 + name: Create incident 2 + description: 
commands.local.cmd.create.inc + script: Builtin|||createNewIncident + type: regular + iscommand: true + brand: Builtin + nexttasks: + '#none#': + - "8" + scriptarguments: + accountname: + simple: SimilarAccountName + details: + simple: this is a test incident and should match up with TestIncident_1 + name: + simple: TestIncident_2 + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 480, + "y": 370 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "8": + id: "8" + taskid: e4380779-44e3-4395-8e2d-51e6e44ce672 + type: regular + task: + id: e4380779-44e3-4395-8e2d-51e6e44ce672 + version: -1 + name: Sleep for ten seconds to let the incidents load + description: Sleep for X seconds. + scriptName: Sleep + type: regular + iscommand: false + brand: "" + nexttasks: + '#none#': + - "2" + scriptarguments: + seconds: + simple: "10" + separatecontext: false + continueonerrortype: "" + view: |- + { + "position": { + "x": 265, + "y": 545 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false +system: true +view: |- + { + "linkLabelsPosition": {}, + "paper": { + "dimensions": { + "height": 1085, + "width": 810, + "x": 50, + "y": 50 + } + } + } +inputs: [] +outputs: [] +fromversion: 6.9.0 +description: '' diff --git a/Packs/Base/pack_metadata.json b/Packs/Base/pack_metadata.json index 7809b945f12f..a775366f69e8 100644 --- a/Packs/Base/pack_metadata.json +++ b/Packs/Base/pack_metadata.json @@ -2,7 +2,7 @@ "name": "Base", "description": "The base pack for Cortex XSOAR.", "support": "xsoar", - "currentVersion": "1.34.27", + "currentVersion": "1.34.28", "author": "Cortex XSOAR", "serverMinVersion": "6.0.0", "url": "https://www.paloaltonetworks.com/cortex", diff --git a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml 
b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml index e086cef5d8bf..1513b26d3780 100644 --- a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml +++ b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml @@ -3409,9 +3409,9 @@ args: defaultValue: encodedIncidentsFile description: '' scripttarget: 0 -subtype: python2 +subtype: python3 runonce: false -dockerimage: demisto/python:2.7.18.9326 +dockerimage: demisto/python3:3.11.9.101916 runas: DBotWeakRole comment: '' fromversion: 5.0.0 diff --git a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml index 46efe09ed8d2..2dd896044433 100644 --- a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml +++ b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml @@ -16,13 +16,13 @@ script: >+ incident1_template = { 'type': 'Simulation', - 'emailclassification': 'Tag1', + 'tags': 'Tag1', } incident2_template = { 'type': 'Simulation', - 'emailclassification': 'Tag2', + 'tags': 'Tag2', } @@ -38,7 +38,7 @@ script: >+ for i in range(0, NUMBER_OF_INCIDENTS): incidents.append({ 'type': 'Simulation', - 'emailclassification': 'Tag3', + 'tags': 'Tag3', 'dbot_processed_text': " ".join([words_tag3[i] for i in [random.randint(0, len(words_tag3)-1) for i in range(30)]]) }) @@ -53,9 +53,9 @@ script: >+ 'Contents': 'Done crete incidents', 'ContentsFormat': formats['text'], 'EntryContext': { - 'EmailSujbectKey': 'emailclassification', + 'EmailSujbectKey': 'tags', 'EmailBodyKey': 'details', - 'EmailTagKey': 'emailclassification', + 'EmailTagKey': 'tags', 'IncidentsQuery': 'type:Simulation' } }) diff --git a/Packs/ML/ReleaseNotes/1_4_11.md b/Packs/ML/ReleaseNotes/1_4_11.md new file mode 100644 index 000000000000..c10696c9c530 --- /dev/null +++ b/Packs/ML/ReleaseNotes/1_4_11.md @@ -0,0 +1,14 @@ + 
+#### Scripts + +##### EvaluateMLModllAtProduction + +- Changed the Docker image to: *demisto/pandas:1.0.0.102566*. + +##### DBotPredictOutOfTheBoxV2 + +- Updated the Docker image to: *demisto/ml:1.0.0.101889*. + +##### DBotPredictIncidentsBatch + +- Changed the Docker image to: *demisto/pandas:1.0.0.102566*. diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py index 0600e8045aac..297d09211d9f 100644 --- a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py +++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py @@ -19,7 +19,7 @@ def get_phishing_map_labels(comma_values): labels_dict[splited[0].strip()] = splited[1].strip() else: labels_dict[v] = v - return {k: v for k, v in labels_dict.items()} + return dict(labels_dict.items()) def build_query_in_respect_to_phishing_labels(args): @@ -29,12 +29,12 @@ def build_query_in_respect_to_phishing_labels(args): return args mapping_dict = get_phishing_map_labels(mapping) tag_field = args['tagField'] - tags_union = ' '.join(['"{}"'.format(label) for label in mapping_dict]) - mapping_query = '{}:({})'.format(tag_field, tags_union) + tags_union = ' '.join([f'"{label}"' for label in mapping_dict]) + mapping_query = f'{tag_field}:({tags_union})' if 'query' not in args: args['query'] = mapping_query else: - args['query'] = '({}) and ({})'.format(query, mapping_query) + args['query'] = f'({query}) and ({mapping_query})' return args @@ -78,7 +78,7 @@ def main(): incidents_df = pd.DataFrame(incidents) predictions_df = pd.DataFrame(res[-1]['Contents']) df = pd.concat([incidents_df, predictions_df], axis=1) - df.rename(columns={"Label": "Prediction"}, inplace=True) + df = df.rename(columns={"Label": "Prediction"}) file_name = 'predictions.csv' file_columns = ['id', tag_field_name, 'Prediction', 'Probability', @@ -90,7 +90,7 @@ def main(): csv_data = filtered_df.to_csv() entry = 
fileResult(file_name, csv_data) entry['Contents'] = filtered_df.to_json(orient='records') - entry['HumanReadable'] = 'File contains predictions of {} incidents'.format(len(incidents)) + entry['HumanReadable'] = f'File contains predictions of {len(incidents)} incidents' return entry diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml index 0debe7c2eff1..d7fdd4f6854f 100644 --- a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml +++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml @@ -4,9 +4,9 @@ args: - defaultValue: Phishing description: A comma-separated list of incident types by which to filter. name: incidentTypes -- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200")' +- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200").' name: fromDate -- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200")' +- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200").' name: toDate - defaultValue: '3000' description: The maximum number of incidents to fetch. @@ -17,7 +17,7 @@ args: - description: If non-empty, hash every word with this seed. name: hashSeed - defaultValue: '*' - description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. 
For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious' + description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious.' name: phishingLabels - description: The model name to store in the system. 
name: modelName @@ -46,7 +46,7 @@ tags: - ml timeout: '0' type: python -dockerimage: demisto/ml:1.0.0.45981 +dockerimage: demisto/pandas:1.0.0.102566 fromversion: 5.0.0 tests: - VerifyOOBV2Predictions-Test diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md b/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md new file mode 100644 index 000000000000..89b2cf074c89 --- /dev/null +++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md @@ -0,0 +1,51 @@ +Apply a trained ML model on multiple incidents at once, to compare incidents how the incidents were labeled by analysts, to the predictions of the model. This script is aimed to help evaluate a trained model using past incidents. + +## Script Data + +--- + +| **Name** | **Description** | +| --- | --- | +| Script Type | python3 | +| Tags | phishing, ml | +| Cortex XSOAR Version | 5.0.0 | + +## Dependencies + +--- +This script uses the following commands and scripts. + +* GetIncidentsByQuery +* DBotPredictPhishingWords + +## Used In + +--- +This script is used in the following playbooks and scripts. + +* VerifyOOBV2Predictions-Test + +## Inputs + +--- + +| **Argument Name** | **Description** | +| --- | --- | +| query | Additional text by which to query incidents. | +| incidentTypes | A comma-separated list of incident types by which to filter. | +| fromDate | The start date by which to filter incidents. Date format will be the same as in the incidents query page \(valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 \+0200"\) | +| toDate | The end date by which to filter incidents. Date format will be the same as in the incidents query page \(valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 \+0200"\) | +| limit | The maximum number of incidents to fetch. 
| +| tagField | The field name with the label. Supports a comma-separated list, the first non-empty value will be taken. | +| hashSeed | If non-empty, hash every word with this seed. | +| phishingLabels | A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious | +| modelName | The model name to store in the system. | +| emailsubject | Incident field name with the email subject. | +| emailbody | Incident field name with the email body \(text\). | +| emailbodyhtml | Incident field name with the email body \(html\). | +| populateFields | A comma-separated list of fields in the object to poplulate. | + +## Outputs + +--- +There are no outputs for this script. 
diff --git a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py index 8bc85fac9f72..0c526a008b14 100644 --- a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py +++ b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py @@ -37,7 +37,7 @@ def load_oob_model(): if is_error(res): return_error(get_error(res)) - with open(EVALUATION_PATH, 'r') as json_file: + with open(EVALUATION_PATH) as json_file: data = json.load(json_file) y_test = data['YTrue'] y_pred = data['YPred'] @@ -76,6 +76,7 @@ def predict_phishing_words(): load_oob_model() dargs = demisto.args() dargs['modelName'] = OUT_OF_THE_BOX_MODEL_NAME + dargs['modelStoreType'] = 'mlModel' res = demisto.executeCommand('DBotPredictPhishingWords', dargs) if is_error(res): return_error(get_error(res)) diff --git a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml index a9c36418e659..6da8da23377f 100644 --- a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml +++ b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml @@ -59,8 +59,7 @@ script: '-' subtype: python3 timeout: 60µs type: python -dockerimage: demisto/ml:1.0.0.32340 -runonce: true +dockerimage: demisto/ml:1.0.0.101889 tests: - DbotPredictOufOfTheBoxTestV2 - VerifyOOBV2Predictions-Test diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py index 27292ab3b254..8621fef38b9e 100644 --- a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py +++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py @@ -25,7 +25,7 @@ def get_phishing_map_labels(comma_values): labels_dict[v] = v if len(set(labels_dict.values())) == 1: mapped_value = 
list(labels_dict.values())[0] - error = ['Label mapping error: you need to map to at least two labels: {}.'.format(mapped_value)] + error = [f'Label mapping error: you need to map to at least two labels: {mapped_value}.'] return_error('\n'.join(error)) return {k: canonize_label(v) for k, v in labels_dict.items()} @@ -97,7 +97,7 @@ def return_file_result_with_predictions_on_test_set(data, y_true, y_pred, y_pred def main(incident_types, incident_query, y_true_field, y_pred_field, y_pred_prob_field, model_target_accuracy, labels_mapping, additional_fields): - non_empty_fields = '{},{}'.format(y_true_field.strip(), y_pred_field.strip()) + non_empty_fields = f'{y_true_field.strip()},{y_pred_field.strip()}' incidents_query_args = {'incidentTypes': incident_types, 'NonEmptyFields': non_empty_fields, } @@ -106,42 +106,45 @@ def main(incident_types, incident_query, y_true_field, y_pred_field, y_pred_prob incidents_query_res = demisto.executeCommand('GetIncidentsByQuery', incidents_query_args) if is_error(incidents_query_res): return_error(get_error(incidents_query_res)) - incidents = json.loads(incidents_query_res[-1]['Contents']) - demisto.results('Found {} incidents'.format(len(incidents))) - y_true = [] - y_pred = [] - y_pred_prob = [] - incidents_with_missing_pred_prob = 0 - for i in incidents: - y_true.append(i[y_true_field]) - y_pred.append(i[y_pred_field]) - if y_pred_prob_field not in i: - incidents_with_missing_pred_prob += 1 - y_pred_prob.append(i.get(y_pred_prob_field, None)) - y_true, relevant_indices = get_data_with_mapped_label(y_true, labels_mapping) - y_pred = [y_pred[i] for i in relevant_indices] - y_pred_prob = [y_pred_prob[i] for i in relevant_indices] - incidents = [incidents[i] for i in relevant_indices] - y_pred_prob_is_given = incidents_with_missing_pred_prob == 0 - if y_pred_prob_is_given: - y_pred_dict = [{label: prob} for label, prob in zip(y_pred, y_pred_prob)] + incidents = json.loads(incidents_query_res[0]['Contents']) + if incidents: + 
demisto.results(f'Found {len(incidents)} incident(s)') + y_true = [] + y_pred = [] + y_pred_prob = [] + incidents_with_missing_pred_prob = 0 + for i in incidents: + y_true.append(i[y_true_field]) + y_pred.append(i[y_pred_field]) + if y_pred_prob_field not in i: + incidents_with_missing_pred_prob += 1 + y_pred_prob.append(i.get(y_pred_prob_field, None)) + y_true, relevant_indices = get_data_with_mapped_label(y_true, labels_mapping) + y_pred = [y_pred[i] for i in relevant_indices] + y_pred_prob = [y_pred_prob[i] for i in relevant_indices] + incidents = [incidents[i] for i in relevant_indices] + y_pred_prob_is_given = incidents_with_missing_pred_prob == 0 + if y_pred_prob_is_given: + y_pred_dict = [{label: prob} for label, prob in zip(y_pred, y_pred_prob)] + else: + y_pred_dict = [{label: 1.0} for label in y_pred] + if y_pred_prob_is_given: + res_threshold = get_ml_model_evaluation(y_true, y_pred_dict, model_target_accuracy, target_recall=0, + detailed=True) + # show results for the threshold found - last result so it will appear first + output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res_threshold, + context_field='EvaluateMLModllAtProduction') + # show results if no threshold (threhsold=0) was used. 
Following code is reached only if a legal thresh was found: + if not y_pred_prob_is_given or not np.isclose(float(res_threshold[0]['Contents']['threshold']), 0): + res = get_ml_model_evaluation(y_true, y_pred_dict, target_accuracy=0, target_recall=0) + human_readable = '\n'.join(['## Results for No Threshold', + 'The following results were achieved by using no threshold (threshold equals 0)']) + output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res, + context_field='EvaluateMLModllAtProductionNoThresh', + human_readable_title=human_readable) + return_file_result_with_predictions_on_test_set(incidents, y_true, y_pred, y_pred_prob, additional_fields) else: - y_pred_dict = [{label: 1.0} for label in y_pred] - if y_pred_prob_is_given: - res_threshold = get_ml_model_evaluation(y_true, y_pred_dict, model_target_accuracy, target_recall=0, - detailed=True) - # show results for the threshold found - last result so it will appear first - output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res_threshold, - context_field='EvaluateMLModllAtProduction') - # show results if no threshold (threhsold=0) was used. 
Following code is reached only if a legal thresh was found: - if not y_pred_prob_is_given or not np.isclose(float(res_threshold[0]['Contents']['threshold']), 0): - res = get_ml_model_evaluation(y_true, y_pred_dict, target_accuracy=0, target_recall=0) - human_readable = '\n'.join(['## Results for No Threshold', - 'The following results were achieved by using no threshold (threshold equals 0)']) - output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res, - context_field='EvaluateMLModllAtProductionNoThresh', - human_readable_title=human_readable) - return_file_result_with_predictions_on_test_set(incidents, y_true, y_pred, y_pred_prob, additional_fields) + return_results('No incidents found.') model_target_accuracy = demisto.args().get('modelTargetAccuracy', 0) diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml index 7e8eb335fe41..fccba729dba6 100644 --- a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml +++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml @@ -17,7 +17,7 @@ args: description: The model target accuracy, between 0 and 1. name: modelTargetAccuracy - defaultValue: '*' - description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". 
The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious' + description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious.' name: phishingLabels - description: A comma-separated list of incident field names to include in the results file. name: additionalFields @@ -42,7 +42,7 @@ outputs: script: '-' subtype: python3 type: python -dockerimage: demisto/ml:1.0.0.45981 +dockerimage: demisto/pandas:1.0.0.102566 runas: DBotWeakRole fromversion: 5.0.0 tags: diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md b/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md new file mode 100644 index 000000000000..799b77172067 --- /dev/null +++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md @@ -0,0 +1,52 @@ +Evaluates an ML model in production. + +## Script Data + +--- + +| **Name** | **Description** | +| --- | --- | +| Script Type | python3 | +| Tags | ml | +| Cortex XSOAR Version | 5.0.0 | + +## Dependencies + +--- +This script uses the following commands and scripts. + +* GetIncidentsByQuery +* GetMLModelEvaluation + +## Used In + +--- +This script is used in the following playbooks and scripts. 
+ +* EvaluateMLModllAtProduction-Test + +## Inputs + +--- + +| **Argument Name** | **Description** | +| --- | --- | +| incidentTypes | A comma-separated list of incident types by which to filter. | +| incidentsQuery | The incident query to fetch the training data for the model. | +| emailTagKey | The field name with the email tag. Supports a comma-separated list, the first non-empty value will be taken. | +| emailPredictionKey | The field name with the model prediction. | +| emailPredictionProbabilityKey | The field name with the model prediction probability. | +| modelTargetAccuracy | The model target accuracy, between 0 and 1. | +| phishingLabels | A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communication, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communication" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communication:non-malicious, external legit email:non-malicious | +| additionalFields | A comma-separated list of incident field names to include in the results file. | + +## Outputs + +--- + +| **Path** | **Description** | **Type** | +| --- | --- | --- | +| EvaluateMLModllAtProduction.EvaluationScores | The model evaluation scores \(precision, coverage, etc.\) for the found threshold. | Unknown | +| EvaluateMLModllAtProduction.ConfusionMatrix | The model evaluation confusion matrix for the found threshold. | Unknown | +| EvaluateMLModllAtProductionNoThresh.EvaluationScores | The model evaluation scores \(precision, coverage, etc.\) for threshold = 0. 
| Unknown | +| EvaluateMLModllAtProductionNoThresh.ConfusionMatrix | The model evaluation confusion matrix for threshold = 0. | Unknown | diff --git a/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml b/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml index 83250a63c15d..af7ce16950d5 100644 --- a/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml +++ b/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml @@ -1,28 +1,28 @@ -elasticcommonfields: {} id: Create Phishing Classifier V2 ML Test version: -1 name: Create Phishing Classifier V2 ML Test -description: Test CreatePhishingClassifierML playbook +description: Test CreatePhishingClassifierML playbook. starttaskid: "0" tasks: "0": id: "0" - taskid: 4aeda861-fb7f-490a-89ce-397ea2c1fbca + taskid: fbadefab-5b4a-4360-853c-81893d0cb492 type: start task: - id: 4aeda861-fb7f-490a-89ce-397ea2c1fbca + id: fbadefab-5b4a-4360-853c-81893d0cb492 version: -1 name: "" iscommand: false brand: "" + description: '' nexttasks: '#none#': - - "30" + - "6" separatecontext: false view: |- { "position": { - "x": 695, + "x": 50, "y": 50 } } @@ -31,12 +31,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "2": id: "2" - taskid: 8bbf2fce-f2f1-49fc-8230-fe5b64b5a3c2 + taskid: 0a894a8b-7b17-4ab4-8f79-643b3191165d type: regular task: - id: 8bbf2fce-f2f1-49fc-8230-fe5b64b5a3c2 + id: 0a894a8b-7b17-4ab4-8f79-643b3191165d version: -1 name: Create incidents scriptName: TestCreateIncidentsForPhishingClassifier @@ -55,8 +58,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1010 + "x": 50, + "y": 370 } } note: false @@ -64,16 +67,18 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "3": id: "3" - taskid: 29dd84d6-efb8-4487-8003-141b98934662 + taskid: 
baf8693e-4995-47f9-805f-3cbfe79f8ebc type: regular task: - id: 29dd84d6-efb8-4487-8003-141b98934662 + id: baf8693e-4995-47f9-805f-3cbfe79f8ebc version: -1 name: Predict Tag1 - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. + description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision. scriptName: DBotPredictPhishingWords type: regular iscommand: false @@ -83,9 +88,7 @@ tasks: - "15" scriptarguments: emailSubject: - simple: closed church squeamish squeamish moaning closed closed closed church - squeamish squeamish moaning closed closed closed church squeamish squeamish - moaning closed closed + simple: closed church squeamish squeamish moaning closed closed closed church squeamish squeamish moaning closed closed closed church squeamish squeamish moaning closed closed labelProbabilityThreshold: simple: "0" minTextLength: @@ -98,8 +101,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1535 + "x": 50, + "y": 895 } } note: false @@ -107,23 +110,27 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "5": id: "5" - taskid: f7a44f7e-0b35-4447-8424-b1d49db3235b + taskid: 5a71b8d6-cf64-494d-8889-46f70cc67c13 type: title task: - id: f7a44f7e-0b35-4447-8424-b1d49db3235b + id: 5a71b8d6-cf64-494d-8889-46f70cc67c13 version: -1 name: Done type: title iscommand: false brand: "" + description: '' separatecontext: false view: |- { "position": { - "x": 695, - "y": 2935 + "x": 50, + "y": 2295 } } note: false @@ -131,12 +138,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "6": id: "6" - taskid: 179f940e-7313-4fa5-8f5e-ba522c765669 + taskid: 74a08b82-f3e3-40c5-8143-fa5c135e2ce9 type: 
regular task: - id: 179f940e-7313-4fa5-8f5e-ba522c765669 + id: 74a08b82-f3e3-40c5-8143-fa5c135e2ce9 version: -1 name: Clear context scriptName: DeleteContext @@ -153,8 +163,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 835 + "x": 50, + "y": 195 } } note: false @@ -162,12 +172,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "8": id: "8" - taskid: 2d2c5687-3642-4fec-8a38-e0752ea4d398 + taskid: 88ba54ff-84fd-4c91-8ae8-4f88a4a5cafd type: regular task: - id: 2d2c5687-3642-4fec-8a38-e0752ea4d398 + id: 88ba54ff-84fd-4c91-8ae8-4f88a4a5cafd version: -1 name: clear context scriptName: DeleteContext @@ -184,8 +197,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1885 + "x": 50, + "y": 1245 } } note: false @@ -193,16 +206,18 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "13": id: "13" - taskid: 5d168b90-65c6-4f98-8759-3a9fcddf28b9 + taskid: 67c5f1a7-4b58-4447-8cee-286f22b9139c type: playbook task: - id: 5d168b90-65c6-4f98-8759-3a9fcddf28b9 + id: 67c5f1a7-4b58-4447-8cee-286f22b9139c version: -1 name: DBot Create Phishing Classifier V2 - description: Create a phishing classifier using machine learning technique, - based on email content + description: Create a phishing classifier using machine learning technique, based on email content playbookName: DBot Create Phishing Classifier V2 type: playbook iscommand: false @@ -244,8 +259,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1185 + "x": 50, + "y": 545 } } note: false @@ -253,12 +268,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "14": id: "14" - taskid: c786f22a-ce44-4021-84f6-1c74a3157049 + taskid: d0388493-758a-496d-8e20-0f4994316318 type: condition task: - id: c786f22a-ce44-4021-84f6-1c74a3157049 + 
id: d0388493-758a-496d-8e20-0f4994316318 version: -1 name: Model evaluation exist type: condition @@ -279,8 +297,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1360 + "x": 50, + "y": 720 } } note: false @@ -288,12 +306,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "15": id: "15" - taskid: c607874c-eac1-404d-8c31-620e541c5b7c + taskid: 87fcc375-89be-4f44-8123-9e995379a389 type: condition task: - id: c607874c-eac1-404d-8c31-620e541c5b7c + id: 87fcc375-89be-4f44-8123-9e995379a389 version: -1 name: 'Check the prediction label: Tag1' type: condition @@ -317,8 +338,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1710 + "x": 50, + "y": 1070 } } note: false @@ -326,12 +347,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "16": id: "16" - taskid: e1de53ff-a890-4c38-863a-3171f8705bc8 + taskid: a4973892-625f-4fcc-8a96-a270795d2751 type: condition task: - id: e1de53ff-a890-4c38-863a-3171f8705bc8 + id: a4973892-625f-4fcc-8a96-a270795d2751 version: -1 name: 'Check the prediction label: Tag2' type: condition @@ -355,8 +379,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 2235 + "x": 50, + "y": 1595 } } note: false @@ -364,16 +388,18 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "17": id: "17" - taskid: b783dbbf-bcbf-47be-8a5d-22dcc526df0e + taskid: bc73dcb7-4720-4504-85e0-590b5fe8fe02 type: regular task: - id: b783dbbf-bcbf-47be-8a5d-22dcc526df0e + id: bc73dcb7-4720-4504-85e0-590b5fe8fe02 version: -1 name: Predict Tag2 - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. 
+ description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision. scriptName: DBotPredictPhishingWords type: regular iscommand: false @@ -383,9 +409,7 @@ tasks: - "16" scriptarguments: emailSubject: - simple: ntidy boy substance faulty waves type boat argument ntidy boy substance - faulty waves type boat argument ntidy boy substance faulty waves type boat - argument + simple: ntidy boy substance faulty waves type boat argument ntidy boy substance faulty waves type boat argument ntidy boy substance faulty waves type boat argument labelProbabilityThreshold: simple: "0" minTextLength: @@ -398,8 +422,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 2060 + "x": 50, + "y": 1420 } } note: false @@ -407,16 +431,18 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "18": id: "18" - taskid: cb137c7c-1ba3-4fea-8356-f4ecd3bf6193 + taskid: 3573406e-2b81-4d40-8661-2680791e46f8 type: regular task: - id: cb137c7c-1ba3-4fea-8356-f4ecd3bf6193 + id: 3573406e-2b81-4d40-8661-2680791e46f8 version: -1 name: Predict Tag3 - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. + description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision. 
scriptName: DBotPredictPhishingWords type: regular iscommand: false @@ -426,9 +452,7 @@ tasks: - "20" scriptarguments: emailSubject: - simple: suspend trucks aboriginal thread succeed gray last fall fall suspend - trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal - thread succeed gray last fall fall + simple: suspend trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal thread succeed gray last fall fall labelProbabilityThreshold: simple: "0" minTextLength: @@ -441,8 +465,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 2585 + "x": 50, + "y": 1945 } } note: false @@ -450,12 +474,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "19": id: "19" - taskid: ed783755-907d-4097-8974-96034ab3b233 + taskid: d462dd42-6b56-49bf-8428-ae96e1f1be16 type: regular task: - id: ed783755-907d-4097-8974-96034ab3b233 + id: d462dd42-6b56-49bf-8428-ae96e1f1be16 version: -1 name: clear context scriptName: DeleteContext @@ -472,8 +499,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 2410 + "x": 50, + "y": 1770 } } note: false @@ -481,12 +508,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "20": id: "20" - taskid: 0e035d46-6829-41af-830c-6dfc7353dde1 + taskid: 5cf7405e-c4c2-40a0-8e63-1913319a70f3 type: condition task: - id: 0e035d46-6829-41af-830c-6dfc7353dde1 + id: 5cf7405e-c4c2-40a0-8e63-1913319a70f3 version: -1 name: 'Check the prediction label: Tag3' type: condition @@ -507,241 +537,11 @@ tasks: right: value: simple: Tag3 - view: |- - { - "position": { - "x": 695, - "y": 2760 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - - "24": - id: "24" - taskid: 69c9af85-903f-4d2a-8540-fd48adb0c89b - type: regular - 
task: - id: 69c9af85-903f-4d2a-8540-fd48adb0c89b - version: -1 - name: Load prediction docker - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. - scriptName: DBotPredictPhishingWords - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "28" - scriptarguments: - modelName: - simple: dummy - continueonerror: true - separatecontext: false view: |- { "position": { "x": 50, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - - "25": - id: "25" - taskid: ebf1c9ed-92a7-4633-8b37-42fb8570269f - type: regular - task: - id: ebf1c9ed-92a7-4633-8b37-42fb8570269f - version: -1 - name: Load evaluation docker - description: Finds a threshold for ML model, and performs an evaluation based - on it - scriptName: GetMLModelEvaluation - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "28" - scriptarguments: - yPred: - simple: dummy - yTrue: - simple: dummy - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 480, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "26": - id: "26" - taskid: bb61e790-fd72-49e8-842b-98933451305c - type: regular - task: - id: bb61e790-fd72-49e8-842b-98933451305c - version: -1 - name: Load training docker - description: Train a machine learning text classifier. 
- scriptName: DBotTrainTextClassifierV2 - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "28" - scriptarguments: - input: - simple: dummy_input - tagField: - simple: dummy - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 910, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - - "27": - id: "27" - taskid: 8e33ab3c-0c36-494a-8b2d-02a838b437a8 - type: regular - task: - id: 8e33ab3c-0c36-494a-8b2d-02a838b437a8 - version: -1 - name: Load Preprocessing Docker - description: Pre-process text data for the machine learning text classifier. - scriptName: DBotPreProcessTextData - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "28" - scriptarguments: - input: - simple: dummy input - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 1340, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "28": - id: "28" - taskid: 285056a4-c36d-4fe3-836a-0635bbcb2902 - type: regular - task: - id: 285056a4-c36d-4fe3-836a-0635bbcb2902 - version: -1 - name: Wait for docker download - description: Sleep for X seconds - scriptName: Sleep - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "29" - scriptarguments: - seconds: - simple: "10" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 515 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "29": - id: "29" - taskid: 36d855bc-d9a5-47f7-8f3e-5b72ab8fe194 - type: title - task: - id: 36d855bc-d9a5-47f7-8f3e-5b72ab8fe194 - version: -1 - name: Begin tests - type: title - iscommand: false - brand: "" - nexttasks: - '#none#': - - "6" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 690 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false 
- quietmode: 0 - "30": - id: "30" - taskid: 26c99254-1dd9-4faa-8c80-0762360a7221 - type: title - task: - id: 26c99254-1dd9-4faa-8c80-0762360a7221 - version: -1 - name: Load all dockers - type: title - iscommand: false - brand: "" - nexttasks: - '#none#': - - "27" - - "26" - - "25" - - "24" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 195 + "y": 2120 } } note: false @@ -749,13 +549,17 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false view: |- { "linkLabelsPosition": {}, "paper": { "dimensions": { - "height": 2950, - "width": 1670, + "height": 2310, + "width": 380, "x": 50, "y": 50 } @@ -763,4 +567,4 @@ view: |- } inputs: [] outputs: [] -fromversion: 6.1.0 \ No newline at end of file +fromversion: 6.1.0 diff --git a/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml b/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml index db43403fbdb5..1e133dce27be 100644 --- a/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml +++ b/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml @@ -5,14 +5,15 @@ starttaskid: "0" tasks: "0": id: "0" - taskid: b25fa8d2-98fa-4dc6-845c-99809370cfd4 + taskid: 861b09ed-933f-4295-8ef1-1d804f3bd783 type: start task: - id: b25fa8d2-98fa-4dc6-845c-99809370cfd4 + id: 861b09ed-933f-4295-8ef1-1d804f3bd783 version: -1 name: "" iscommand: false brand: "" + description: '' nexttasks: '#none#': - "1" @@ -20,7 +21,7 @@ tasks: view: |- { "position": { - "x": 265, + "x": 50, "y": 50 } } @@ -29,12 +30,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "1": id: "1" - taskid: 8d59f33e-507c-4223-8480-c8bf26b7dac6 + taskid: 82ddcae4-60d6-4ce6-8279-0c2cec7b435e type: regular task: - id: 8d59f33e-507c-4223-8480-c8bf26b7dac6 + id: 82ddcae4-60d6-4ce6-8279-0c2cec7b435e version: -1 name: 
Clear Context description: Delete field from context @@ -44,20 +48,15 @@ tasks: brand: "" nexttasks: '#none#': - - "5" - - "7" + - "2" scriptarguments: all: simple: "yes" - index: {} - key: {} - keysToKeep: {} - subplaybook: {} separatecontext: false view: |- { "position": { - "x": 265, + "x": 50, "y": 195 } } @@ -66,12 +65,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "2": id: "2" - taskid: 242f4b80-ec3c-4bee-80dd-d835ff633640 + taskid: 7bca2804-4463-46c3-884a-457baf90d247 type: regular task: - id: 242f4b80-ec3c-4bee-80dd-d835ff633640 + id: 7bca2804-4463-46c3-884a-457baf90d247 version: -1 name: Predict Using Out Of The Box Model scriptName: DBotPredictOutOfTheBoxV2 @@ -86,25 +88,14 @@ tasks: simple: "0.5" emailBody: simple: 'Re: PO# OP848784204' - emailBodyHTML: {} emailSubject: - simple: 'Dear office, Kindly find attached our new order (Po# OP848784204) - and the attached letter for the bank payment. Please sign, stamp and resend. Kindly - ship our order by using the service DHL EXPRESS WORLDWIDE. Our DHL account - number is: 950389383 Thanks Best Regards, Cristina Cadano Marketing - Officer - Procurement Officer Marketing@trustm.tv Tel. +974 4431 3336 Fax - +974 4435 3336 P.O. Box 10536 Doha,' - labelProbabilityThreshold: {} - minTextLength: {} - returnError: {} - topWordsLimit: {} - wordThreshold: {} + simple: 'Dear office, Kindly find attached our new order (Po# OP848784204) and the attached letter for the bank payment. Please sign, stamp and resend. Kindly ship our order by using the service DHL EXPRESS WORLDWIDE. Our DHL account number is: 950389383 Thanks Best Regards, Cristina Cadano Marketing Officer - Procurement Officer Marketing@trustm.tv Tel. +974 4431 3336 Fax +974 4435 3336 P.O. 
Box 10536 Doha,' separatecontext: false view: |- { "position": { - "x": 265, - "y": 720 + "x": 50, + "y": 370 } } note: false @@ -112,12 +103,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "3": id: "3" - taskid: 12333aa3-0590-49b2-866e-8c2b7b7b899a + taskid: f0db71aa-e449-4a5a-8e4f-f45b9725eb3c type: condition task: - id: 12333aa3-0590-49b2-866e-8c2b7b7b899a + id: f0db71aa-e449-4a5a-8e4f-f45b9725eb3c version: -1 name: Check Prediction type: condition @@ -125,7 +119,7 @@ tasks: brand: "" nexttasks: "YES": - - "4" + - "7" separatecontext: false conditions: - label: "YES" @@ -138,19 +132,27 @@ tasks: right: value: simple: Malicious - - - operator: greaterThan + - - operator: isEqualNumber left: value: - simple: DBotPredictPhishingWords.Probability + complex: + root: DBotPredictPhishingWords + accessor: Probability + transformers: + - operator: precision + args: + by: + value: + simple: "2" iscontext: true right: value: - simple: "0.5" + simple: "0.68" view: |- { "position": { - "x": 265, - "y": 895 + "x": 50, + "y": 545 } } note: false @@ -158,23 +160,27 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "4": id: "4" - taskid: a2a470aa-9a87-4fa2-804d-de19d4d0b285 + taskid: 5183b8ff-ca1f-48a9-8bb7-b14c5681fa39 type: title task: - id: a2a470aa-9a87-4fa2-804d-de19d4d0b285 + id: 5183b8ff-ca1f-48a9-8bb7-b14c5681fa39 version: -1 name: Done type: title iscommand: false brand: "" + description: '' separatecontext: false view: |- { "position": { - "x": 265, - "y": 1070 + "x": 50, + "y": 1770 } } note: false @@ -182,40 +188,38 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "5": id: "5" - taskid: 45bfa561-25d8-4438-8780-de8166abd545 + taskid: 77722f64-31d6-4da4-87b2-9b876ab1eb3f type: 
regular task: - id: 45bfa561-25d8-4438-8780-de8166abd545 + id: 77722f64-31d6-4da4-87b2-9b876ab1eb3f version: -1 - name: Load Automation's Docker - description: Predict phishing incidents using the out-of-the-box pretrained - model. + name: Predict Using Out Of The Box Model - Not Malicious + description: Predict phishing incidents using the out-of-the-box pre-trained model. scriptName: DBotPredictOutOfTheBoxV2 type: regular iscommand: false brand: "" nexttasks: '#none#': - - "6" + - "14" scriptarguments: - emailBody: - simple: test - emailBodyHTML: {} - emailSubject: {} - labelProbabilityThreshold: {} - minTextLength: {} - returnError: {} - topWordsLimit: {} - wordThreshold: {} - continueonerror: true + emailBodyHTML: + simple: "Hi testbox@demistodev.onmicrosoft.com,
We've received your email and are investigating.
Do not touch the email until further notice.

Cordially,
Your friendly neighborhood security team\"\"" + emailSubject: + simple: 'Re: Phishing Investigation - Message from Cortex XSOAR Security Operations Server' + confidenceThreshold: + simple: "0.5" separatecontext: false view: |- { "position": { "x": 50, - "y": 370 + "y": 895 } } note: false @@ -223,31 +227,34 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 - "6": - id: "6" - taskid: 784d63be-adc2-4c91-8c80-6cb5ba991e1e + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false + "7": + id: "7" + taskid: 6140b6eb-2472-4dc0-81b9-b0c222947802 type: regular task: - id: 784d63be-adc2-4c91-8c80-6cb5ba991e1e + id: 6140b6eb-2472-4dc0-81b9-b0c222947802 version: -1 - name: Wait for automation - description: Sleep for X seconds - scriptName: Sleep + name: Clear Context + description: Delete field from context + scriptName: DeleteContext type: regular iscommand: false brand: "" nexttasks: '#none#': - - "2" + - "5" scriptarguments: - seconds: - simple: "60" + all: + simple: "yes" separatecontext: false view: |- { "position": { "x": 50, - "y": 545 + "y": 720 } } note: false @@ -255,40 +262,34 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 - "7": - id: "7" - taskid: 309a9ea9-16a6-4529-83cb-ac798529290b + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false + "8": + id: "8" + taskid: 306b2d8c-734a-4cb2-87c1-2909d1fb3a7f type: regular task: - id: 309a9ea9-16a6-4529-83cb-ac798529290b + id: 306b2d8c-734a-4cb2-87c1-2909d1fb3a7f version: -1 - name: Load Automation's Docker - description: Predict phishing incidents using the out-of-the-box pretrained - model. 
- scriptName: DBotPredictOutOfTheBoxV2 + name: Clear Context + description: Delete field from context + scriptName: DeleteContext type: regular iscommand: false brand: "" nexttasks: '#none#': - - "8" + - "9" scriptarguments: - emailBody: - simple: test - emailBodyHTML: {} - emailSubject: {} - labelProbabilityThreshold: {} - minTextLength: {} - returnError: {} - topWordsLimit: {} - wordThreshold: {} - continueonerror: true + all: + simple: "yes" separatecontext: false view: |- { "position": { - "x": 480, - "y": 370 + "x": 50, + "y": 1245 } } note: false @@ -296,31 +297,152 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 - "8": - id: "8" - taskid: ef6ce480-7598-4dfe-85fe-c74104790bdb + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false + "9": + id: "9" + taskid: d36fbfea-a8d8-472f-8614-a56c73f2950c type: regular task: - id: ef6ce480-7598-4dfe-85fe-c74104790bdb + id: d36fbfea-a8d8-472f-8614-a56c73f2950c version: -1 - name: Wait for automation - description: Sleep for X seconds - scriptName: Sleep + name: Predict Using Out Of The Box Model + scriptName: DBotPredictOutOfTheBoxV2 type: regular iscommand: false brand: "" nexttasks: '#none#': - - "2" + - "15" scriptarguments: - seconds: - simple: "60" + confidenceThreshold: + simple: "0.5" + emailBodyHTML: + simple: "\n\n\n\nUntitled Document\n\n\n\n\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n
\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t
\n\t\t\t\t\t\t\t\t

Delivery Notification

\n\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t\t

Order: SGH-9226-99950127
\n\t\t\t\t\t\t\t\t

\n\t\t\t\t\t\t\t\t

Dear Customer,

\n\t\t\t\t\t\t\t\t

Your parcel has arrived at the post office. Our courier attempted but was unable to deliver the parcel to you.

\n\t\t\t\t\t\t\t\t

To receive your parcel, please go to the nearest office and show this receipt.

\n\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t\t\t\t\t\t

GET AND PRINT RECEIPT\n

\n\t\t\t\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\t

\n\t\t\t\t\t\t\t\t

Thank you

\n\t\t\t\t\t\t\t
\n\t\t\t
\n\"\"\n\"\"\n\n
\n\n" + emailSubject: + simple: Package Undeliverable separatecontext: false + continueonerrortype: "" view: |- { "position": { - "x": 480, - "y": 545 + "x": 50, + "y": 1420 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "14": + id: "14" + taskid: f871509d-b1c4-41ce-828c-0659b033b9b1 + type: condition + task: + id: f871509d-b1c4-41ce-828c-0659b033b9b1 + version: -1 + name: Check Prediction + type: condition + iscommand: false + brand: "" + nexttasks: + "YES": + - "8" + separatecontext: false + conditions: + - label: "YES" + condition: + - - operator: isEqualString + left: + value: + simple: DBotPredictPhishingWords.Label + iscontext: true + right: + value: + simple: Non-Malicious + - - operator: isEqualNumber + left: + value: + complex: + root: DBotPredictPhishingWords + accessor: Probability + transformers: + - operator: precision + args: + by: + value: + simple: "2" + iscontext: true + right: + value: + simple: "0.60" + continueonerrortype: "" + view: |- + { + "position": { + "x": 50, + "y": 1070 + } + } + note: false + timertriggers: [] + ignoreworker: false + skipunavailable: false + quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false + "15": + id: "15" + taskid: e1ba8eba-a7e3-43e8-8a8a-031a51f1b99f + type: condition + task: + id: e1ba8eba-a7e3-43e8-8a8a-031a51f1b99f + version: -1 + name: Check Prediction + type: condition + iscommand: false + brand: "" + nexttasks: + "YES": + - "4" + separatecontext: false + conditions: + - label: "YES" + condition: + - - operator: isEqualString + left: + value: + simple: DBotPredictPhishingWords.Label + iscontext: true + right: + value: + simple: Malicious + - - operator: isEqualNumber + left: + value: + complex: + root: DBotPredictPhishingWords + accessor: Probability + transformers: + - operator: precision + args: + by: + value: + simple: "2" + iscontext: true + right: + value: + simple: "0.95" + 
continueonerrortype: "" + view: |- + { + "position": { + "x": 50, + "y": 1595 } } note: false @@ -328,13 +450,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + isoversize: false + isautoswitchedtoquietmode: false view: |- { "linkLabelsPosition": {}, "paper": { "dimensions": { - "height": 1085, - "width": 810, + "height": 1785, + "width": 380, "x": 50, "y": 50 } @@ -342,4 +466,5 @@ view: |- } inputs: [] outputs: [] -fromversion: 5.5.0 \ No newline at end of file +fromversion: 5.5.0 +description: '' diff --git a/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml b/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml index c955a614d03f..9e6b3e43c172 100644 --- a/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml +++ b/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml @@ -5,22 +5,23 @@ starttaskid: "0" tasks: "0": id: "0" - taskid: e43d8441-51e2-4201-87ba-a15423de05ec + taskid: 8210b4a0-dd4b-4b24-8893-ab9f3e5d21ad type: start task: - id: e43d8441-51e2-4201-87ba-a15423de05ec + id: 8210b4a0-dd4b-4b24-8893-ab9f3e5d21ad version: -1 name: "" iscommand: false brand: "" + description: '' nexttasks: '#none#': - - "17" + - "6" separatecontext: false view: |- { "position": { - "x": 695, + "x": 50, "y": 50 } } @@ -29,12 +30,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "1": id: "1" - taskid: 6512432c-e980-41da-8a85-e318d7bbffbe + taskid: 622b1351-3b36-41a8-8617-e8c123c7ce02 type: regular task: - id: 6512432c-e980-41da-8a85-e318d7bbffbe + id: 622b1351-3b36-41a8-8617-e8c123c7ce02 version: -1 name: Create Incidents File script: TestCreateIncidentsFile @@ -51,8 +55,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1010 + "x": 50, + "y": 370 } } note: false @@ -60,16 +64,18 @@ tasks: ignoreworker: false 
skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "3": id: "3" - taskid: ae7d8277-e9b2-4c71-8c5d-70979e7f4267 + taskid: 1aa209a9-3d42-4680-8ecf-10d050b9ba6e type: regular task: - id: ae7d8277-e9b2-4c71-8c5d-70979e7f4267 + id: 1aa209a9-3d42-4680-8ecf-10d050b9ba6e version: -1 name: Predict Sentence - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. + description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision. scriptName: DBotPredictPhishingWords type: regular iscommand: false @@ -80,25 +86,18 @@ tasks: scriptarguments: emailBody: simple: this message is spam. this message is spam - emailBodyHTML: {} - emailSubject: {} - hashSeed: {} labelProbabilityThreshold: simple: "0" minTextLength: simple: "0" modelName: simple: ${DBotPhishingClassifier.ModelName} - modelStoreType: {} - returnError: {} - topWordsLimit: {} - wordThreshold: {} separatecontext: false view: |- { "position": { - "x": 695, - "y": 1360 + "x": 50, + "y": 720 } } note: false @@ -106,12 +105,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "4": id: "4" - taskid: e03072b6-e44e-40bd-8389-68c36bb5b435 + taskid: 067671f5-b98d-4e1e-8e6e-fa4c43ff7de0 type: condition task: - id: e03072b6-e44e-40bd-8389-68c36bb5b435 + id: 067671f5-b98d-4e1e-8e6e-fa4c43ff7de0 version: -1 name: Check Prediction type: condition @@ -143,8 +145,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 1535 + "x": 50, + "y": 895 } } note: false @@ -152,23 +154,27 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "5": id: "5" - taskid: 2f277472-af83-495c-89d3-44f1585258e2 + 
taskid: 7f39c142-9139-4e4f-880a-5ac03f42f5f9 type: title task: - id: 2f277472-af83-495c-89d3-44f1585258e2 + id: 7f39c142-9139-4e4f-880a-5ac03f42f5f9 version: -1 name: Done type: title iscommand: false brand: "" + description: '' separatecontext: false view: |- { "position": { - "x": 695, - "y": 1710 + "x": 50, + "y": 1070 } } note: false @@ -176,12 +182,15 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "6": id: "6" - taskid: 06ebace9-f13b-4e78-88d9-e799beb78b91 + taskid: 47ebe0a5-32d3-404e-851a-99aa212e76ef type: regular task: - id: 06ebace9-f13b-4e78-88d9-e799beb78b91 + id: 47ebe0a5-32d3-404e-851a-99aa212e76ef version: -1 name: Clean Context description: Delete field from context @@ -199,8 +208,8 @@ tasks: view: |- { "position": { - "x": 695, - "y": 835 + "x": 50, + "y": 195 } } note: false @@ -208,16 +217,18 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false "7": id: "7" - taskid: b7804202-3267-454e-8855-6105c41c15f7 + taskid: 76e221f8-85d5-4107-816b-b86cb93a5e7b type: playbook task: - id: b7804202-3267-454e-8855-6105c41c15f7 + id: 76e221f8-85d5-4107-816b-b86cb93a5e7b version: -1 name: DBot Create Phishing Classifier V2 From File - description: Create a phishing classifier using machine learning. The classifier - is based on incidents files extracted from email content. + description: Create a phishing classifier using machine learning. The classifier is based on incidents files extracted from email content. playbookName: DBot Create Phishing Classifier V2 From File type: playbook iscommand: false @@ -235,8 +246,7 @@ tasks: emailTextKey: simple: Email Body|Email Body HTML|details fileID: - simple: '${.=(val.File instanceof Array ? val.File[val.File.length-1].EntryID - : val.File.EntryID)}' + simple: '${.=(val.File instanceof Array ? 
val.File[val.File.length-1].EntryID : val.File.EntryID)}' incidentTypes: simple: Phishing inputFormat: @@ -261,238 +271,11 @@ tasks: exitCondition: "" wait: 1 max: 0 - view: |- - { - "position": { - "x": 695, - "y": 1185 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "11": - id: "11" - taskid: 7ef4beea-9151-425e-8bce-53eb0e993f50 - type: regular - task: - id: 7ef4beea-9151-425e-8bce-53eb0e993f50 - version: -1 - name: Load prediction docker - description: Predict text label using a pre-trained machine learning phishing - model, and get the most important words used in the classification decision. - scriptName: DBotPredictPhishingWords - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "15" - scriptarguments: - modelName: - simple: dummy - continueonerror: true - separatecontext: false view: |- { "position": { "x": 50, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "12": - id: "12" - taskid: cd9a8a22-a47d-4c3c-83df-592577755ece - type: regular - task: - id: cd9a8a22-a47d-4c3c-83df-592577755ece - version: -1 - name: Load evaluation docker - description: Finds a threshold for ML model, and performs an evaluation based - on it - scriptName: GetMLModelEvaluation - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "15" - scriptarguments: - yPred: - simple: dummy - yTrue: - simple: dummy - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 480, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "13": - id: "13" - taskid: 5f0baead-ae59-4311-8f28-50effdfd7c1b - type: regular - task: - id: 5f0baead-ae59-4311-8f28-50effdfd7c1b - version: -1 - name: Load training docker - description: Train a machine learning text classifier. 
- scriptName: DBotTrainTextClassifierV2 - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "15" - scriptarguments: - input: - simple: dummy_input - tagField: - simple: dummy - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 910, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "14": - id: "14" - taskid: ba870761-cad7-444e-85ca-d6ca9f505f36 - type: regular - task: - id: ba870761-cad7-444e-85ca-d6ca9f505f36 - version: -1 - name: Load Preprocessing Docker - description: Pre-process text data for the machine learning text classifier. - scriptName: DBotPreProcessTextData - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "15" - scriptarguments: - input: - simple: dummy input - continueonerror: true - separatecontext: false - view: |- - { - "position": { - "x": 1340, - "y": 340 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "15": - id: "15" - taskid: 7974be6e-6e04-4da2-8e8c-6131f6d586b5 - type: regular - task: - id: 7974be6e-6e04-4da2-8e8c-6131f6d586b5 - version: -1 - name: Wait for docker download - description: Sleep for X seconds - scriptName: Sleep - type: regular - iscommand: false - brand: "" - nexttasks: - '#none#': - - "16" - scriptarguments: - seconds: - simple: "10" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 515 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - "16": - id: "16" - taskid: 6d0b0b2e-851a-4f79-8371-56199707908e - type: title - task: - id: 6d0b0b2e-851a-4f79-8371-56199707908e - version: -1 - name: Begin tests - type: title - iscommand: false - brand: "" - nexttasks: - '#none#': - - "6" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 690 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - 
quietmode: 0 - "17": - id: "17" - taskid: 07f113b4-ad97-4426-8bb9-47f7cefe0187 - type: title - task: - id: 07f113b4-ad97-4426-8bb9-47f7cefe0187 - version: -1 - name: Load all dockers - type: title - iscommand: false - brand: "" - nexttasks: - '#none#': - - "14" - - "13" - - "12" - - "11" - separatecontext: false - view: |- - { - "position": { - "x": 695, - "y": 195 + "y": 545 } } note: false @@ -500,13 +283,16 @@ tasks: ignoreworker: false skipunavailable: false quietmode: 0 + continueonerrortype: "" + isoversize: false + isautoswitchedtoquietmode: false view: |- { "linkLabelsPosition": {}, "paper": { "dimensions": { - "height": 1725, - "width": 1670, + "height": 1085, + "width": 380, "x": 50, "y": 50 } @@ -514,4 +300,5 @@ view: |- } inputs: [] outputs: [] -fromversion: 6.1.0 \ No newline at end of file +fromversion: 6.1.0 +description: '' diff --git a/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml b/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml index d274790f274c..4a14ec137abc 100644 --- a/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml +++ b/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml @@ -19,7 +19,7 @@ script: |- message = '{}/{} correct predictions. 
'.format(len(df)- len(wrong_predictions_ids), len(df)) if len(wrong_predictions_ids) > 0: message += 'Wrong predictions:\n {}'.format('\n'.join([str(id_) for id_ in wrong_predictions_ids])) - return_outputs(message, {'CompareEnvPredictionsToExpectedPredictions .allPredictionsMatched': len(wrong_predictions_ids)== 0}) + return_outputs(message, {'CompareEnvPredictionsToExpectedPredictions.allPredictionsMatched': len(wrong_predictions_ids)== 0}) type: python tags: [] enabled: true @@ -27,11 +27,10 @@ args: - name: input required: true outputs: -- contextPath: CompareEnvPredictionsToExpectedPredictions .allPredictionsMatched +- contextPath: CompareEnvPredictionsToExpectedPredictions.allPredictionsMatched scripttarget: 0 subtype: python3 pswd: "" -runonce: false -dockerimage: demisto/ml:1.0.0.20606 +dockerimage: demisto/pandas:1.0.0.102566 runas: DBotWeakRole fromversion: 5.5.0 \ No newline at end of file diff --git a/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml b/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml index 63ba305fb488..8b6cd736e775 100644 --- a/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml +++ b/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml @@ -17,16 +17,16 @@ script: >+ incident1_template = { 'type': 'Simulation', 'name': 'Tag1', - 'dbotprediction': 'ham', - 'dbotpredictionprobability': 1.0 + 'classification': 'ham', + 'description': 1.0 } incident2_template = { 'type': 'Simulation', 'name': 'Tag2', - 'dbotprediction': 'spam', - 'dbotpredictionprobability': 1.0, + 'classification': 'spam', + 'description': 1.0, } @@ -54,8 +54,8 @@ script: >+ 'EmailBodyKey': 'details', 'EmailTagKey': 'name', 'IncidentsQuery': 'type:Simulation', - 'EmailPredictionKey': 'dbotprediction', - 'EmailPredictionProbabilityKey': 'dbotpredictionprobability' + 'EmailPredictionKey': 'classification', + 'EmailPredictionProbabilityKey': 'description' } }) 
diff --git a/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml b/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml index f1d52f2dba57..66ce67eeb158 100644 --- a/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml +++ b/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml @@ -5,7 +5,6 @@ vcShouldKeepItemLegacyProdMachine: false name: CreateIncidentsOutOfTheBoxV2 script: |2+ - import json incidents = [ diff --git a/Packs/ML/pack_metadata.json b/Packs/ML/pack_metadata.json index 4ac67ca6f356..379bd599d230 100644 --- a/Packs/ML/pack_metadata.json +++ b/Packs/ML/pack_metadata.json @@ -2,7 +2,7 @@ "name": "Machine Learning", "description": "Help to manage machine learning models in Cortex XSOAR", "support": "xsoar", - "currentVersion": "1.4.10", + "currentVersion": "1.4.11", "author": "Cortex XSOAR", "url": "https://www.paloaltonetworks.com/cortex", "email": "", diff --git a/Tests/conf.json b/Tests/conf.json index 40d448631885..9acc8c49e36c 100644 --- a/Tests/conf.json +++ b/Tests/conf.json @@ -3678,6 +3678,9 @@ { "playbookID": "GetIndicatorsByQuery - Test" }, + { + "playbookID": "DBotFindSimilarIncidents-test" + }, { "playbookID": "DBotCreatePhishingClassifierV2FromFile-Test", "timeout": 60000, @@ -5940,7 +5943,6 @@ "ThreatGrid_v2_Test": "No instance, developed by Qmasters", "Test-Detonate URL - ThreatGrid": "No instance, developed by Qmasters", "awake_security_test_pb": "No instance, CRTX-77572", - "Create Phishing Classifier V2 ML Test": "Updated docker image lacks data for the ml model. Once data issue is solved for ml module can un skip. ", "SumoLogic-Test": "401 unauthorized, CIAC-6334", "EWS_O365_test": "Issue CIAC-6753", "Microsoft Defender Advanced Threat Protection - Test dev": "Issue CIAC-7514",