From 24acc130574d603277c50d548da7e5dcbf49d0f7 Mon Sep 17 00:00:00 2001
From: Jacob Levy <129657918+jlevypaloalto@users.noreply.github.com>
Date: Thu, 11 Jul 2024 12:17:26 +0300
Subject: [PATCH] Update docker ml (#35081)
* updated docker
* added the rest
* devdemisto/ml:1.0.0.100486
* fix tpb
* return on no incidents
* remove runonce
* remove space
* fixed
* fix create incidents script
* new docker
* revert: fix create incidents script
* add outputs to DBotFindSimilarIncidents
* new tpb DBotFindSimilarIncidents-test
* new docker
* bump transformers
* Empty-Commit
* fix conf.json
* more fixes
* more fixes
* new docker
* RN
* new docker
* revert dockers
* more stuff
* redirect stderr
* docker
* format
* format
* RN
* more stuff
* build fixes
* build fixes
* fix unit-tests
* more docker changes
* more docker changes
* build fixes
* suppress logger
* build fixes
* build fixes
---
Packs/Base/ReleaseNotes/1_34_28.md | 24 +
.../DBotBuildPhishingClassifier.py | 33 +-
.../DBotBuildPhishingClassifier.yml | 9 +-
.../DBotBuildPhishingClassifier_test.py | 7 +-
.../DBotFindSimilarIncidents.yml | 24 +-
.../DBotFindSimilarIncidentsByIndicators.yml | 2 +-
.../DBotPredictPhishingWords.py | 67 +--
.../DBotPredictPhishingWords.yml | 3 +-
...st.py => DBotPredictPhishingWords_test.py} | 26 +-
.../DBotPreprocessTextData.py | 15 +-
.../DBotPreprocessTextData.yml | 3 +-
.../DBotTrainTextClassifierV2.py | 34 +-
.../DBotTrainTextClassifierV2.yml | 3 +-
.../DBotTrainTextClassifierV2_test.py | 4 +-
.../GetMLModelEvaluation.py | 69 +--
.../GetMLModelEvaluation.yml | 6 +-
.../GetMLModelEvaluation_test.py | 41 +-
.../Scripts/GetMLModelEvaluation/README.md | 7 +-
...playbook-DBotFindSimilarIncidents-test.yml | 340 ++++++++++++++
Packs/Base/pack_metadata.json | 2 +-
.../script-TestCreateIncidentsFile.yml | 4 +-
...stCreateIncidentsForPhishingClassifier.yml | 10 +-
Packs/ML/ReleaseNotes/1_4_11.md | 14 +
.../DBotPredictIncidentsBatch.py | 12 +-
.../DBotPredictIncidentsBatch.yml | 8 +-
.../DBotPredictIncidentsBatch_test.py | 0
.../DBotPredictIncidentsBatch/README.md | 51 +++
.../DBotPredictOutOfTheBoxV2.py | 3 +-
.../DBotPredictOutOfTheBoxV2.yml | 3 +-
.../EvaluateMLModllAtProduction.py | 77 ++--
.../EvaluateMLModllAtProduction.yml | 4 +-
.../EvaluateMLModllAtProduction/README.md | 52 +++
...-Create_Phishing_Classifier_V2_ML_Test.yml | 418 +++++-------------
...playbook-DBotPredictOutOfTheBoxV2-test.yml | 333 +++++++++-----
...Phishing_Classifier_V2_From_File-_Test.yml | 327 +++-----------
...areEnvPredictionsToExpectedPredictions.yml | 7 +-
...ncidentsForEvaluateMLModllAtProduction.yml | 12 +-
.../script-CreateIncidentsOutOfTheBoxV2.yml | 1 -
Packs/ML/pack_metadata.json | 2 +-
Tests/conf.json | 4 +-
40 files changed, 1144 insertions(+), 917 deletions(-)
create mode 100644 Packs/Base/ReleaseNotes/1_34_28.md
rename Packs/Base/Scripts/DBotPredictPhishingWords/{dbot_predict_phishing_words_test.py => DBotPredictPhishingWords_test.py} (94%)
create mode 100644 Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml
create mode 100644 Packs/ML/ReleaseNotes/1_4_11.md
create mode 100644 Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py
create mode 100644 Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md
create mode 100644 Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md
diff --git a/Packs/Base/ReleaseNotes/1_34_28.md b/Packs/Base/ReleaseNotes/1_34_28.md
new file mode 100644
index 000000000000..b601d697befc
--- /dev/null
+++ b/Packs/Base/ReleaseNotes/1_34_28.md
@@ -0,0 +1,24 @@
+
+#### Scripts
+
+##### DBotTrainTextClassifierV2
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+##### DBotBuildPhishingClassifier
+
+- Changed the Docker image to: *demisto/python3:3.11.9.101916*.
+##### DBotPreProcessTextData
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+##### DBotPredictPhishingWords
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+##### DBotFindSimilarIncidents
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+##### GetMLModelEvaluation
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+##### DBotFindSimilarIncidentsByIndicators
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py
index 463a366e73ba..15c00554f906 100644
--- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py
+++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.py
@@ -1,19 +1,12 @@
+from CommonServerPython import *
import base64
-import copy
import gc
-from CommonServerPython import *
-
-PREFIXES_TO_REMOVE = ['incident.']
ALL_LABELS = "*"
def preprocess_incidents_field(incidents_field):
- incidents_field = incidents_field.strip()
- for prefix in PREFIXES_TO_REMOVE:
- if incidents_field.startswith(prefix):
- incidents_field = incidents_field[len(prefix):]
- return incidents_field
+ return incidents_field.strip().removeprefix('incident.')
def get_phishing_map_labels(comma_values):
@@ -28,7 +21,7 @@ def get_phishing_map_labels(comma_values):
labels_dict[splited[0].strip()] = splited[1].strip()
else:
labels_dict[v] = v
- return {k: v for k, v in labels_dict.items()}
+ return dict(labels_dict.items())
def build_query_in_reepect_to_phishing_labels(args):
@@ -38,17 +31,17 @@ def build_query_in_reepect_to_phishing_labels(args):
return args
mapping_dict = get_phishing_map_labels(mapping)
tag_field = args['tagField']
- tags_union = ' '.join(['"{}"'.format(label) for label in mapping_dict])
- mapping_query = '{}:({})'.format(tag_field, tags_union)
+ tags_union = ' '.join([f'"{label}"' for label in mapping_dict])
+ mapping_query = f'{tag_field}:({tags_union})'
if 'query' not in args or args['query'].strip() == '':
args['query'] = mapping_query
else:
- args['query'] = '({}) and ({})'.format(query, mapping_query)
+ args['query'] = f'({query}) and ({mapping_query})'
return args
def get_incidents(d_args):
- get_incidents_by_query_args = copy.deepcopy(d_args)
+ get_incidents_by_query_args = d_args.copy()
get_incidents_by_query_args['NonEmptyFields'] = d_args['tagField']
fields_names_to_populate = ['tagField', 'emailsubject', 'emailbody', "emailbodyhtml"]
fields_to_populate = [get_incidents_by_query_args.get(x, None) for x in fields_names_to_populate]
@@ -63,15 +56,15 @@ def get_incidents(d_args):
def preprocess_incidents(incidents, d_args):
- text_pre_process_args = copy.deepcopy(d_args)
+ text_pre_process_args = d_args.copy()
text_pre_process_args['inputType'] = 'json_b64_string'
text_pre_process_args['input'] = base64.b64encode(incidents.encode('utf-8')).decode('ascii')
text_pre_process_args['preProcessType'] = 'nlp'
email_body_fields = [text_pre_process_args.get("emailbody"), text_pre_process_args.get("emailbodyhtml")]
email_body = "|".join([x for x in email_body_fields if x])
- text_pre_process_args['textFields'] = "%s,%s" % (text_pre_process_args['emailsubject'], email_body)
- text_pre_process_args['whitelistFields'] = "{0},{1}".format('dbot_processed_text',
- text_pre_process_args['tagField'])
+ text_pre_process_args['textFields'] = "{},{}".format(text_pre_process_args['emailsubject'], email_body)
+ text_pre_process_args['whitelistFields'] = "{},{}".format('dbot_processed_text',
+ text_pre_process_args['tagField'])
res = demisto.executeCommand("DBotPreProcessTextData", text_pre_process_args)
if is_error(res):
return_error(get_error(res))
@@ -81,7 +74,7 @@ def preprocess_incidents(incidents, d_args):
def train_model(processed_text_data, d_args):
- train_model_args = copy.deepcopy(d_args)
+ train_model_args = d_args.copy()
train_model_args['inputType'] = 'json_b64_string'
train_model_args['input'] = base64.b64encode(processed_text_data.encode('utf-8')).decode('ascii')
train_model_args['overrideExistingModel'] = 'true'
@@ -90,7 +83,7 @@ def train_model(processed_text_data, d_args):
def main():
- d_args = dict(demisto.args())
+ d_args = demisto.args()
for arg in ['tagField', 'emailbody', 'emailbodyhtml', 'emailsubject', 'timeField']:
d_args[arg] = preprocess_incidents_field(d_args.get(arg, ''))
diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml
index 8f04d84a19c8..82289f4eb072 100644
--- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml
+++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier.yml
@@ -4,9 +4,9 @@ args:
- defaultValue: Phishing
description: A comma-separated list of incident types by which to filter.
name: incidentTypes
-- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200")'
+- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", "2019-01-01T00:00:00 +0200").'
name: fromDate
-- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", ""2019-01-01T00:00:00 +0200")'
+- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings example: "3 days ago", "2019-01-01T00:00:00 +0200").'
name: toDate
- defaultValue: '3000'
description: The maximum number of incidents to fetch.
@@ -39,7 +39,7 @@ args:
- description: The model name to store in the system.
name: modelName
- defaultValue: '*'
- description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious'
+ description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 5 values in email tag: malicious, credentials harvesting, inner communication, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communication" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communication:non-malicious, external legit email:non-malicious.'
name: phishingLabels
- defaultValue: emailsubject
description: Incident field name with the email subject.
@@ -83,8 +83,7 @@ tags:
- ml
timeout: 12µs
type: python
-dockerimage: demisto/ml:1.0.0.45981
-runonce: true
+dockerimage: demisto/python3:3.11.9.101916
tests:
- Create Phishing Classifier V2 ML Test
- DBotCreatePhishingClassifierV2FromFile-Test
diff --git a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py
index 44a4660752c0..08e291edd12b 100644
--- a/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py
+++ b/Packs/Base/Scripts/DBotBuildPhishingClassifier/DBotBuildPhishingClassifier_test.py
@@ -13,7 +13,8 @@ def test_no_mapping_no_query():
def test_no_mapping_with_query():
args = {'phishingLabels': '*', 'query': QUERY}
args = build_query_in_reepect_to_phishing_labels(args)
- assert 'query' in args and args['query'] == QUERY
+ assert 'query' in args
+ assert args['query'] == QUERY
def test_mapping_no_query():
@@ -27,6 +28,6 @@ def test_mapping_with_query():
args = {'phishingLabels': MAPPING, 'tagField': 'closeReason', 'query': QUERY}
args = build_query_in_reepect_to_phishing_labels(args)
assert 'query' in args
- opt1 = args['query'] == '({}) and (closeReason:("spam" "legit"))'.format(QUERY)
- opt2 = args['query'] == '({}) and (closeReason:("legit" "spam"))'.format(QUERY)
+ opt1 = args['query'] == f'({QUERY}) and (closeReason:("spam" "legit"))'
+ opt2 = args['query'] == f'({QUERY}) and (closeReason:("legit" "spam"))'
assert opt1 or opt2
diff --git a/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml b/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml
index 60fef5c54f27..05d67d6f9fde 100644
--- a/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml
+++ b/Packs/Base/Scripts/DBotFindSimilarIncidents/DBotFindSimilarIncidents.yml
@@ -86,9 +86,27 @@ script: '-'
subtype: python3
timeout: '0'
type: python
-dockerimage: demisto/ml:1.0.0.94241
+dockerimage: demisto/ml:1.0.0.101889
runas: DBotWeakRole
-runonce: true
tests:
-- No tests (auto formatted)
+- DBotFindSimilarIncidents-test
fromversion: 5.0.0
+outputs:
+- contextPath: DBotFindSimilarIncidents.isSimilarIncidentFound
+ description: Indicates whether similar incidents have been found.
+ type: boolean
+- contextPath: DBotFindSimilarIncidents.similarIncident.created
+ description: The creation date of the linked incident.
+ type: date
+- contextPath: DBotFindSimilarIncidents.similarIncident.id
+ description: The ID of the linked incident.
+ type: string
+- contextPath: DBotFindSimilarIncidents.similarIncident.name
+ description: The name of the linked incident.
+ type: string
+- contextPath: DBotFindSimilarIncidents.similarIncident.similarity incident
+ description: The similarity of the linked incident represented as a float in the range 0-1.
+ type: number
+- contextPath: DBotFindSimilarIncidents.similarIncident.details
+ description: The details of the linked incident.
+ type: string
diff --git a/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml b/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml
index a13bce442cf4..cd7351ea300d 100644
--- a/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml
+++ b/Packs/Base/Scripts/DBotFindSimilarIncidentsByIndicators/DBotFindSimilarIncidentsByIndicators.yml
@@ -42,7 +42,7 @@ script: '-'
subtype: python3
timeout: '0'
type: python
-dockerimage: demisto/ml:1.0.0.88591
+dockerimage: demisto/ml:1.0.0.101889
runas: DBotWeakRole
tests:
- DBotFindSimilarIncidentsByIndicators - Test
diff --git a/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py b/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py
index c8c668af7b07..a86b1dea6cf6 100644
--- a/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py
+++ b/Packs/Base/Scripts/DBotPredictPhishingWords/DBotPredictPhishingWords.py
@@ -1,9 +1,12 @@
# pylint: disable=no-member
-
from CommonServerPython import *
from string import punctuation
import demisto_ml
import numpy as np
+import logging
+
+# Only let ERROR-and-above records through from the 'transformers' logger.
+logging.getLogger('transformers').setLevel(logging.ERROR)
FASTTEXT_MODEL_TYPE = 'FASTTEXT_MODEL_TYPE'
TORCH_TYPE = 'torch'
@@ -14,27 +17,30 @@ def OrderedSet(iterable):
return list(dict.fromkeys(iterable))
-def get_model_data(model_name, store_type, is_return_error):
- res_model_list = demisto.executeCommand("getList", {"listName": model_name})[0]
- res_model = demisto.executeCommand("getMLModel", {"modelName": model_name})[0]
- if is_error(res_model_list) and not is_error(res_model):
- model_data = res_model['Contents']['modelData']
- try:
- model_type = res_model['Contents']['model']["type"]["type"]
- return model_data, model_type
- except Exception:
- return model_data, UNKNOWN_MODEL_TYPE
- elif not is_error(res_model_list) and is_error(res_model):
- return res_model_list["Contents"], UNKNOWN_MODEL_TYPE
- elif not is_error(res_model_list) and not is_error(res_model):
- if store_type == "list":
- return res_model_list["Contents"], UNKNOWN_MODEL_TYPE
- elif store_type == "mlModel":
- model_data = res_model['Contents']['modelData']
- model_type = res_model['Contents']['model']["type"]["type"]
- return model_data, model_type
- else:
- handle_error("error reading model %s from Demisto" % model_name, is_return_error)
+def get_model_data(model_name: str, store_type: str, is_return_error: bool) -> tuple[dict, str]:
+    """Return (model_data, model_type) for model_name, trying the preferred store first, then the other."""
+    def load_from_models(model_name: str) -> None | tuple[dict, str]:
+        res_model = demisto.executeCommand("getMLModel", {"modelName": model_name})
+        if is_error(res_model):
+            demisto.debug(get_error(res_model))
+            return None
+        model_data = res_model[0]['Contents']['modelData']
+        model_type = dict_safe_get(res_model, [0, 'Contents', 'model', "type", "type"], UNKNOWN_MODEL_TYPE)
+        return model_data, model_type
+    # A model read from a list carries no type information, hence UNKNOWN_MODEL_TYPE.
+    def load_from_list(model_name):
+        res_model = demisto.executeCommand("getList", {"listName": model_name})
+        if is_error(res_model):
+            demisto.debug(get_error(res_model))
+            return None
+        return res_model[0]["Contents"], UNKNOWN_MODEL_TYPE
+    # store_type only picks which store is tried first; the other one is the fallback.
+    if store_type == "list":
+        res = load_from_list(model_name) or load_from_models(model_name)
+    else:  # "mlModel" and any unrecognized value, so `res` is always bound
+        res = load_from_models(model_name) or load_from_list(model_name)
+    # Neither store returned the model: defer to handle_error.
+    return res or handle_error(f"error reading model {model_name} from Demisto", is_return_error)  # type: ignore
def handle_error(message, is_return_error):
@@ -88,6 +94,7 @@ def preprocess_text(text, model_type, is_return_error):
else:
words_to_token_maps = tokenized_text_result['originalWordsToTokens']
return input_text, words_to_token_maps
+ return None
def predict_phishing_words(model_name, model_store_type, email_subject, email_body, min_text_length, label_threshold,
@@ -97,7 +104,9 @@ def predict_phishing_words(model_name, model_store_type, email_subject, email_bo
model_type = FASTTEXT_MODEL_TYPE
if model_type not in [FASTTEXT_MODEL_TYPE, TORCH_TYPE, UNKNOWN_MODEL_TYPE]:
model_type = UNKNOWN_MODEL_TYPE
+
phishing_model = demisto_ml.phishing_model_loads_handler(model_data, model_type)
+
is_model_applied_on_a_single_incidents = isinstance(email_subject, str) and isinstance(email_body, str)
if is_model_applied_on_a_single_incidents:
return predict_single_incident_full_output(email_subject, email_body, is_return_error, label_threshold,
@@ -110,7 +119,7 @@ def predict_phishing_words(model_name, model_store_type, email_subject, email_bo
def predict_batch_incidents_light_output(email_subject, email_body, phishing_model, model_type, min_text_length):
- text_list = [{'text': "%s \n%s" % (subject, body)} for subject, body in zip(email_subject, email_body)]
+ text_list = [{'text': f"{subject} \n{body}"} for subject, body in zip(email_subject, email_body)]
preprocessed_text_list = preprocess_text(text_list, model_type, is_return_error=False)
batch_predictions = []
for input_text in preprocessed_text_list:
@@ -132,14 +141,14 @@ def predict_batch_incidents_light_output(email_subject, email_body, phishing_mod
'Type': entryTypes['note'],
'Contents': batch_predictions,
'ContentsFormat': formats['json'],
- 'HumanReadable': 'Applied predictions on {} incidents.'.format(len(batch_predictions)),
+ 'HumanReadable': f'Applied predictions on {len(batch_predictions)} incidents.',
}
def predict_single_incident_full_output(email_subject, email_body, is_return_error, label_threshold, min_text_length,
model_type, phishing_model, set_incidents_fields, top_word_limit,
word_threshold):
- text = "%s \n%s" % (email_subject, email_body)
+ text = f"{email_subject} \n{email_body}"
input_text, words_to_token_maps = preprocess_text(text, model_type, is_return_error)
filtered_text, filtered_text_number_of_words = phishing_model.filter_model_words(input_text)
if filtered_text_number_of_words == 0:
@@ -163,14 +172,14 @@ def predict_single_incident_full_output(email_subject, email_body, is_return_err
negative_tokens = OrderedSet(explain_result['NegativeWords'])
positive_words = find_words_contain_tokens(positive_tokens, words_to_token_maps)
negative_words = find_words_contain_tokens(negative_tokens, words_to_token_maps)
- positive_words = list(OrderedSet([s.strip(punctuation) for s in positive_words]))
- negative_words = list(OrderedSet([s.strip(punctuation) for s in negative_words]))
+ positive_words = OrderedSet([s.strip(punctuation) for s in positive_words])
+ negative_words = OrderedSet([s.strip(punctuation) for s in negative_words])
positive_words = [w for w in positive_words if w.isalnum()]
negative_words = [w for w in negative_words if w.isalnum()]
highlighted_text_markdown = text.strip()
for word in positive_words:
for cased_word in [word.lower(), word.title(), word.upper()]:
- highlighted_text_markdown = re.sub(r'(? 0:
lang_counter = Counter(inc[LANGUAGE_KEY] for inc in data).most_common()
description += "Dropped %d sample(s) that were detected as being in foreign languages. " % dropped_count
- description += 'Found language counts: {}'.format(', '.join(['{}:{}'.format(lang, count) for lang, count
+ description += 'Found language counts: {}'.format(', '.join([f'{lang}:{count}' for lang, count
in lang_counter]))
description += "\n"
return filtered_data, description
@@ -441,7 +442,7 @@ def main():
# clean text
if pre_process_type not in PRE_PROCESS_TYPES:
- return_error('Pre-process type {} is not supported'.format(pre_process_type))
+ return_error(f'Pre-process type {pre_process_type} is not supported')
# clean html and new lines
data = clean_text_of_incidents_list(data, DBOT_TEXT_FIELD, remove_html_tags)
diff --git a/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml b/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml
index 853640116f5a..06a9b3809ec2 100644
--- a/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml
+++ b/Packs/Base/Scripts/DBotPreprocessTextData/DBotPreprocessTextData.yml
@@ -104,8 +104,7 @@ tags:
- ml
timeout: 120µs
type: python
-dockerimage: demisto/ml:1.0.0.30541
-runonce: true
+dockerimage: demisto/ml:1.0.0.101889
tests:
- Create Phishing Classifier V2 ML Test
fromversion: 5.0.0
diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py
index a557dd0921f5..0408487579bb 100644
--- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py
+++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.py
@@ -1,12 +1,10 @@
+from CommonServerPython import *
# pylint: disable=no-member
import gc
-
+import demisto_ml
import pandas as pd
-from typing import List, Dict
from collections import defaultdict, Counter
from sklearn.model_selection import StratifiedKFold
-from CommonServerPython import *
-import demisto_ml
ALL_LABELS = "*"
GENERAL_SCORES = {
@@ -63,10 +61,10 @@ def read_file(input_data, input_type):
else:
res = demisto.getFilePath(input_data)
if not res:
- return_error("Entry {} not found".format(input_data))
+ return_error(f"Entry {input_data} not found")
file_path = res['path']
if input_type.startswith('json'):
- with open(file_path, 'r') as f:
+ with open(file_path) as f:
file_content = f.read()
if input_type.startswith('csv'):
return pd.read_csv(file_path).fillna('').to_dict(orient='records')
@@ -76,6 +74,7 @@ def read_file(input_data, input_type):
return pd.read_pickle(file_path, compression=None)
else:
return_error("Unsupported file type %s" % input_type)
+ return None
def get_file_entry_id(file_name):
@@ -156,7 +155,7 @@ def find_keywords(data, tag_field, text_field, min_score):
human_readable = "# Keywords per category\n"
for category, scores in keywords.items():
sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
- table_items = [{"Word": word, "Score": '{:.2f}'.format(score)} for
+ table_items = [{"Word": word, "Score": f'{score:.2f}'} for
word, score in sorted_scores if score >= min_score]
human_readable += tableToMarkdown(category, table_items, ["Word", "Score"])
demisto.results({
@@ -238,13 +237,13 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing
labels_counter = Counter([x[DBOT_TAG_FIELD] for x in data])
labels_below_thresh = [label for label, count in labels_counter.items() if count < MIN_INCIDENTS_THRESHOLD]
if len(labels_below_thresh) > 0:
- err = ['Minimum number of incidents per label required for training is {}.'.format(MIN_INCIDENTS_THRESHOLD)]
- err += ['The following labels have less than {} incidents: '.format(MIN_INCIDENTS_THRESHOLD)]
+ err = [f'Minimum number of incidents per label required for training is {MIN_INCIDENTS_THRESHOLD}.']
+ err += [f'The following labels have less than {MIN_INCIDENTS_THRESHOLD} incidents: ']
for x in labels_below_thresh:
- err += ['- {}: {}'.format(x, str(labels_counter[x]))]
+ err += [f'- {x}: {str(labels_counter[x])}']
err += ['Make sure that enough incidents exist in the environment per each of these labels.']
missing_labels = ', '.join(missing_labels_counter.keys())
- err += ['The following labels were not mapped to any label in the labels mapping: {}.'.format(missing_labels)]
+ err += [f'The following labels were not mapped to any label in the labels mapping: {missing_labels}.']
if labels_mapping != ALL_LABELS:
err += ['The given mapped labels are: {}.'.format(', '.join(labels_mapping.keys()))]
return_error('\n'.join(err))
@@ -269,7 +268,7 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing
for label, count in exist_labels_counter.items():
mapped_label = labels_mapping[label] if isinstance(labels_mapping, dict) else label
if mapped_label != label:
- label = "%s -> %s" % (label, mapped_label)
+ label = f"{label} -> {mapped_label}"
exist_labels_counter_mapped[label] = count
human_readable = tableToMarkdown("Found labels", exist_labels_counter_mapped)
entry = {
@@ -280,23 +279,23 @@ def validate_data_and_labels(data, exist_labels_counter, labels_mapping, missing
'HumanReadableFormat': formats['markdown'],
}
demisto.results(entry)
- if len(set([x[DBOT_TAG_FIELD] for x in data])) == 1:
+ if len({x[DBOT_TAG_FIELD] for x in data}) == 1:
single_label = [x[DBOT_TAG_FIELD] for x in data][0]
if labels_mapping == ALL_LABELS:
- err = ['All received incidents have the same label: {}.'.format(single_label)]
+ err = [f'All received incidents have the same label: {single_label}.']
else:
- err = ['All received incidents mapped to the same label: {}.'.format(single_label)]
+ err = [f'All received incidents mapped to the same label: {single_label}.']
err += ['At least 2 different labels are required to train a classifier.']
if labels_mapping == ALL_LABELS:
err += ['Please make sure that incidents of at least 2 labels exist in the environment.']
else:
err += ['The following labels were not mapped to any label in the labels mapping:']
- err += [', '.join([x for x in missing_labels_counter])]
+ err += [', '.join(list(missing_labels_counter))]
not_found_mapped_label = [x for x in labels_mapping if x not in exist_labels_counter
or exist_labels_counter[x] == 0]
if len(not_found_mapped_label) > 0:
miss = ', '.join(not_found_mapped_label)
- err += ['Notice that the following mapped labels were not found among all incidents: {}.'.format(miss)]
+ err += [f'Notice that the following mapped labels were not found among all incidents: {miss}.']
return_error('\n'.join(err))
@@ -354,6 +353,7 @@ def validate_labels_and_decide_algorithm(y, algorithm):
error += ['The following labels/verdicts need to be mapped to one of those values: ']
error += [', '.join(illegal_labels_for_fine_tune) + '.']
return_error('\n'.join(error))
+ return None
elif algorithm == AUTO_TRAINING_ALGO:
return FASTTEXT_TRAINING_ALGO
else:
diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml
index f57578960181..4bb5b1d03e35 100644
--- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml
+++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2.yml
@@ -121,8 +121,7 @@ tags:
- ml
timeout: 12µs
type: python
-dockerimage: demisto/ml:1.0.0.93129
-runonce: true
+dockerimage: demisto/ml:1.0.0.101889
tests:
- Create Phishing Classifier V2 ML Test
fromversion: 5.0.0
diff --git a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py
index 9793f0c45994..fd7072e30f04 100644
--- a/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py
+++ b/Packs/Base/Scripts/DBotTrainTextClassifierV2/DBotTrainTextClassifierV2_test.py
@@ -22,10 +22,10 @@ def test_read_file(mocker):
mocker.patch.object(demisto, 'getFilePath', return_value={'path': './TestData/input_json_file_test'})
obj = read_file('231342@343', 'json')
assert len(obj) >= 1
- with open('./TestData/input_json_file_test', 'r') as f:
+ with open('./TestData/input_json_file_test') as f:
obj = read_file(f.read(), 'json_string')
assert len(obj) >= 1
- with open('./TestData/input_json_file_test', 'r') as f:
+ with open('./TestData/input_json_file_test') as f:
b64_input = base64.b64encode(f.read().encode('utf-8')) # base64.b64encode(f.read())
obj = read_file(b64_input, 'json_b64_string')
assert len(obj) >= 1
diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py
index 31713f61858e..814b35692286 100644
--- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py
+++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.py
@@ -2,7 +2,6 @@
import pandas as pd
from sklearn.metrics import precision_score, recall_score, precision_recall_curve
from tabulate import tabulate
-from typing import Dict
from CommonServerPython import *
# pylint: disable=no-member
@@ -20,7 +19,7 @@
def bold_hr(s):
- return '**{}:**'.format(s)
+ return f'**{s}:**'
def binarize(arr, threshold):
@@ -61,8 +60,8 @@ def generate_metrics_df(y_true, y_true_per_class, y_pred, y_pred_per_class, thre
], ignore_index=True)
df = df[['Class', 'Precision', 'TP', 'FP', 'Coverage', 'Total']]
explained_metrics = ['Precision', 'TP (true positive)', 'FP (false positive)', 'Coverage', 'Total']
- explanation = ['{} {}'.format(bold_hr(metric), METRICS[metric]) for metric in explained_metrics]
- df.set_index('Class', inplace=True)
+ explanation = [f'{bold_hr(metric)} {METRICS[metric]}' for metric in explained_metrics]
+ df = df.set_index('Class')
return df, explanation
@@ -153,7 +152,7 @@ def output_report(y_true, y_true_per_class, y_pred, y_pred_per_class, found_thre
if detailed_output:
human_readable += human_readable_threshold + ['\n']
else:
- human_readable += ['## Results for confidence threshold = {:.2f}'.format(found_threshold)] + ['\n']
+ human_readable += [f'## Results for confidence threshold = {found_threshold:.2f}'] + ['\n']
human_readable += class_metrics_human_readable + ['\n']
human_readable += class_metrics_explanation_human_readable
human_readable += csr_matrix_readable
@@ -193,9 +192,8 @@ def merge_entries(entry, per_class_entry):
return entry
-def find_threshold(y_true_str, y_pred_str, customer_target_precision, target_recall, detailed_output=True):
- y_true = convert_str_to_json(y_true_str, 'yTrue')
- y_pred_all_classes = convert_str_to_json(y_pred_str, 'yPred')
+def find_threshold(y_true, y_pred_all_classes, customer_target_precision, target_recall, detailed_output=True):
+
labels = sorted(set(y_true + list(y_pred_all_classes[0].keys())))
n_instances = len(y_true)
y_true_per_class = {class_: np.zeros(n_instances) for class_ in labels}
@@ -248,7 +246,7 @@ def find_best_threshold_for_target_precision(class_to_arrs, customer_target_prec
precision_per_class[class_] = precision
break
if len(threshold_per_class) == len(labels):
- threshold_candidates = sorted(list(threshold_per_class.values()))
+ threshold_candidates = sorted(threshold_per_class.values())
for threshold in threshold_candidates:
legal_threshold_for_all_classes = True
threshold_precision = sys.maxsize
@@ -276,7 +274,7 @@ def calculate_per_class_report_entry(class_to_arrs, labels, y_pred_per_class, y_
'The following tables present evlauation of the model per class at different confidence thresholds:']
class_to_thresholds = {}
for class_ in labels:
- class_to_thresholds[class_] = set([0.001]) # using no threshold
+ class_to_thresholds[class_] = {0.001} # using no threshold
for target_precision in np.arange(0.95, 0.5, -0.05):
# indexing is done by purpose - the ith precision corresponds with threshold i-1. Last precision is 1
for i, precision in enumerate(class_to_arrs[class_]['precisions'][:-1]):
@@ -296,15 +294,15 @@ def calculate_per_class_report_entry(class_to_arrs, labels, y_pred_per_class, y_
row['Threshold'] = threshold
class_threshold_df = pd.concat([class_threshold_df, pd.DataFrame([row])], ignore_index=True)
class_threshold_df = reformat_df_fractions_to_percentage(class_threshold_df)
- class_threshold_df['Threshold'] = class_threshold_df['Threshold'].apply(lambda p: '{:.2f}'.format(p))
+ class_threshold_df['Threshold'] = class_threshold_df['Threshold'].apply(lambda p: f'{p:.2f}')
class_threshold_df = class_threshold_df[['Threshold', 'Precision', 'TP', 'FP', 'Coverage', 'Total']]
- class_threshold_df.sort_values(by='Coverage', ascending=False, inplace=True)
- class_threshold_df.drop_duplicates(subset='Threshold', inplace=True, keep='first')
- class_threshold_df.drop_duplicates(subset='Precision', inplace=True, keep='first')
- class_threshold_df.set_index('Threshold', inplace=True)
+ class_threshold_df = class_threshold_df.sort_values(by='Coverage', ascending=False)
+ class_threshold_df = class_threshold_df.drop_duplicates(subset='Threshold', keep='first')
+ class_threshold_df = class_threshold_df.drop_duplicates(subset='Precision', keep='first')
+ class_threshold_df = class_threshold_df.set_index('Threshold')
per_class_context[class_] = class_threshold_df.to_json()
tabulated_class_df = tabulate(class_threshold_df, tablefmt="pipe", headers="keys")
- per_class_hr += ['### {}'.format(class_), tabulated_class_df]
+ per_class_hr += [f'### {class_}', tabulated_class_df]
per_class_entry = {
'Type': entryTypes['note'],
'ContentsFormat': formats['json'],
@@ -321,31 +319,40 @@ def convert_str_to_json(str_json, var_name):
y_true = json.loads(str_json)
return y_true
except Exception as e:
- return_error('Exception while reading {} :{}'.format(var_name, e))
+ return_error(f'Exception while reading {var_name} :{e}')
def main():
- y_pred_all_classes = demisto.args()["yPred"]
- y_true = demisto.args()["yTrue"]
- target_precision = calculate_and_validate_float_parameter("targetPrecision")
- target_recall = calculate_and_validate_float_parameter("targetRecall")
- detailed_output = 'detailedOutput' in demisto.args() and demisto.args()['detailedOutput'] == 'true'
- entries = find_threshold(y_true_str=y_true,
- y_pred_str=y_pred_all_classes,
- customer_target_precision=target_precision,
- target_recall=target_recall,
- detailed_output=detailed_output)
-
- demisto.results(entries)
+ try:
+ y_pred_all_classes = demisto.args()["yPred"]
+ y_true = demisto.args()["yTrue"]
+ target_precision = calculate_and_validate_float_parameter("targetPrecision")
+ target_recall = calculate_and_validate_float_parameter("targetRecall")
+ detailed_output = 'detailedOutput' in demisto.args() and demisto.args()['detailedOutput'] == 'true'
+ y_true = convert_str_to_json(y_true, 'yTrue')
+ y_pred_all_classes = convert_str_to_json(y_pred_all_classes, 'yPred')
+
+ if not (y_true and y_pred_all_classes):
+ raise DemistoException('Either "yPred" or "yTrue" are empty.')
+
+ entries = find_threshold(y_true=y_true,
+ y_pred_all_classes=y_pred_all_classes,
+ customer_target_precision=target_precision,
+ target_recall=target_recall,
+ detailed_output=detailed_output)
+
+ demisto.results(entries)
+ except Exception as e:
+ return_error(f'Error in GetMLModelEvaluation:\n{e}')
def calculate_and_validate_float_parameter(var_name):
try:
res = float(demisto.args()[var_name]) if var_name in demisto.args() else 0
except Exception:
- return_error('{} must be a float between 0-1 or left empty'.format(var_name))
+ return_error(f'{var_name} must be a float between 0-1 or left empty')
if res < 0 or res > 1:
- return_error('{} must be a float between 0-1 or left empty'.format(var_name))
+ return_error(f'{var_name} must be a float between 0-1 or left empty')
return res
diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml
index 4d59a291421a..651f08b8e424 100644
--- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml
+++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation.yml
@@ -15,7 +15,7 @@ args:
isArray: true
name: targetRecall
- defaultValue: 'true'
- description: if set to 'true', the output will include a full exaplanation of the confidence threshold meaning.
+ description: if set to 'true', the output will include a full explanation of the confidence threshold meaning.
isArray: true
name: detailedOutput
predefined:
@@ -32,7 +32,7 @@ outputs:
description: The found thresholds which meets the conditions of precision and recall.
type: String
- contextPath: GetMLModelEvaluation.ConfusionMatrixAtThreshold
- description: The model evaluation confusion matrix for mails above the threhsold.
+ description: The model evaluation confusion matrix for mails above the threshold.
type: Unknown
- contextPath: GetMLModelEvaluation.Metrics
description: Metrics per each class (includes precision, true positive, coverage, etc.)
@@ -43,7 +43,7 @@ tags:
- ml
timeout: 60µs
type: python
-dockerimage: demisto/ml:1.0.0.88591
+dockerimage: demisto/ml:1.0.0.101889
tests:
- Create Phishing Classifier V2 ML Test
fromversion: 5.0.0
diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py
index e69aa533abe2..cc6835394e28 100644
--- a/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py
+++ b/Packs/Base/Scripts/GetMLModelEvaluation/GetMLModelEvaluation_test.py
@@ -1,4 +1,3 @@
-import json
from GetMLModelEvaluation import find_threshold
@@ -48,8 +47,8 @@ class 2 precision per threshold:
def test_threshold_found_0(mocker):
global y_true, y_pred
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.7) < 10 ** -2
@@ -57,8 +56,8 @@ def test_threshold_found_0(mocker):
def test_threshold_found_1(mocker):
global y_true, y_pred
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.63,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.7) < 10 ** -2
@@ -66,8 +65,8 @@ def test_threshold_found_1(mocker):
def test_threshold_found_2(mocker):
global y_true, y_pred
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.7,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2
@@ -75,16 +74,16 @@ def test_threshold_found_2(mocker):
def test_threshold_found_3(mocker):
global y_true, y_pred
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.875,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2
def test_no_existing_threshold(mocker):
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.9,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.8) < 10 ** -2
@@ -93,8 +92,8 @@ def test_no_existing_threshold(mocker):
def test_predictions_are_correct_and_all_equals_one_prob(mocker):
y_true = ['class1'] * 7 + ['class2'] * 7
y_pred = [{'class1': 0.95}] * 7 + [{'class2': 0.95}] * 7
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.6,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.95) < 10 ** -2
@@ -103,8 +102,8 @@ def test_predictions_are_correct_and_all_equals_one_prob(mocker):
def test_predictions_are_correct_and_almost_all_equals_one_prob(mocker):
y_true = ['class1'] * 7 + ['class2'] * 7
y_pred = [{'class1': 1}] * 6 + [{'class1': 0.95}] + [{'class2': 1}] * 7
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.6,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.95) < 10 ** -2
@@ -113,8 +112,8 @@ def test_predictions_are_correct_and_almost_all_equals_one_prob(mocker):
def test_plabook_test_simulation(mocker):
y_pred = [{"spam": 0.9987042546272278}, {"ham": 0.9987037777900696}]
y_true = ["spam", "ham"]
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.7,
target_recall=0)
assert abs(entry['Contents']['threshold'] - 0.9987037777900696) < 10 ** -2
@@ -123,8 +122,8 @@ def test_plabook_test_simulation(mocker):
def test_all_wrong_predictions(mocker):
y_true = ['class1'] * 7 + ['class2'] * 7
y_pred = [{'class2': 0.5}] * 7 + [{'class1': 0.5}] * 7
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0.6,
target_recall=0)
assert entry['Contents']['threshold'] >= 0.5
@@ -133,8 +132,8 @@ def test_all_wrong_predictions(mocker):
def test_all_wrong_predictions_2(mocker):
y_true = ['class1'] * 7 + ['class2'] * 7
y_pred = [{'class2': 0.5}] * 7 + [{'class1': 0.5}] * 7
- entry = find_threshold(y_pred_str=json.dumps(y_pred),
- y_true_str=json.dumps(y_true),
+ entry = find_threshold(y_pred_all_classes=y_pred,
+ y_true=y_true,
customer_target_precision=0,
target_recall=0)
assert entry['Contents']['threshold'] >= 0.5
diff --git a/Packs/Base/Scripts/GetMLModelEvaluation/README.md b/Packs/Base/Scripts/GetMLModelEvaluation/README.md
index 13cc64e0b779..395213a9d484 100644
--- a/Packs/Base/Scripts/GetMLModelEvaluation/README.md
+++ b/Packs/Base/Scripts/GetMLModelEvaluation/README.md
@@ -1,6 +1,7 @@
Finds a threshold for ML model, and performs an evaluation based on it
## Script Data
+
---
| **Name** | **Description** |
@@ -10,6 +11,7 @@ Finds a threshold for ML model, and performs an evaluation based on it
| Cortex XSOAR Version | 5.0.0 |
## Inputs
+
---
| **Argument Name** | **Description** |
@@ -18,13 +20,14 @@ Finds a threshold for ML model, and performs an evaluation based on it
| yPred | A list of dictionaries contain probability predictions for all classes |
| targetPrecision | minimum precision of all classes, ranges 0-1 |
| targetRecall | minimum recall of all classes, ranges 0-1 |
-| detailedOutput | if set to 'true', the output will include a full exaplanation of the confidence threshold meaning |
+| detailedOutput | if set to 'true', the output will include a full explanation of the confidence threshold meaning |
## Outputs
+
---
| **Path** | **Description** | **Type** |
| --- | --- | --- |
| GetMLModelEvaluation.Threshold | The found thresholds which meets the conditions of precision and recall | String |
-| GetMLModelEvaluation.ConfusionMatrixAtThreshold | The model evaluation confusion matrix for mails above the threhsold. | Unknown |
+| GetMLModelEvaluation.ConfusionMatrixAtThreshold | The model evaluation confusion matrix for mails above the threshold. | Unknown |
| GetMLModelEvaluation.Metrics | Metrics per each class \(includes precision, true positive, coverage, etc.\) | Unknown |
diff --git a/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml b/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml
new file mode 100644
index 000000000000..ace3806aea00
--- /dev/null
+++ b/Packs/Base/TestPlaybooks/playbook-DBotFindSimilarIncidents-test.yml
@@ -0,0 +1,340 @@
+id: DBotFindSimilarIncidents-test
+version: -1
+contentitemexportablefields:
+ contentitemfields: {}
+name: DBotFindSimilarIncidents-test
+starttaskid: "0"
+tasks:
+ "0":
+ id: "0"
+ taskid: 53859bf6-0ad5-48e8-83ea-e56e86b07a82
+ type: start
+ task:
+ id: 53859bf6-0ad5-48e8-83ea-e56e86b07a82
+ version: -1
+ name: ""
+ iscommand: false
+ brand: ""
+ description: ''
+ nexttasks:
+ '#none#':
+ - "5"
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 50
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "2":
+ id: "2"
+ taskid: ab3d08f1-8bc2-4c87-857b-2e29bb3f5f38
+ type: regular
+ task:
+ id: ab3d08f1-8bc2-4c87-857b-2e29bb3f5f38
+ version: -1
+ name: DBotFindSimilarIncidents
+ description: Find past similar incidents based on incident fields' similarity. Includes an option to also display indicators similarity.
+ scriptName: DBotFindSimilarIncidents
+ type: regular
+ iscommand: false
+ brand: ""
+ nexttasks:
+ '#none#':
+ - "3"
+ scriptarguments:
+ fieldExactMatch:
+ simple: accountname
+ fromDate:
+ simple: 1 hour
+ incidentId:
+ complex:
+ root: CreatedIncidentID
+ transformers:
+ - operator: atIndex
+ args:
+ index:
+ value:
+ simple: "0"
+ similarTextField:
+ simple: details
+ toDate:
+ simple: tomorrow
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 720
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "3":
+ id: "3"
+ taskid: bdd0da3d-8aad-4682-8aff-d2c6a3321690
+ type: condition
+ task:
+ id: bdd0da3d-8aad-4682-8aff-d2c6a3321690
+ version: -1
+ name: Check results
+ type: condition
+ iscommand: false
+ brand: ""
+ nexttasks:
+ "Yes":
+ - "4"
+ separatecontext: false
+ conditions:
+ - label: "Yes"
+ condition:
+ - - operator: isTrue
+ left:
+ value:
+ simple: DBotFindSimilarIncidents.isSimilarIncidentFound
+ iscontext: true
+ - - operator: isEqualString
+ left:
+ value:
+ simple: DBotFindSimilarIncidents.similarIncident.id
+ iscontext: true
+ right:
+ value:
+ complex:
+ root: CreatedIncidentID
+ transformers:
+ - operator: atIndex
+ args:
+ index:
+ value:
+ simple: "1"
+ iscontext: true
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 895
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "4":
+ id: "4"
+ taskid: bd930f7e-c2f1-4b46-8efa-6562c60105fe
+ type: title
+ task:
+ id: bd930f7e-c2f1-4b46-8efa-6562c60105fe
+ version: -1
+ name: Done
+ type: title
+ iscommand: false
+ brand: ""
+ description: ''
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 1070
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "5":
+ id: "5"
+ taskid: c2bb7962-e995-45e1-8636-0ecf2b3ff45c
+ type: regular
+ task:
+ id: c2bb7962-e995-45e1-8636-0ecf2b3ff45c
+ version: -1
+ name: Clear context
+ description: |-
+ Delete field from context.
+
+ This automation runs using the default Limited User role, unless you explicitly change the permissions.
+ For more information, see the section about permissions here:
+ https://docs-cortex.paloaltonetworks.com/r/Cortex-XSOAR/6.10/Cortex-XSOAR-Administrator-Guide/Automations
+ scriptName: DeleteContext
+ type: regular
+ iscommand: false
+ brand: ""
+ nexttasks:
+ '#none#':
+ - "6"
+ - "7"
+ scriptarguments:
+ all:
+ simple: "yes"
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 195
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "6":
+ id: "6"
+ taskid: 58890a1b-198a-4f21-87d6-a73d27b24075
+ type: regular
+ task:
+ id: 58890a1b-198a-4f21-87d6-a73d27b24075
+ version: -1
+ name: Create incident 1
+ description: commands.local.cmd.create.inc
+ script: Builtin|||createNewIncident
+ type: regular
+ iscommand: true
+ brand: Builtin
+ nexttasks:
+ '#none#':
+ - "8"
+ scriptarguments:
+ accountname:
+ simple: SimilarAccountName
+ details:
+ simple: this is a test incident and should match up with TestIncident_2
+ name:
+ simple: TestIncident_1
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 50,
+ "y": 370
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "7":
+ id: "7"
+ taskid: 89bf5bb4-77e5-4462-819e-656511050e55
+ type: regular
+ task:
+ id: 89bf5bb4-77e5-4462-819e-656511050e55
+ version: -1
+ name: Create incident 2
+ description: commands.local.cmd.create.inc
+ script: Builtin|||createNewIncident
+ type: regular
+ iscommand: true
+ brand: Builtin
+ nexttasks:
+ '#none#':
+ - "8"
+ scriptarguments:
+ accountname:
+ simple: SimilarAccountName
+ details:
+ simple: this is a test incident and should match up with TestIncident_1
+ name:
+ simple: TestIncident_2
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 480,
+ "y": 370
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "8":
+ id: "8"
+ taskid: e4380779-44e3-4395-8e2d-51e6e44ce672
+ type: regular
+ task:
+ id: e4380779-44e3-4395-8e2d-51e6e44ce672
+ version: -1
+ name: Sleep for ten seconds to let the incidents load
+ description: Sleep for X seconds.
+ scriptName: Sleep
+ type: regular
+ iscommand: false
+ brand: ""
+ nexttasks:
+ '#none#':
+ - "2"
+ scriptarguments:
+ seconds:
+ simple: "10"
+ separatecontext: false
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 265,
+ "y": 545
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+system: true
+view: |-
+ {
+ "linkLabelsPosition": {},
+ "paper": {
+ "dimensions": {
+ "height": 1085,
+ "width": 810,
+ "x": 50,
+ "y": 50
+ }
+ }
+ }
+inputs: []
+outputs: []
+fromversion: 6.9.0
+description: ''
diff --git a/Packs/Base/pack_metadata.json b/Packs/Base/pack_metadata.json
index 7809b945f12f..a775366f69e8 100644
--- a/Packs/Base/pack_metadata.json
+++ b/Packs/Base/pack_metadata.json
@@ -2,7 +2,7 @@
"name": "Base",
"description": "The base pack for Cortex XSOAR.",
"support": "xsoar",
- "currentVersion": "1.34.27",
+ "currentVersion": "1.34.28",
"author": "Cortex XSOAR",
"serverMinVersion": "6.0.0",
"url": "https://www.paloaltonetworks.com/cortex",
diff --git a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml
index e086cef5d8bf..1513b26d3780 100644
--- a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml
+++ b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsFile.yml
@@ -3409,9 +3409,9 @@ args:
defaultValue: encodedIncidentsFile
description: ''
scripttarget: 0
-subtype: python2
+subtype: python3
runonce: false
-dockerimage: demisto/python:2.7.18.9326
+dockerimage: demisto/python3:3.11.9.101916
runas: DBotWeakRole
comment: ''
fromversion: 5.0.0
diff --git a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml
index 46efe09ed8d2..2dd896044433 100644
--- a/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml
+++ b/Packs/DeveloperTools/TestPlaybooks/script-TestCreateIncidentsForPhishingClassifier.yml
@@ -16,13 +16,13 @@ script: >+
incident1_template = {
'type': 'Simulation',
- 'emailclassification': 'Tag1',
+ 'tags': 'Tag1',
}
incident2_template = {
'type': 'Simulation',
- 'emailclassification': 'Tag2',
+ 'tags': 'Tag2',
}
@@ -38,7 +38,7 @@ script: >+
for i in range(0, NUMBER_OF_INCIDENTS):
incidents.append({
'type': 'Simulation',
- 'emailclassification': 'Tag3',
+ 'tags': 'Tag3',
'dbot_processed_text': " ".join([words_tag3[i] for i in [random.randint(0, len(words_tag3)-1) for i in range(30)]])
})
@@ -53,9 +53,9 @@ script: >+
'Contents': 'Done crete incidents',
'ContentsFormat': formats['text'],
'EntryContext': {
- 'EmailSujbectKey': 'emailclassification',
+ 'EmailSujbectKey': 'tags',
'EmailBodyKey': 'details',
- 'EmailTagKey': 'emailclassification',
+ 'EmailTagKey': 'tags',
'IncidentsQuery': 'type:Simulation'
}
})
diff --git a/Packs/ML/ReleaseNotes/1_4_11.md b/Packs/ML/ReleaseNotes/1_4_11.md
new file mode 100644
index 000000000000..c10696c9c530
--- /dev/null
+++ b/Packs/ML/ReleaseNotes/1_4_11.md
@@ -0,0 +1,14 @@
+
+#### Scripts
+
+##### EvaluateMLModllAtProduction
+
+- Changed the Docker image to: *demisto/pandas:1.0.0.102566*.
+
+##### DBotPredictOutOfTheBoxV2
+
+- Updated the Docker image to: *demisto/ml:1.0.0.101889*.
+
+##### DBotPredictIncidentsBatch
+
+- Changed the Docker image to: *demisto/pandas:1.0.0.102566*.
diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py
index 0600e8045aac..297d09211d9f 100644
--- a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py
+++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.py
@@ -19,7 +19,7 @@ def get_phishing_map_labels(comma_values):
labels_dict[splited[0].strip()] = splited[1].strip()
else:
labels_dict[v] = v
- return {k: v for k, v in labels_dict.items()}
+ return dict(labels_dict.items())
def build_query_in_respect_to_phishing_labels(args):
@@ -29,12 +29,12 @@ def build_query_in_respect_to_phishing_labels(args):
return args
mapping_dict = get_phishing_map_labels(mapping)
tag_field = args['tagField']
- tags_union = ' '.join(['"{}"'.format(label) for label in mapping_dict])
- mapping_query = '{}:({})'.format(tag_field, tags_union)
+ tags_union = ' '.join([f'"{label}"' for label in mapping_dict])
+ mapping_query = f'{tag_field}:({tags_union})'
if 'query' not in args:
args['query'] = mapping_query
else:
- args['query'] = '({}) and ({})'.format(query, mapping_query)
+ args['query'] = f'({query}) and ({mapping_query})'
return args
@@ -78,7 +78,7 @@ def main():
incidents_df = pd.DataFrame(incidents)
predictions_df = pd.DataFrame(res[-1]['Contents'])
df = pd.concat([incidents_df, predictions_df], axis=1)
- df.rename(columns={"Label": "Prediction"}, inplace=True)
+ df = df.rename(columns={"Label": "Prediction"})
file_name = 'predictions.csv'
file_columns = ['id', tag_field_name, 'Prediction',
'Probability',
@@ -90,7 +90,7 @@ def main():
csv_data = filtered_df.to_csv()
entry = fileResult(file_name, csv_data)
entry['Contents'] = filtered_df.to_json(orient='records')
- entry['HumanReadable'] = 'File contains predictions of {} incidents'.format(len(incidents))
+ entry['HumanReadable'] = f'File contains predictions of {len(incidents)} incidents'
return entry
diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml
index 0debe7c2eff1..d7fdd4f6854f 100644
--- a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml
+++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch.yml
@@ -4,9 +4,9 @@ args:
- defaultValue: Phishing
description: A comma-separated list of incident types by which to filter.
name: incidentTypes
-- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200")'
+- description: 'The start date by which to filter incidents. Date format will be the same as in the incidents query page (valid string examples: "3 days ago", "2019-01-01T00:00:00 +0200").'
name: fromDate
-- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid strings exaple: "3 days ago", ""2019-01-01T00:00:00 +0200")'
+- description: 'The end date by which to filter incidents. Date format will be the same as in the incidents query page (valid string examples: "3 days ago", "2019-01-01T00:00:00 +0200").'
name: toDate
- defaultValue: '3000'
description: The maximum number of incidents to fetch.
@@ -17,7 +17,7 @@ args:
- description: If non-empty, hash every word with this seed.
name: hashSeed
- defaultValue: '*'
- description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious'
+ description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 5 values in email tag: malicious, credentials harvesting, inner communication, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communication" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communication:non-malicious, external legit email:non-malicious.'
name: phishingLabels
- description: The model name to store in the system.
name: modelName
@@ -46,7 +46,7 @@ tags:
- ml
timeout: '0'
type: python
-dockerimage: demisto/ml:1.0.0.45981
+dockerimage: demisto/pandas:1.0.0.102566
fromversion: 5.0.0
tests:
- VerifyOOBV2Predictions-Test
diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py b/Packs/ML/Scripts/DBotPredictIncidentsBatch/DBotPredictIncidentsBatch_test.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md b/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md
new file mode 100644
index 000000000000..89b2cf074c89
--- /dev/null
+++ b/Packs/ML/Scripts/DBotPredictIncidentsBatch/README.md
@@ -0,0 +1,51 @@
+Apply a trained ML model to multiple incidents at once, to compare how the incidents were labeled by analysts with the predictions of the model. This script is intended to help evaluate a trained model using past incidents.
+
+## Script Data
+
+---
+
+| **Name** | **Description** |
+| --- | --- |
+| Script Type | python3 |
+| Tags | phishing, ml |
+| Cortex XSOAR Version | 5.0.0 |
+
+## Dependencies
+
+---
+This script uses the following commands and scripts.
+
+* GetIncidentsByQuery
+* DBotPredictPhishingWords
+
+## Used In
+
+---
+This script is used in the following playbooks and scripts.
+
+* VerifyOOBV2Predictions-Test
+
+## Inputs
+
+---
+
+| **Argument Name** | **Description** |
+| --- | --- |
+| query | Additional text by which to query incidents. |
+| incidentTypes | A comma-separated list of incident types by which to filter. |
+| fromDate | The start date by which to filter incidents. Date format will be the same as in the incidents query page \(valid string examples: "3 days ago", "2019-01-01T00:00:00 \+0200"\) |
+| toDate | The end date by which to filter incidents. Date format will be the same as in the incidents query page \(valid string examples: "3 days ago", "2019-01-01T00:00:00 \+0200"\) |
+| limit | The maximum number of incidents to fetch. |
+| tagField | The field name with the label. Supports a comma-separated list, the first non-empty value will be taken. |
+| hashSeed | If non-empty, hash every word with this seed. |
+| phishingLabels | A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 5 values in email tag: malicious, credentials harvesting, inner communication, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communication" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communication:non-malicious, external legit email:non-malicious |
+| modelName | The model name to store in the system. |
+| emailsubject | Incident field name with the email subject. |
+| emailbody | Incident field name with the email body \(text\). |
+| emailbodyhtml | Incident field name with the email body \(html\). |
+| populateFields | A comma-separated list of fields in the object to populate. |
+
+## Outputs
+
+---
+There are no outputs for this script.
diff --git a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py
index 8bc85fac9f72..0c526a008b14 100644
--- a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py
+++ b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.py
@@ -37,7 +37,7 @@ def load_oob_model():
if is_error(res):
return_error(get_error(res))
- with open(EVALUATION_PATH, 'r') as json_file:
+ with open(EVALUATION_PATH) as json_file:
data = json.load(json_file)
y_test = data['YTrue']
y_pred = data['YPred']
@@ -76,6 +76,7 @@ def predict_phishing_words():
load_oob_model()
dargs = demisto.args()
dargs['modelName'] = OUT_OF_THE_BOX_MODEL_NAME
+ dargs['modelStoreType'] = 'mlModel'
res = demisto.executeCommand('DBotPredictPhishingWords', dargs)
if is_error(res):
return_error(get_error(res))
diff --git a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml
index a9c36418e659..6da8da23377f 100644
--- a/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml
+++ b/Packs/ML/Scripts/DBotPredictOutOfTheBoxV2/DBotPredictOutOfTheBoxV2.yml
@@ -59,8 +59,7 @@ script: '-'
subtype: python3
timeout: 60µs
type: python
-dockerimage: demisto/ml:1.0.0.32340
-runonce: true
+dockerimage: demisto/ml:1.0.0.101889
tests:
- DbotPredictOufOfTheBoxTestV2
- VerifyOOBV2Predictions-Test
diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py
index 27292ab3b254..8621fef38b9e 100644
--- a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py
+++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.py
@@ -25,7 +25,7 @@ def get_phishing_map_labels(comma_values):
labels_dict[v] = v
if len(set(labels_dict.values())) == 1:
mapped_value = list(labels_dict.values())[0]
- error = ['Label mapping error: you need to map to at least two labels: {}.'.format(mapped_value)]
+ error = [f'Label mapping error: you need to map to at least two labels: {mapped_value}.']
return_error('\n'.join(error))
return {k: canonize_label(v) for k, v in labels_dict.items()}
@@ -97,7 +97,7 @@ def return_file_result_with_predictions_on_test_set(data, y_true, y_pred, y_pred
def main(incident_types, incident_query, y_true_field, y_pred_field, y_pred_prob_field, model_target_accuracy,
labels_mapping, additional_fields):
- non_empty_fields = '{},{}'.format(y_true_field.strip(), y_pred_field.strip())
+ non_empty_fields = f'{y_true_field.strip()},{y_pred_field.strip()}'
incidents_query_args = {'incidentTypes': incident_types,
'NonEmptyFields': non_empty_fields,
}
@@ -106,42 +106,45 @@ def main(incident_types, incident_query, y_true_field, y_pred_field, y_pred_prob
incidents_query_res = demisto.executeCommand('GetIncidentsByQuery', incidents_query_args)
if is_error(incidents_query_res):
return_error(get_error(incidents_query_res))
- incidents = json.loads(incidents_query_res[-1]['Contents'])
- demisto.results('Found {} incidents'.format(len(incidents)))
- y_true = []
- y_pred = []
- y_pred_prob = []
- incidents_with_missing_pred_prob = 0
- for i in incidents:
- y_true.append(i[y_true_field])
- y_pred.append(i[y_pred_field])
- if y_pred_prob_field not in i:
- incidents_with_missing_pred_prob += 1
- y_pred_prob.append(i.get(y_pred_prob_field, None))
- y_true, relevant_indices = get_data_with_mapped_label(y_true, labels_mapping)
- y_pred = [y_pred[i] for i in relevant_indices]
- y_pred_prob = [y_pred_prob[i] for i in relevant_indices]
- incidents = [incidents[i] for i in relevant_indices]
- y_pred_prob_is_given = incidents_with_missing_pred_prob == 0
- if y_pred_prob_is_given:
- y_pred_dict = [{label: prob} for label, prob in zip(y_pred, y_pred_prob)]
+ incidents = json.loads(incidents_query_res[0]['Contents'])
+ if incidents:
+ demisto.results(f'Found {len(incidents)} incident(s)')
+ y_true = []
+ y_pred = []
+ y_pred_prob = []
+ incidents_with_missing_pred_prob = 0
+ for i in incidents:
+ y_true.append(i[y_true_field])
+ y_pred.append(i[y_pred_field])
+ if y_pred_prob_field not in i:
+ incidents_with_missing_pred_prob += 1
+ y_pred_prob.append(i.get(y_pred_prob_field, None))
+ y_true, relevant_indices = get_data_with_mapped_label(y_true, labels_mapping)
+ y_pred = [y_pred[i] for i in relevant_indices]
+ y_pred_prob = [y_pred_prob[i] for i in relevant_indices]
+ incidents = [incidents[i] for i in relevant_indices]
+ y_pred_prob_is_given = incidents_with_missing_pred_prob == 0
+ if y_pred_prob_is_given:
+ y_pred_dict = [{label: prob} for label, prob in zip(y_pred, y_pred_prob)]
+ else:
+ y_pred_dict = [{label: 1.0} for label in y_pred]
+ if y_pred_prob_is_given:
+ res_threshold = get_ml_model_evaluation(y_true, y_pred_dict, model_target_accuracy, target_recall=0,
+ detailed=True)
+ # show results for the threshold found - last result so it will appear first
+ output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res_threshold,
+ context_field='EvaluateMLModllAtProduction')
+ # show results if no threshold (threhsold=0) was used. Following code is reached only if a legal thresh was found:
+ if not y_pred_prob_is_given or not np.isclose(float(res_threshold[0]['Contents']['threshold']), 0):
+ res = get_ml_model_evaluation(y_true, y_pred_dict, target_accuracy=0, target_recall=0)
+ human_readable = '\n'.join(['## Results for No Threshold',
+ 'The following results were achieved by using no threshold (threshold equals 0)'])
+ output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res,
+ context_field='EvaluateMLModllAtProductionNoThresh',
+ human_readable_title=human_readable)
+ return_file_result_with_predictions_on_test_set(incidents, y_true, y_pred, y_pred_prob, additional_fields)
else:
- y_pred_dict = [{label: 1.0} for label in y_pred]
- if y_pred_prob_is_given:
- res_threshold = get_ml_model_evaluation(y_true, y_pred_dict, model_target_accuracy, target_recall=0,
- detailed=True)
- # show results for the threshold found - last result so it will appear first
- output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res_threshold,
- context_field='EvaluateMLModllAtProduction')
- # show results if no threshold (threhsold=0) was used. Following code is reached only if a legal thresh was found:
- if not y_pred_prob_is_given or not np.isclose(float(res_threshold[0]['Contents']['threshold']), 0):
- res = get_ml_model_evaluation(y_true, y_pred_dict, target_accuracy=0, target_recall=0)
- human_readable = '\n'.join(['## Results for No Threshold',
- 'The following results were achieved by using no threshold (threshold equals 0)'])
- output_model_evaluation(y_test=y_true, y_pred=y_pred_dict, res=res,
- context_field='EvaluateMLModllAtProductionNoThresh',
- human_readable_title=human_readable)
- return_file_result_with_predictions_on_test_set(incidents, y_true, y_pred, y_pred_prob, additional_fields)
+ return_results('No incidents found.')
model_target_accuracy = demisto.args().get('modelTargetAccuracy', 0)
diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml
index 7e8eb335fe41..fccba729dba6 100644
--- a/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml
+++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/EvaluateMLModllAtProduction.yml
@@ -17,7 +17,7 @@ args:
description: The model target accuracy, between 0 and 1.
name: modelTargetAccuracy
- defaultValue: '*'
- description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious'
+ description: 'A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communitcation, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communitcation" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communitcation:non-malicious, external legit email:non-malicious.'
name: phishingLabels
- description: A comma-separated list of incident field names to include in the results file.
name: additionalFields
@@ -42,7 +42,7 @@ outputs:
script: '-'
subtype: python3
type: python
-dockerimage: demisto/ml:1.0.0.45981
+dockerimage: demisto/pandas:1.0.0.102566
runas: DBotWeakRole
fromversion: 5.0.0
tags:
diff --git a/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md b/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md
new file mode 100644
index 000000000000..799b77172067
--- /dev/null
+++ b/Packs/ML/Scripts/EvaluateMLModllAtProduction/README.md
@@ -0,0 +1,52 @@
+Evaluates an ML model in production.
+
+## Script Data
+
+---
+
+| **Name** | **Description** |
+| --- | --- |
+| Script Type | python3 |
+| Tags | ml |
+| Cortex XSOAR Version | 5.0.0 |
+
+## Dependencies
+
+---
+This script uses the following commands and scripts.
+
+* GetIncidentsByQuery
+* GetMLModelEvaluation
+
+## Used In
+
+---
+This script is used in the following playbooks and scripts.
+
+* EvaluateMLModllAtProduction-Test
+
+## Inputs
+
+---
+
+| **Argument Name** | **Description** |
+| --- | --- |
+| incidentTypes | A comma-separated list of incident types by which to filter. |
+| incidentsQuery | The incident query to fetch the training data for the model. |
+| emailTagKey | The field name with the email tag. Supports a comma-separated list, the first non-empty value will be taken. |
+| emailPredictionKey | The field name with the model prediction. |
+| emailPredictionProbabilityKey | The field name with the model prediction probability. |
+| modelTargetAccuracy | The model target accuracy, between 0 and 1. |
+| phishingLabels | A comma-separated list of email tags values and mapping. The script considers only the tags specified in this field. You can map a label to another value by using this format: LABEL:MAPPED_LABEL. For example, for 4 values in email tag: malicious, credentials harvesting, inner communication, external legit email, unclassified. While training, we want to ignore "unclassified" tag, and refer to "credentials harvesting" as "malicious" too. Also, we want to merge "inner communication" and "external legit email" to one tag called "non-malicious". The input will be: malicious, credentials harvesting:malicious, inner communication:non-malicious, external legit email:non-malicious. |
+| additionalFields | A comma-separated list of incident field names to include in the results file. |
+
+## Outputs
+
+---
+
+| **Path** | **Description** | **Type** |
+| --- | --- | --- |
+| EvaluateMLModllAtProduction.EvaluationScores | The model evaluation scores \(precision, coverage, etc.\) for the found threshold. | Unknown |
+| EvaluateMLModllAtProduction.ConfusionMatrix | The model evaluation confusion matrix for the found threshold. | Unknown |
+| EvaluateMLModllAtProductionNoThresh.EvaluationScores | The model evaluation scores \(precision, coverage, etc.\) for threshold = 0. | Unknown |
+| EvaluateMLModllAtProductionNoThresh.ConfusionMatrix | The model evaluation confusion matrix for threshold = 0. | Unknown |
diff --git a/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml b/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml
index 83250a63c15d..af7ce16950d5 100644
--- a/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml
+++ b/Packs/ML/TestPlaybooks/playbook-Create_Phishing_Classifier_V2_ML_Test.yml
@@ -1,28 +1,28 @@
-elasticcommonfields: {}
id: Create Phishing Classifier V2 ML Test
version: -1
name: Create Phishing Classifier V2 ML Test
-description: Test CreatePhishingClassifierML playbook
+description: Test CreatePhishingClassifierML playbook.
starttaskid: "0"
tasks:
"0":
id: "0"
- taskid: 4aeda861-fb7f-490a-89ce-397ea2c1fbca
+ taskid: fbadefab-5b4a-4360-853c-81893d0cb492
type: start
task:
- id: 4aeda861-fb7f-490a-89ce-397ea2c1fbca
+ id: fbadefab-5b4a-4360-853c-81893d0cb492
version: -1
name: ""
iscommand: false
brand: ""
+ description: ''
nexttasks:
'#none#':
- - "30"
+ - "6"
separatecontext: false
view: |-
{
"position": {
- "x": 695,
+ "x": 50,
"y": 50
}
}
@@ -31,12 +31,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"2":
id: "2"
- taskid: 8bbf2fce-f2f1-49fc-8230-fe5b64b5a3c2
+ taskid: 0a894a8b-7b17-4ab4-8f79-643b3191165d
type: regular
task:
- id: 8bbf2fce-f2f1-49fc-8230-fe5b64b5a3c2
+ id: 0a894a8b-7b17-4ab4-8f79-643b3191165d
version: -1
name: Create incidents
scriptName: TestCreateIncidentsForPhishingClassifier
@@ -55,8 +58,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1010
+ "x": 50,
+ "y": 370
}
}
note: false
@@ -64,16 +67,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"3":
id: "3"
- taskid: 29dd84d6-efb8-4487-8003-141b98934662
+ taskid: baf8693e-4995-47f9-805f-3cbfe79f8ebc
type: regular
task:
- id: 29dd84d6-efb8-4487-8003-141b98934662
+ id: baf8693e-4995-47f9-805f-3cbfe79f8ebc
version: -1
name: Predict Tag1
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
+ description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision.
scriptName: DBotPredictPhishingWords
type: regular
iscommand: false
@@ -83,9 +88,7 @@ tasks:
- "15"
scriptarguments:
emailSubject:
- simple: closed church squeamish squeamish moaning closed closed closed church
- squeamish squeamish moaning closed closed closed church squeamish squeamish
- moaning closed closed
+ simple: closed church squeamish squeamish moaning closed closed closed church squeamish squeamish moaning closed closed closed church squeamish squeamish moaning closed closed
labelProbabilityThreshold:
simple: "0"
minTextLength:
@@ -98,8 +101,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1535
+ "x": 50,
+ "y": 895
}
}
note: false
@@ -107,23 +110,27 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"5":
id: "5"
- taskid: f7a44f7e-0b35-4447-8424-b1d49db3235b
+ taskid: 5a71b8d6-cf64-494d-8889-46f70cc67c13
type: title
task:
- id: f7a44f7e-0b35-4447-8424-b1d49db3235b
+ id: 5a71b8d6-cf64-494d-8889-46f70cc67c13
version: -1
name: Done
type: title
iscommand: false
brand: ""
+ description: ''
separatecontext: false
view: |-
{
"position": {
- "x": 695,
- "y": 2935
+ "x": 50,
+ "y": 2295
}
}
note: false
@@ -131,12 +138,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"6":
id: "6"
- taskid: 179f940e-7313-4fa5-8f5e-ba522c765669
+ taskid: 74a08b82-f3e3-40c5-8143-fa5c135e2ce9
type: regular
task:
- id: 179f940e-7313-4fa5-8f5e-ba522c765669
+ id: 74a08b82-f3e3-40c5-8143-fa5c135e2ce9
version: -1
name: Clear context
scriptName: DeleteContext
@@ -153,8 +163,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 835
+ "x": 50,
+ "y": 195
}
}
note: false
@@ -162,12 +172,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"8":
id: "8"
- taskid: 2d2c5687-3642-4fec-8a38-e0752ea4d398
+ taskid: 88ba54ff-84fd-4c91-8ae8-4f88a4a5cafd
type: regular
task:
- id: 2d2c5687-3642-4fec-8a38-e0752ea4d398
+ id: 88ba54ff-84fd-4c91-8ae8-4f88a4a5cafd
version: -1
name: clear context
scriptName: DeleteContext
@@ -184,8 +197,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1885
+ "x": 50,
+ "y": 1245
}
}
note: false
@@ -193,16 +206,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"13":
id: "13"
- taskid: 5d168b90-65c6-4f98-8759-3a9fcddf28b9
+ taskid: 67c5f1a7-4b58-4447-8cee-286f22b9139c
type: playbook
task:
- id: 5d168b90-65c6-4f98-8759-3a9fcddf28b9
+ id: 67c5f1a7-4b58-4447-8cee-286f22b9139c
version: -1
name: DBot Create Phishing Classifier V2
- description: Create a phishing classifier using machine learning technique,
- based on email content
+ description: Create a phishing classifier using machine learning technique, based on email content
playbookName: DBot Create Phishing Classifier V2
type: playbook
iscommand: false
@@ -244,8 +259,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1185
+ "x": 50,
+ "y": 545
}
}
note: false
@@ -253,12 +268,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"14":
id: "14"
- taskid: c786f22a-ce44-4021-84f6-1c74a3157049
+ taskid: d0388493-758a-496d-8e20-0f4994316318
type: condition
task:
- id: c786f22a-ce44-4021-84f6-1c74a3157049
+ id: d0388493-758a-496d-8e20-0f4994316318
version: -1
name: Model evaluation exist
type: condition
@@ -279,8 +297,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1360
+ "x": 50,
+ "y": 720
}
}
note: false
@@ -288,12 +306,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"15":
id: "15"
- taskid: c607874c-eac1-404d-8c31-620e541c5b7c
+ taskid: 87fcc375-89be-4f44-8123-9e995379a389
type: condition
task:
- id: c607874c-eac1-404d-8c31-620e541c5b7c
+ id: 87fcc375-89be-4f44-8123-9e995379a389
version: -1
name: 'Check the prediction label: Tag1'
type: condition
@@ -317,8 +338,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1710
+ "x": 50,
+ "y": 1070
}
}
note: false
@@ -326,12 +347,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"16":
id: "16"
- taskid: e1de53ff-a890-4c38-863a-3171f8705bc8
+ taskid: a4973892-625f-4fcc-8a96-a270795d2751
type: condition
task:
- id: e1de53ff-a890-4c38-863a-3171f8705bc8
+ id: a4973892-625f-4fcc-8a96-a270795d2751
version: -1
name: 'Check the prediction label: Tag2'
type: condition
@@ -355,8 +379,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 2235
+ "x": 50,
+ "y": 1595
}
}
note: false
@@ -364,16 +388,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"17":
id: "17"
- taskid: b783dbbf-bcbf-47be-8a5d-22dcc526df0e
+ taskid: bc73dcb7-4720-4504-85e0-590b5fe8fe02
type: regular
task:
- id: b783dbbf-bcbf-47be-8a5d-22dcc526df0e
+ id: bc73dcb7-4720-4504-85e0-590b5fe8fe02
version: -1
name: Predict Tag2
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
+ description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision.
scriptName: DBotPredictPhishingWords
type: regular
iscommand: false
@@ -383,9 +409,7 @@ tasks:
- "16"
scriptarguments:
emailSubject:
- simple: ntidy boy substance faulty waves type boat argument ntidy boy substance
- faulty waves type boat argument ntidy boy substance faulty waves type boat
- argument
+ simple: ntidy boy substance faulty waves type boat argument ntidy boy substance faulty waves type boat argument ntidy boy substance faulty waves type boat argument
labelProbabilityThreshold:
simple: "0"
minTextLength:
@@ -398,8 +422,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 2060
+ "x": 50,
+ "y": 1420
}
}
note: false
@@ -407,16 +431,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"18":
id: "18"
- taskid: cb137c7c-1ba3-4fea-8356-f4ecd3bf6193
+ taskid: 3573406e-2b81-4d40-8661-2680791e46f8
type: regular
task:
- id: cb137c7c-1ba3-4fea-8356-f4ecd3bf6193
+ id: 3573406e-2b81-4d40-8661-2680791e46f8
version: -1
name: Predict Tag3
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
+ description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision.
scriptName: DBotPredictPhishingWords
type: regular
iscommand: false
@@ -426,9 +452,7 @@ tasks:
- "20"
scriptarguments:
emailSubject:
- simple: suspend trucks aboriginal thread succeed gray last fall fall suspend
- trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal
- thread succeed gray last fall fall
+ simple: suspend trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal thread succeed gray last fall fall suspend trucks aboriginal thread succeed gray last fall fall
labelProbabilityThreshold:
simple: "0"
minTextLength:
@@ -441,8 +465,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 2585
+ "x": 50,
+ "y": 1945
}
}
note: false
@@ -450,12 +474,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"19":
id: "19"
- taskid: ed783755-907d-4097-8974-96034ab3b233
+ taskid: d462dd42-6b56-49bf-8428-ae96e1f1be16
type: regular
task:
- id: ed783755-907d-4097-8974-96034ab3b233
+ id: d462dd42-6b56-49bf-8428-ae96e1f1be16
version: -1
name: clear context
scriptName: DeleteContext
@@ -472,8 +499,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 2410
+ "x": 50,
+ "y": 1770
}
}
note: false
@@ -481,12 +508,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"20":
id: "20"
- taskid: 0e035d46-6829-41af-830c-6dfc7353dde1
+ taskid: 5cf7405e-c4c2-40a0-8e63-1913319a70f3
type: condition
task:
- id: 0e035d46-6829-41af-830c-6dfc7353dde1
+ id: 5cf7405e-c4c2-40a0-8e63-1913319a70f3
version: -1
name: 'Check the prediction label: Tag3'
type: condition
@@ -507,241 +537,11 @@ tasks:
right:
value:
simple: Tag3
- view: |-
- {
- "position": {
- "x": 695,
- "y": 2760
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
-
- "24":
- id: "24"
- taskid: 69c9af85-903f-4d2a-8540-fd48adb0c89b
- type: regular
- task:
- id: 69c9af85-903f-4d2a-8540-fd48adb0c89b
- version: -1
- name: Load prediction docker
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
- scriptName: DBotPredictPhishingWords
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "28"
- scriptarguments:
- modelName:
- simple: dummy
- continueonerror: true
- separatecontext: false
view: |-
{
"position": {
"x": 50,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
-
- "25":
- id: "25"
- taskid: ebf1c9ed-92a7-4633-8b37-42fb8570269f
- type: regular
- task:
- id: ebf1c9ed-92a7-4633-8b37-42fb8570269f
- version: -1
- name: Load evaluation docker
- description: Finds a threshold for ML model, and performs an evaluation based
- on it
- scriptName: GetMLModelEvaluation
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "28"
- scriptarguments:
- yPred:
- simple: dummy
- yTrue:
- simple: dummy
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 480,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "26":
- id: "26"
- taskid: bb61e790-fd72-49e8-842b-98933451305c
- type: regular
- task:
- id: bb61e790-fd72-49e8-842b-98933451305c
- version: -1
- name: Load training docker
- description: Train a machine learning text classifier.
- scriptName: DBotTrainTextClassifierV2
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "28"
- scriptarguments:
- input:
- simple: dummy_input
- tagField:
- simple: dummy
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 910,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
-
- "27":
- id: "27"
- taskid: 8e33ab3c-0c36-494a-8b2d-02a838b437a8
- type: regular
- task:
- id: 8e33ab3c-0c36-494a-8b2d-02a838b437a8
- version: -1
- name: Load Preprocessing Docker
- description: Pre-process text data for the machine learning text classifier.
- scriptName: DBotPreProcessTextData
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "28"
- scriptarguments:
- input:
- simple: dummy input
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 1340,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "28":
- id: "28"
- taskid: 285056a4-c36d-4fe3-836a-0635bbcb2902
- type: regular
- task:
- id: 285056a4-c36d-4fe3-836a-0635bbcb2902
- version: -1
- name: Wait for docker download
- description: Sleep for X seconds
- scriptName: Sleep
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "29"
- scriptarguments:
- seconds:
- simple: "10"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 515
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "29":
- id: "29"
- taskid: 36d855bc-d9a5-47f7-8f3e-5b72ab8fe194
- type: title
- task:
- id: 36d855bc-d9a5-47f7-8f3e-5b72ab8fe194
- version: -1
- name: Begin tests
- type: title
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "6"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 690
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "30":
- id: "30"
- taskid: 26c99254-1dd9-4faa-8c80-0762360a7221
- type: title
- task:
- id: 26c99254-1dd9-4faa-8c80-0762360a7221
- version: -1
- name: Load all dockers
- type: title
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "27"
- - "26"
- - "25"
- - "24"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 195
+ "y": 2120
}
}
note: false
@@ -749,13 +549,17 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
view: |-
{
"linkLabelsPosition": {},
"paper": {
"dimensions": {
- "height": 2950,
- "width": 1670,
+ "height": 2310,
+ "width": 380,
"x": 50,
"y": 50
}
@@ -763,4 +567,4 @@ view: |-
}
inputs: []
outputs: []
-fromversion: 6.1.0
\ No newline at end of file
+fromversion: 6.1.0
diff --git a/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml b/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml
index db43403fbdb5..1e133dce27be 100644
--- a/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml
+++ b/Packs/ML/TestPlaybooks/playbook-DBotPredictOutOfTheBoxV2-test.yml
@@ -5,14 +5,15 @@ starttaskid: "0"
tasks:
"0":
id: "0"
- taskid: b25fa8d2-98fa-4dc6-845c-99809370cfd4
+ taskid: 861b09ed-933f-4295-8ef1-1d804f3bd783
type: start
task:
- id: b25fa8d2-98fa-4dc6-845c-99809370cfd4
+ id: 861b09ed-933f-4295-8ef1-1d804f3bd783
version: -1
name: ""
iscommand: false
brand: ""
+ description: ''
nexttasks:
'#none#':
- "1"
@@ -20,7 +21,7 @@ tasks:
view: |-
{
"position": {
- "x": 265,
+ "x": 50,
"y": 50
}
}
@@ -29,12 +30,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"1":
id: "1"
- taskid: 8d59f33e-507c-4223-8480-c8bf26b7dac6
+ taskid: 82ddcae4-60d6-4ce6-8279-0c2cec7b435e
type: regular
task:
- id: 8d59f33e-507c-4223-8480-c8bf26b7dac6
+ id: 82ddcae4-60d6-4ce6-8279-0c2cec7b435e
version: -1
name: Clear Context
description: Delete field from context
@@ -44,20 +48,15 @@ tasks:
brand: ""
nexttasks:
'#none#':
- - "5"
- - "7"
+ - "2"
scriptarguments:
all:
simple: "yes"
- index: {}
- key: {}
- keysToKeep: {}
- subplaybook: {}
separatecontext: false
view: |-
{
"position": {
- "x": 265,
+ "x": 50,
"y": 195
}
}
@@ -66,12 +65,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"2":
id: "2"
- taskid: 242f4b80-ec3c-4bee-80dd-d835ff633640
+ taskid: 7bca2804-4463-46c3-884a-457baf90d247
type: regular
task:
- id: 242f4b80-ec3c-4bee-80dd-d835ff633640
+ id: 7bca2804-4463-46c3-884a-457baf90d247
version: -1
name: Predict Using Out Of The Box Model
scriptName: DBotPredictOutOfTheBoxV2
@@ -86,25 +88,14 @@ tasks:
simple: "0.5"
emailBody:
simple: 'Re: PO# OP848784204'
- emailBodyHTML: {}
emailSubject:
- simple: 'Dear office, Kindly find attached our new order (Po# OP848784204)
- and the attached letter for the bank payment. Please sign, stamp and resend. Kindly
- ship our order by using the service DHL EXPRESS WORLDWIDE. Our DHL account
- number is: 950389383 Thanks Best Regards, Cristina Cadano Marketing
- Officer - Procurement Officer Marketing@trustm.tv Tel. +974 4431 3336 Fax
- +974 4435 3336 P.O. Box 10536 Doha,'
- labelProbabilityThreshold: {}
- minTextLength: {}
- returnError: {}
- topWordsLimit: {}
- wordThreshold: {}
+ simple: 'Dear office, Kindly find attached our new order (Po# OP848784204) and the attached letter for the bank payment. Please sign, stamp and resend. Kindly ship our order by using the service DHL EXPRESS WORLDWIDE. Our DHL account number is: 950389383 Thanks Best Regards, Cristina Cadano Marketing Officer - Procurement Officer Marketing@trustm.tv Tel. +974 4431 3336 Fax +974 4435 3336 P.O. Box 10536 Doha,'
separatecontext: false
view: |-
{
"position": {
- "x": 265,
- "y": 720
+ "x": 50,
+ "y": 370
}
}
note: false
@@ -112,12 +103,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"3":
id: "3"
- taskid: 12333aa3-0590-49b2-866e-8c2b7b7b899a
+ taskid: f0db71aa-e449-4a5a-8e4f-f45b9725eb3c
type: condition
task:
- id: 12333aa3-0590-49b2-866e-8c2b7b7b899a
+ id: f0db71aa-e449-4a5a-8e4f-f45b9725eb3c
version: -1
name: Check Prediction
type: condition
@@ -125,7 +119,7 @@ tasks:
brand: ""
nexttasks:
"YES":
- - "4"
+ - "7"
separatecontext: false
conditions:
- label: "YES"
@@ -138,19 +132,27 @@ tasks:
right:
value:
simple: Malicious
- - - operator: greaterThan
+ - - operator: isEqualNumber
left:
value:
- simple: DBotPredictPhishingWords.Probability
+ complex:
+ root: DBotPredictPhishingWords
+ accessor: Probability
+ transformers:
+ - operator: precision
+ args:
+ by:
+ value:
+ simple: "2"
iscontext: true
right:
value:
- simple: "0.5"
+ simple: "0.68"
view: |-
{
"position": {
- "x": 265,
- "y": 895
+ "x": 50,
+ "y": 545
}
}
note: false
@@ -158,23 +160,27 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"4":
id: "4"
- taskid: a2a470aa-9a87-4fa2-804d-de19d4d0b285
+ taskid: 5183b8ff-ca1f-48a9-8bb7-b14c5681fa39
type: title
task:
- id: a2a470aa-9a87-4fa2-804d-de19d4d0b285
+ id: 5183b8ff-ca1f-48a9-8bb7-b14c5681fa39
version: -1
name: Done
type: title
iscommand: false
brand: ""
+ description: ''
separatecontext: false
view: |-
{
"position": {
- "x": 265,
- "y": 1070
+ "x": 50,
+ "y": 1770
}
}
note: false
@@ -182,40 +188,38 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"5":
id: "5"
- taskid: 45bfa561-25d8-4438-8780-de8166abd545
+ taskid: 77722f64-31d6-4da4-87b2-9b876ab1eb3f
type: regular
task:
- id: 45bfa561-25d8-4438-8780-de8166abd545
+ id: 77722f64-31d6-4da4-87b2-9b876ab1eb3f
version: -1
- name: Load Automation's Docker
- description: Predict phishing incidents using the out-of-the-box pretrained
- model.
+ name: Predict Using Out Of The Box Model - Not Malicious
+ description: Predict phishing incidents using the out-of-the-box pre-trained model.
scriptName: DBotPredictOutOfTheBoxV2
type: regular
iscommand: false
brand: ""
nexttasks:
'#none#':
- - "6"
+ - "14"
scriptarguments:
- emailBody:
- simple: test
- emailBodyHTML: {}
- emailSubject: {}
- labelProbabilityThreshold: {}
- minTextLength: {}
- returnError: {}
- topWordsLimit: {}
- wordThreshold: {}
- continueonerror: true
+ emailBodyHTML:
+ simple: "
Hi testbox@demistodev.onmicrosoft.com,
We've received your email and are investigating.
Do not touch the email until further notice.
Cordially,
Your friendly neighborhood security team"
+ emailSubject:
+ simple: 'Re: Phishing Investigation - Message from Cortex XSOAR Security Operations Server'
+ confidenceThreshold:
+ simple: "0.5"
separatecontext: false
view: |-
{
"position": {
"x": 50,
- "y": 370
+ "y": 895
}
}
note: false
@@ -223,31 +227,34 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
- "6":
- id: "6"
- taskid: 784d63be-adc2-4c91-8c80-6cb5ba991e1e
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "7":
+ id: "7"
+ taskid: 6140b6eb-2472-4dc0-81b9-b0c222947802
type: regular
task:
- id: 784d63be-adc2-4c91-8c80-6cb5ba991e1e
+ id: 6140b6eb-2472-4dc0-81b9-b0c222947802
version: -1
- name: Wait for automation
- description: Sleep for X seconds
- scriptName: Sleep
+ name: Clear Context
+ description: Delete field from context
+ scriptName: DeleteContext
type: regular
iscommand: false
brand: ""
nexttasks:
'#none#':
- - "2"
+ - "5"
scriptarguments:
- seconds:
- simple: "60"
+ all:
+ simple: "yes"
separatecontext: false
view: |-
{
"position": {
"x": 50,
- "y": 545
+ "y": 720
}
}
note: false
@@ -255,40 +262,34 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
- "7":
- id: "7"
- taskid: 309a9ea9-16a6-4529-83cb-ac798529290b
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "8":
+ id: "8"
+ taskid: 306b2d8c-734a-4cb2-87c1-2909d1fb3a7f
type: regular
task:
- id: 309a9ea9-16a6-4529-83cb-ac798529290b
+ id: 306b2d8c-734a-4cb2-87c1-2909d1fb3a7f
version: -1
- name: Load Automation's Docker
- description: Predict phishing incidents using the out-of-the-box pretrained
- model.
- scriptName: DBotPredictOutOfTheBoxV2
+ name: Clear Context
+ description: Delete field from context
+ scriptName: DeleteContext
type: regular
iscommand: false
brand: ""
nexttasks:
'#none#':
- - "8"
+ - "9"
scriptarguments:
- emailBody:
- simple: test
- emailBodyHTML: {}
- emailSubject: {}
- labelProbabilityThreshold: {}
- minTextLength: {}
- returnError: {}
- topWordsLimit: {}
- wordThreshold: {}
- continueonerror: true
+ all:
+ simple: "yes"
separatecontext: false
view: |-
{
"position": {
- "x": 480,
- "y": 370
+ "x": 50,
+ "y": 1245
}
}
note: false
@@ -296,31 +297,152 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
- "8":
- id: "8"
- taskid: ef6ce480-7598-4dfe-85fe-c74104790bdb
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "9":
+ id: "9"
+ taskid: d36fbfea-a8d8-472f-8614-a56c73f2950c
type: regular
task:
- id: ef6ce480-7598-4dfe-85fe-c74104790bdb
+ id: d36fbfea-a8d8-472f-8614-a56c73f2950c
version: -1
- name: Wait for automation
- description: Sleep for X seconds
- scriptName: Sleep
+ name: Predict Using Out Of The Box Model
+ scriptName: DBotPredictOutOfTheBoxV2
type: regular
iscommand: false
brand: ""
nexttasks:
'#none#':
- - "2"
+ - "15"
scriptarguments:
- seconds:
- simple: "60"
+ confidenceThreshold:
+ simple: "0.5"
+ emailBodyHTML:
+ simple: "\n\n\n\nUntitled Document\n\n\n\n\n\t\n\t\t\n\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t Delivery Notification\n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t | \n\t\t\t\t\t\t \n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t Order: SGH-9226-99950127 \n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t Dear Customer, \n\t\t\t\t\t\t\t\t Your parcel has arrived at the post office. Our courier attempted but was unable to deliver the parcel to you. \n\t\t\t\t\t\t\t\t To receive your parcel, please go to the nearest office and show this receipt. \n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t | \n\t\t\t\t\t\t \n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t | \n\t\t\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t | \n\t\t\t\t\t\t \n\t\t\t\t\t\t\n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t Thank you \n\t\t\t\t\t\t\t | \n\t\t\t\t\t\t \n\t\t\t\t\t\n\t\t\t\t \n\t\t\t | \n\t\t
\n\t\n
\n\n\n\n\n\n"
+ emailSubject:
+ simple: Package Undeliverable
separatecontext: false
+ continueonerrortype: ""
view: |-
{
"position": {
- "x": 480,
- "y": 545
+ "x": 50,
+ "y": 1420
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "14":
+ id: "14"
+ taskid: f871509d-b1c4-41ce-828c-0659b033b9b1
+ type: condition
+ task:
+ id: f871509d-b1c4-41ce-828c-0659b033b9b1
+ version: -1
+ name: Check Prediction
+ type: condition
+ iscommand: false
+ brand: ""
+ nexttasks:
+ "YES":
+ - "8"
+ separatecontext: false
+ conditions:
+ - label: "YES"
+ condition:
+ - - operator: isEqualString
+ left:
+ value:
+ simple: DBotPredictPhishingWords.Label
+ iscontext: true
+ right:
+ value:
+ simple: Non-Malicious
+ - - operator: isEqualNumber
+ left:
+ value:
+ complex:
+ root: DBotPredictPhishingWords
+ accessor: Probability
+ transformers:
+ - operator: precision
+ args:
+ by:
+ value:
+ simple: "2"
+ iscontext: true
+ right:
+ value:
+ simple: "0.60"
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 50,
+ "y": 1070
+ }
+ }
+ note: false
+ timertriggers: []
+ ignoreworker: false
+ skipunavailable: false
+ quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
+ "15":
+ id: "15"
+ taskid: e1ba8eba-a7e3-43e8-8a8a-031a51f1b99f
+ type: condition
+ task:
+ id: e1ba8eba-a7e3-43e8-8a8a-031a51f1b99f
+ version: -1
+ name: Check Prediction
+ type: condition
+ iscommand: false
+ brand: ""
+ nexttasks:
+ "YES":
+ - "4"
+ separatecontext: false
+ conditions:
+ - label: "YES"
+ condition:
+ - - operator: isEqualString
+ left:
+ value:
+ simple: DBotPredictPhishingWords.Label
+ iscontext: true
+ right:
+ value:
+ simple: Malicious
+ - - operator: isEqualNumber
+ left:
+ value:
+ complex:
+ root: DBotPredictPhishingWords
+ accessor: Probability
+ transformers:
+ - operator: precision
+ args:
+ by:
+ value:
+ simple: "2"
+ iscontext: true
+ right:
+ value:
+ simple: "0.95"
+ continueonerrortype: ""
+ view: |-
+ {
+ "position": {
+ "x": 50,
+ "y": 1595
}
}
note: false
@@ -328,13 +450,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ isoversize: false
+ isautoswitchedtoquietmode: false
view: |-
{
"linkLabelsPosition": {},
"paper": {
"dimensions": {
- "height": 1085,
- "width": 810,
+ "height": 1785,
+ "width": 380,
"x": 50,
"y": 50
}
@@ -342,4 +466,5 @@ view: |-
}
inputs: []
outputs: []
-fromversion: 5.5.0
\ No newline at end of file
+fromversion: 5.5.0
+description: ''
diff --git a/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml b/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml
index c955a614d03f..9e6b3e43c172 100644
--- a/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml
+++ b/Packs/ML/TestPlaybooks/playbook-DBot_Create_Phishing_Classifier_V2_From_File-_Test.yml
@@ -5,22 +5,23 @@ starttaskid: "0"
tasks:
"0":
id: "0"
- taskid: e43d8441-51e2-4201-87ba-a15423de05ec
+ taskid: 8210b4a0-dd4b-4b24-8893-ab9f3e5d21ad
type: start
task:
- id: e43d8441-51e2-4201-87ba-a15423de05ec
+ id: 8210b4a0-dd4b-4b24-8893-ab9f3e5d21ad
version: -1
name: ""
iscommand: false
brand: ""
+ description: ''
nexttasks:
'#none#':
- - "17"
+ - "6"
separatecontext: false
view: |-
{
"position": {
- "x": 695,
+ "x": 50,
"y": 50
}
}
@@ -29,12 +30,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"1":
id: "1"
- taskid: 6512432c-e980-41da-8a85-e318d7bbffbe
+ taskid: 622b1351-3b36-41a8-8617-e8c123c7ce02
type: regular
task:
- id: 6512432c-e980-41da-8a85-e318d7bbffbe
+ id: 622b1351-3b36-41a8-8617-e8c123c7ce02
version: -1
name: Create Incidents File
script: TestCreateIncidentsFile
@@ -51,8 +55,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1010
+ "x": 50,
+ "y": 370
}
}
note: false
@@ -60,16 +64,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"3":
id: "3"
- taskid: ae7d8277-e9b2-4c71-8c5d-70979e7f4267
+ taskid: 1aa209a9-3d42-4680-8ecf-10d050b9ba6e
type: regular
task:
- id: ae7d8277-e9b2-4c71-8c5d-70979e7f4267
+ id: 1aa209a9-3d42-4680-8ecf-10d050b9ba6e
version: -1
name: Predict Sentence
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
+ description: Predict text label using a pre-trained machine learning phishing model, and get the most important words used in the classification decision.
scriptName: DBotPredictPhishingWords
type: regular
iscommand: false
@@ -80,25 +86,18 @@ tasks:
scriptarguments:
emailBody:
simple: this message is spam. this message is spam
- emailBodyHTML: {}
- emailSubject: {}
- hashSeed: {}
labelProbabilityThreshold:
simple: "0"
minTextLength:
simple: "0"
modelName:
simple: ${DBotPhishingClassifier.ModelName}
- modelStoreType: {}
- returnError: {}
- topWordsLimit: {}
- wordThreshold: {}
separatecontext: false
view: |-
{
"position": {
- "x": 695,
- "y": 1360
+ "x": 50,
+ "y": 720
}
}
note: false
@@ -106,12 +105,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"4":
id: "4"
- taskid: e03072b6-e44e-40bd-8389-68c36bb5b435
+ taskid: 067671f5-b98d-4e1e-8e6e-fa4c43ff7de0
type: condition
task:
- id: e03072b6-e44e-40bd-8389-68c36bb5b435
+ id: 067671f5-b98d-4e1e-8e6e-fa4c43ff7de0
version: -1
name: Check Prediction
type: condition
@@ -143,8 +145,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 1535
+ "x": 50,
+ "y": 895
}
}
note: false
@@ -152,23 +154,27 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"5":
id: "5"
- taskid: 2f277472-af83-495c-89d3-44f1585258e2
+ taskid: 7f39c142-9139-4e4f-880a-5ac03f42f5f9
type: title
task:
- id: 2f277472-af83-495c-89d3-44f1585258e2
+ id: 7f39c142-9139-4e4f-880a-5ac03f42f5f9
version: -1
name: Done
type: title
iscommand: false
brand: ""
+ description: ''
separatecontext: false
view: |-
{
"position": {
- "x": 695,
- "y": 1710
+ "x": 50,
+ "y": 1070
}
}
note: false
@@ -176,12 +182,15 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"6":
id: "6"
- taskid: 06ebace9-f13b-4e78-88d9-e799beb78b91
+ taskid: 47ebe0a5-32d3-404e-851a-99aa212e76ef
type: regular
task:
- id: 06ebace9-f13b-4e78-88d9-e799beb78b91
+ id: 47ebe0a5-32d3-404e-851a-99aa212e76ef
version: -1
name: Clean Context
description: Delete field from context
@@ -199,8 +208,8 @@ tasks:
view: |-
{
"position": {
- "x": 695,
- "y": 835
+ "x": 50,
+ "y": 195
}
}
note: false
@@ -208,16 +217,18 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
"7":
id: "7"
- taskid: b7804202-3267-454e-8855-6105c41c15f7
+ taskid: 76e221f8-85d5-4107-816b-b86cb93a5e7b
type: playbook
task:
- id: b7804202-3267-454e-8855-6105c41c15f7
+ id: 76e221f8-85d5-4107-816b-b86cb93a5e7b
version: -1
name: DBot Create Phishing Classifier V2 From File
- description: Create a phishing classifier using machine learning. The classifier
- is based on incidents files extracted from email content.
+ description: Create a phishing classifier using machine learning. The classifier is based on incidents files extracted from email content.
playbookName: DBot Create Phishing Classifier V2 From File
type: playbook
iscommand: false
@@ -235,8 +246,7 @@ tasks:
emailTextKey:
simple: Email Body|Email Body HTML|details
fileID:
- simple: '${.=(val.File instanceof Array ? val.File[val.File.length-1].EntryID
- : val.File.EntryID)}'
+ simple: '${.=(val.File instanceof Array ? val.File[val.File.length-1].EntryID : val.File.EntryID)}'
incidentTypes:
simple: Phishing
inputFormat:
@@ -261,238 +271,11 @@ tasks:
exitCondition: ""
wait: 1
max: 0
- view: |-
- {
- "position": {
- "x": 695,
- "y": 1185
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "11":
- id: "11"
- taskid: 7ef4beea-9151-425e-8bce-53eb0e993f50
- type: regular
- task:
- id: 7ef4beea-9151-425e-8bce-53eb0e993f50
- version: -1
- name: Load prediction docker
- description: Predict text label using a pre-trained machine learning phishing
- model, and get the most important words used in the classification decision.
- scriptName: DBotPredictPhishingWords
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "15"
- scriptarguments:
- modelName:
- simple: dummy
- continueonerror: true
- separatecontext: false
view: |-
{
"position": {
"x": 50,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "12":
- id: "12"
- taskid: cd9a8a22-a47d-4c3c-83df-592577755ece
- type: regular
- task:
- id: cd9a8a22-a47d-4c3c-83df-592577755ece
- version: -1
- name: Load evaluation docker
- description: Finds a threshold for ML model, and performs an evaluation based
- on it
- scriptName: GetMLModelEvaluation
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "15"
- scriptarguments:
- yPred:
- simple: dummy
- yTrue:
- simple: dummy
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 480,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "13":
- id: "13"
- taskid: 5f0baead-ae59-4311-8f28-50effdfd7c1b
- type: regular
- task:
- id: 5f0baead-ae59-4311-8f28-50effdfd7c1b
- version: -1
- name: Load training docker
- description: Train a machine learning text classifier.
- scriptName: DBotTrainTextClassifierV2
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "15"
- scriptarguments:
- input:
- simple: dummy_input
- tagField:
- simple: dummy
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 910,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "14":
- id: "14"
- taskid: ba870761-cad7-444e-85ca-d6ca9f505f36
- type: regular
- task:
- id: ba870761-cad7-444e-85ca-d6ca9f505f36
- version: -1
- name: Load Preprocessing Docker
- description: Pre-process text data for the machine learning text classifier.
- scriptName: DBotPreProcessTextData
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "15"
- scriptarguments:
- input:
- simple: dummy input
- continueonerror: true
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 1340,
- "y": 340
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "15":
- id: "15"
- taskid: 7974be6e-6e04-4da2-8e8c-6131f6d586b5
- type: regular
- task:
- id: 7974be6e-6e04-4da2-8e8c-6131f6d586b5
- version: -1
- name: Wait for docker download
- description: Sleep for X seconds
- scriptName: Sleep
- type: regular
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "16"
- scriptarguments:
- seconds:
- simple: "10"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 515
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "16":
- id: "16"
- taskid: 6d0b0b2e-851a-4f79-8371-56199707908e
- type: title
- task:
- id: 6d0b0b2e-851a-4f79-8371-56199707908e
- version: -1
- name: Begin tests
- type: title
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "6"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 690
- }
- }
- note: false
- timertriggers: []
- ignoreworker: false
- skipunavailable: false
- quietmode: 0
- "17":
- id: "17"
- taskid: 07f113b4-ad97-4426-8bb9-47f7cefe0187
- type: title
- task:
- id: 07f113b4-ad97-4426-8bb9-47f7cefe0187
- version: -1
- name: Load all dockers
- type: title
- iscommand: false
- brand: ""
- nexttasks:
- '#none#':
- - "14"
- - "13"
- - "12"
- - "11"
- separatecontext: false
- view: |-
- {
- "position": {
- "x": 695,
- "y": 195
+ "y": 545
}
}
note: false
@@ -500,13 +283,16 @@ tasks:
ignoreworker: false
skipunavailable: false
quietmode: 0
+ continueonerrortype: ""
+ isoversize: false
+ isautoswitchedtoquietmode: false
view: |-
{
"linkLabelsPosition": {},
"paper": {
"dimensions": {
- "height": 1725,
- "width": 1670,
+ "height": 1085,
+ "width": 380,
"x": 50,
"y": 50
}
@@ -514,4 +300,5 @@ view: |-
}
inputs: []
outputs: []
-fromversion: 6.1.0
\ No newline at end of file
+fromversion: 6.1.0
+description: ''
diff --git a/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml b/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml
index d274790f274c..4a14ec137abc 100644
--- a/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml
+++ b/Packs/ML/TestPlaybooks/script-CompareEnvPredictionsToExpectedPredictions.yml
@@ -19,7 +19,7 @@ script: |-
message = '{}/{} correct predictions. '.format(len(df)- len(wrong_predictions_ids), len(df))
if len(wrong_predictions_ids) > 0:
message += 'Wrong predictions:\n {}'.format('\n'.join([str(id_) for id_ in wrong_predictions_ids]))
- return_outputs(message, {'CompareEnvPredictionsToExpectedPredictions .allPredictionsMatched': len(wrong_predictions_ids)== 0})
+ return_outputs(message, {'CompareEnvPredictionsToExpectedPredictions.allPredictionsMatched': len(wrong_predictions_ids)== 0})
type: python
tags: []
enabled: true
@@ -27,11 +27,10 @@ args:
- name: input
required: true
outputs:
-- contextPath: CompareEnvPredictionsToExpectedPredictions .allPredictionsMatched
+- contextPath: CompareEnvPredictionsToExpectedPredictions.allPredictionsMatched
scripttarget: 0
subtype: python3
pswd: ""
-runonce: false
-dockerimage: demisto/ml:1.0.0.20606
+dockerimage: demisto/pandas:1.0.0.102566
runas: DBotWeakRole
fromversion: 5.5.0
\ No newline at end of file
diff --git a/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml b/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml
index 63ba305fb488..8b6cd736e775 100644
--- a/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml
+++ b/Packs/ML/TestPlaybooks/script-CreateIncidentsForEvaluateMLModllAtProduction.yml
@@ -17,16 +17,16 @@ script: >+
incident1_template = {
'type': 'Simulation',
'name': 'Tag1',
- 'dbotprediction': 'ham',
- 'dbotpredictionprobability': 1.0
+ 'classification': 'ham',
+ 'description': 1.0
}
incident2_template = {
'type': 'Simulation',
'name': 'Tag2',
- 'dbotprediction': 'spam',
- 'dbotpredictionprobability': 1.0,
+ 'classification': 'spam',
+ 'description': 1.0,
}
@@ -54,8 +54,8 @@ script: >+
'EmailBodyKey': 'details',
'EmailTagKey': 'name',
'IncidentsQuery': 'type:Simulation',
- 'EmailPredictionKey': 'dbotprediction',
- 'EmailPredictionProbabilityKey': 'dbotpredictionprobability'
+ 'EmailPredictionKey': 'classification',
+ 'EmailPredictionProbabilityKey': 'description'
}
})
diff --git a/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml b/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml
index f1d52f2dba57..66ce67eeb158 100644
--- a/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml
+++ b/Packs/ML/TestPlaybooks/script-CreateIncidentsOutOfTheBoxV2.yml
@@ -5,7 +5,6 @@ vcShouldKeepItemLegacyProdMachine: false
name: CreateIncidentsOutOfTheBoxV2
script: |2+
-
import json
incidents = [
diff --git a/Packs/ML/pack_metadata.json b/Packs/ML/pack_metadata.json
index 4ac67ca6f356..379bd599d230 100644
--- a/Packs/ML/pack_metadata.json
+++ b/Packs/ML/pack_metadata.json
@@ -2,7 +2,7 @@
"name": "Machine Learning",
"description": "Help to manage machine learning models in Cortex XSOAR",
"support": "xsoar",
- "currentVersion": "1.4.10",
+ "currentVersion": "1.4.11",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
diff --git a/Tests/conf.json b/Tests/conf.json
index 40d448631885..9acc8c49e36c 100644
--- a/Tests/conf.json
+++ b/Tests/conf.json
@@ -3678,6 +3678,9 @@
{
"playbookID": "GetIndicatorsByQuery - Test"
},
+ {
+ "playbookID": "DBotFindSimilarIncidents-test"
+ },
{
"playbookID": "DBotCreatePhishingClassifierV2FromFile-Test",
"timeout": 60000,
@@ -5940,7 +5943,6 @@
"ThreatGrid_v2_Test": "No instance, developed by Qmasters",
"Test-Detonate URL - ThreatGrid": "No instance, developed by Qmasters",
"awake_security_test_pb": "No instance, CRTX-77572",
- "Create Phishing Classifier V2 ML Test": "Updated docker image lacks data for the ml model. Once data issue is solved for ml module can un skip. ",
"SumoLogic-Test": "401 unauthorized, CIAC-6334",
"EWS_O365_test": "Issue CIAC-6753",
"Microsoft Defender Advanced Threat Protection - Test dev": "Issue CIAC-7514",