-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4603340
commit 2973cbd
Showing
9 changed files
with
1,256 additions
and
1,256 deletions.
There are no files selected for viewing
176 changes: 88 additions & 88 deletions
176
...content/propaganda/ArProBinary_FewShot.py → ...nt/propaganda/ArProBinary_GPT4_FewShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,88 +1,88 @@ | ||
import re | ||
|
||
from llmebench.datasets import ArProBinaryDataset | ||
from llmebench.models import OpenAIModel | ||
from llmebench.tasks import ArProTask | ||
|
||
|
||
def metadata():
    """Describe this benchmark asset: author, backing model, and reported score."""
    info = {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"Micro-F1": "0.592"},
    }
    return info
|
||
|
||
def config():
    """Wire the dataset, task, and model classes used by the benchmark runner."""
    model_settings = {"max_tries": 3}  # retry budget for transient API failures
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }
|
||
|
||
def prompt(input_sample, examples):
    """Build the chat messages (system + few-shot user prompt) for one sample."""
    instructions = (
        "Your task is to analyze the text and determine if it contains elements of propaganda.\n\n"
        "Below you will find a few examples that can help you to understand:\n\n"
    )
    user_content = few_shot_prompt(input_sample, instructions, examples)
    return [
        {"role": "system", "content": "You are an expert annotator."},
        {"role": "user", "content": user_content},
    ]
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Append numbered demonstrations, the final instruction, and the query.

    Each example dict must provide "input" (text) and "label" keys.
    Returns the assembled prompt string ending with a blank label slot.
    """
    # NOTE(review): examples are numbered from 0 ("Example 0:") — confirm intended.
    pieces = [base_prompt]
    for idx, example in enumerate(examples):
        pieces.append(
            f"Example {idx}:\ntext: {example['input']}\nlabel: {example['label']}\n\n"
        )
    pieces.append(
        "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
    )
    pieces.append(f"text: {input_sample}\nlabel: \n")
    return "".join(pieces)
|
||
|
||
def post_process(response):
    """Map the model reply to "true", "false", or None when unparseable.

    Reads response["choices"][0]["message"]["content"] (OpenAI
    ChatCompletion-style dict), strips periods and an optional "label:"
    prefix, and lowercases before comparing.
    """
    raw = response["choices"][0]["message"]["content"]
    label = raw.replace(".", "").strip().lower()
    label = label.replace("label:", "").strip()
    # Fix: the original if/elif reassigned label to its own value; a direct
    # membership check expresses the same contract without dead branches.
    return label if label in ("true", "false") else None
import re | ||
|
||
from llmebench.datasets import ArProBinaryDataset | ||
from llmebench.models import OpenAIModel | ||
from llmebench.tasks import ArProTask | ||
|
||
|
||
def metadata():
    """Describe this benchmark asset: author, backing model, and reported score."""
    info = {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.",
        "scores": {"Micro-F1": "0.592"},
    }
    return info
|
||
|
||
def config():
    """Wire the dataset, task, and model classes used by the benchmark runner."""
    model_settings = {"max_tries": 3}  # retry budget for transient API failures
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }
|
||
|
||
def prompt(input_sample, examples):
    """Build the chat messages (system + few-shot user prompt) for one sample."""
    instructions = (
        "Your task is to analyze the text and determine if it contains elements of propaganda.\n\n"
        "Below you will find a few examples that can help you to understand:\n\n"
    )
    user_content = few_shot_prompt(input_sample, instructions, examples)
    return [
        {"role": "system", "content": "You are an expert annotator."},
        {"role": "user", "content": user_content},
    ]
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Append numbered demonstrations, the final instruction, and the query.

    Each example dict must provide "input" (text) and "label" keys.
    Returns the assembled prompt string ending with a blank label slot.
    """
    # NOTE(review): examples are numbered from 0 ("Example 0:") — confirm intended.
    pieces = [base_prompt]
    for idx, example in enumerate(examples):
        pieces.append(
            f"Example {idx}:\ntext: {example['input']}\nlabel: {example['label']}\n\n"
        )
    pieces.append(
        "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
    )
    pieces.append(f"text: {input_sample}\nlabel: \n")
    return "".join(pieces)
|
||
|
||
def post_process(response):
    """Map the model reply to "true", "false", or None when unparseable.

    Reads response["choices"][0]["message"]["content"] (OpenAI
    ChatCompletion-style dict), strips periods and an optional "label:"
    prefix, and lowercases before comparing.
    """
    raw = response["choices"][0]["message"]["content"]
    label = raw.replace(".", "").strip().lower()
    label = label.replace("label:", "").strip()
    # Fix: the original if/elif reassigned label to its own value; a direct
    # membership check expresses the same contract without dead branches.
    return label if label in ("true", "false") else None
172 changes: 86 additions & 86 deletions
172
...ontent/propaganda/ArProBinary_ZeroShot.py → ...t/propaganda/ArProBinary_GPT4_ZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,86 @@ | ||
import re | ||
|
||
from llmebench.datasets import ArProBinaryDataset | ||
from llmebench.models import OpenAIModel | ||
from llmebench.tasks import ArProTask | ||
|
||
|
||
def metadata():
    """Describe this zero-shot asset: author, backing model, and reported score."""
    info = {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"Micro-F1": "0.526"},
    }
    return info
|
||
|
||
def config():
    """Wire the dataset, task, and model classes used by the benchmark runner."""
    model_settings = {"max_tries": 3}  # retry budget for transient API failures
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }
|
||
|
||
def prompt(input_sample):
    """Build zero-shot chat messages asking for a true/false propaganda label."""
    instruction = "Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
    user_content = instruction + f"text: {input_sample}\nlabel: \n"
    return [
        {"role": "system", "content": "You are an expert fact checker."},
        {"role": "user", "content": user_content},
    ]
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Append "Sentence:/label:" demonstrations and the unlabeled query.

    NOTE(review): apparently unused in this zero-shot asset — prompt() never
    calls it; kept for interface parity with the few-shot variant.
    """
    pieces = [base_prompt]
    for example in examples:
        pieces.append(f"Sentence: {example['input']}\nlabel: {example['label']}\n\n")
    # Leave the final label blank so the model fills it in.
    pieces.append(f"Sentence: {input_sample}\nlabel: \n")
    return "".join(pieces)
|
||
|
||
def post_process(response):
    """Fuzzily map the model reply to "true"/"false"; None if unrecognized.

    Substring matching tolerates verbose replies; the true-group is checked
    first, so a reply containing both markers resolves to "true".
    """
    text = response["choices"][0]["message"]["content"]
    text = text.replace(".", "").strip().lower()
    if any(tag in text for tag in ("true", "label: 1", "label: yes")):
        return "true"
    if any(tag in text for tag in ("false", "label: 0", "label: no")):
        return "false"
    print("label problem!! " + text)
    return None
import re | ||
|
||
from llmebench.datasets import ArProBinaryDataset | ||
from llmebench.models import OpenAIModel | ||
from llmebench.tasks import ArProTask | ||
|
||
|
||
def metadata():
    """Describe this zero-shot asset: author, backing model, and reported score."""
    info = {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
        "scores": {"Micro-F1": "0.526"},
    }
    return info
|
||
|
||
def config():
    """Wire the dataset, task, and model classes used by the benchmark runner."""
    model_settings = {"max_tries": 3}  # retry budget for transient API failures
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }
|
||
|
||
def prompt(input_sample):
    """Build zero-shot chat messages asking for a true/false propaganda label."""
    instruction = "Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
    user_content = instruction + f"text: {input_sample}\nlabel: \n"
    return [
        {"role": "system", "content": "You are an expert fact checker."},
        {"role": "user", "content": user_content},
    ]
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Append "Sentence:/label:" demonstrations and the unlabeled query.

    NOTE(review): apparently unused in this zero-shot asset — prompt() never
    calls it; kept for interface parity with the few-shot variant.
    """
    pieces = [base_prompt]
    for example in examples:
        pieces.append(f"Sentence: {example['input']}\nlabel: {example['label']}\n\n")
    # Leave the final label blank so the model fills it in.
    pieces.append(f"Sentence: {input_sample}\nlabel: \n")
    return "".join(pieces)
|
||
|
||
def post_process(response):
    """Fuzzily map the model reply to "true"/"false"; None if unrecognized.

    Substring matching tolerates verbose replies; the true-group is checked
    first, so a reply containing both markers resolves to "true".
    """
    text = response["choices"][0]["message"]["content"]
    text = text.replace(".", "").strip().lower()
    if any(tag in text for tag in ("true", "label: 1", "label: yes")):
        return "true"
    if any(tag in text for tag in ("false", "label: 0", "label: no")):
        return "false"
    print("label problem!! " + text)
    return None
Oops, something went wrong.