Research project for Hinglish Sentiment Analysis
okaditya84 committed Nov 30, 2023
1 parent 199f160 commit 5dc2869
Showing 29 changed files with 62,671 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "none"
}
3,273 changes: 3,273 additions & 0 deletions Dataset making/Labelling.ipynb

15,809 changes: 15,809 additions & 0 deletions Dataset making/data.csv

6,649 changes: 6,649 additions & 0 deletions Dataset making/same_sentiment_rows.csv

3,165 changes: 3,165 additions & 0 deletions Dataset making/test.csv

213 changes: 213 additions & 0 deletions Dataset making/test.ipynb
@@ -0,0 +1,213 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"data=pd.read_csv(\"train.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df=pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline\n",
"\n",
"sentiment_analysis = pipeline('sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment-latest')\n",
"\n",
"sentiments = []\n",
"for text in data['english']:\n",
" result = sentiment_analysis(text)\n",
" sentiments.append(result[0]['label'])\n",
"\n",
"data['sentiment'] = sentiments\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sentiments = []\n",
"for text in data['english']:\n",
" result = sentiment_analysis(text)\n",
" sentiments.append(result[0]['label'])\n",
"\n",
"data['sentiment'] = sentiments\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# read the csv files\n",
"train_df = pd.read_csv('train.csv')\n",
"test_df = pd.read_csv('test.csv')\n",
"val_df = pd.read_csv('valid.csv')\n",
"\n",
"# concatenate the dataframes\n",
"data_df = pd.concat([train_df, test_df, val_df], ignore_index=True)\n",
"\n",
"# write the merged dataframe to a new csv file\n",
"data_df.to_csv('data.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline\n",
"\n",
"sentiment_analysis = pipeline('sentiment-analysis', model='cardiffnlp/twitter-roberta-base-sentiment-latest')\n",
"\n",
"sentiments = []\n",
"for text in data['English']:\n",
" result = sentiment_analysis(text)\n",
" sentiments.append(result[0]['label'])\n",
"\n",
"data.rename(columns={'sentiment': 'twitter_roberta'}, inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import required libraries\n",
"import pandas as pd\n",
"from transformers import pipeline\n",
"\n",
"# read the data.csv file\n",
"data_df = pd.read_csv('data.csv')\n",
"\n",
"# create a sentiment analysis pipeline using the beto model\n",
"sentiment_analysis = pipeline('sentiment-analysis', model='finiteautomata/beto-sentiment-analysis')\n",
"\n",
"# predict the sentiment for each text in the english column\n",
"sentiments = []\n",
"for text in data_df['English']:\n",
" result = sentiment_analysis(text)\n",
" sentiments.append(result[0]['label'])\n",
"\n",
"# add the predicted sentiments to the dataframe\n",
"data_df['beto_sentiment'] = sentiments\n",
"\n",
"# write the updated dataframe to a new csv file\n",
"data_df.to_csv('data_with_beto_sentiment.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install vaderSentiment\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import the required libraries\n",
"from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
"\n",
"# create an instance of the SentimentIntensityAnalyzer class\n",
"analyzer = SentimentIntensityAnalyzer()\n",
"\n",
"# predict the sentiment for each text in the english column\n",
"sentiments = []\n",
"for text in data_df['English']:\n",
" result = analyzer.polarity_scores(text)\n",
" compound_score = result['compound']\n",
" if compound_score > 0.05:\n",
" sentiments.append('positive')\n",
" elif compound_score < -0.05:\n",
" sentiments.append('negative')\n",
" else:\n",
" sentiments.append('neutral')\n",
"\n",
"# add the predicted sentiments to the dataframe\n",
"data_df['vader_sentiment'] = sentiments\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import pandas as pd\n",
"\n",
" # read the data.csv file\n",
"data_df = pd.read_csv('data.csv')\n",
"\n",
"# create a new dataframe with only the required columns\n",
"new_df = data_df[['English', 'twitter_roberta', 'beto_sentiment', 'vader_sentiment']]\n",
"\n",
"new_df = new_df[new_df['twitter_roberta'] == new_df['beto_sentiment']]\n",
"new_df = new_df[new_df['twitter_roberta'] == new_df['vader_sentiment']]\n",
"\n",
" # write the updated dataframe to a new csv file\n",
"new_df.to_csv('same_sentiment_rows.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#read data.csv and print the first 5 rows\n",
"data_df = pd.read_csv('data.csv')\n",
"print(data_df.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}