-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPreprocessing.cpp
35 lines (33 loc) · 941 Bytes
/
Preprocessing.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include<cstdio>
#include<iostream>
#include<fstream>
#include <ctype.h>
#include<string.h>
#include "Preprocessing.h"
using namespace std;
// Reads stanfordSentimentTreebank/SOStr.txt and rewrites it as Preprocessing.txt,
// in order to avoid the reinterpretation of accents and parentheses.
//
// Output layout:
//   - first line: the literal header "sentence_index sentence";
//   - then one line per input line: a 1-based index, a tab, and the input line
//     lower-cased with every '|' token separator replaced by a single space.
//
// If either file cannot be opened, a message is written to std::cerr and the
// function returns. The input is checked before the output is opened so that a
// failed run does not truncate an existing Preprocessing.txt.
//
// NOTE(review): the header separates its two column names with a space while
// the data rows use a tab; left unchanged because downstream readers may
// depend on the exact header text — confirm.
void preprocessSentences() {
    std::ifstream input("stanfordSentimentTreebank/SOStr.txt");
    if (!input) {
        std::cerr << "preprocessSentences: cannot open stanfordSentimentTreebank/SOStr.txt" << '\n';
        return;
    }

    std::ofstream outputResult("Preprocessing.txt");
    if (!outputResult) {
        std::cerr << "preprocessSentences: cannot open Preprocessing.txt for writing" << '\n';
        return;
    }

    // '\n' instead of std::endl: no need to flush after every sentence; the
    // stream is flushed when it is destroyed at the end of the function.
    outputResult << "sentence_index sentence" << '\n';

    long long index = 1;
    std::string line;
    while (std::getline(input, line)) {
        for (char &c : line) {
            if (c == '|') {
                c = ' ';
            } else {
                // tolower() requires a value representable as unsigned char
                // (or EOF). Bytes >= 0x80 (e.g. UTF-8 accented letters) are
                // negative when char is signed, so cast first to avoid
                // undefined behaviour.
                c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
            }
        }
        outputResult << index << '\t' << line << '\n';
        ++index;
    }
}