-
Notifications
You must be signed in to change notification settings - Fork 0
/
TestsetSentences.cpp
63 lines (58 loc) · 1.85 KB
/
TestsetSentences.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include "TestsetSentences.h"
#include <iostream>
#include<fstream>
#include<cstdio>
#include<ctype.h>
#include<string.h>
using namespace std;
/*
* Map built from the sentence file (currently "Preprocessing.txt") that stores only the
* sentences used for testing, together with their index. The sentences used for testing are
* the ones annotated with the label 2 in datasetSplit.txt.
*/
/**
 * Builds the map of test sentences.
 *
 * Reads "Preprocessing.txt" (one "<index>\t<sentence>" record per line) in
 * lockstep with "stanfordSentimentTreebank/datasetSplit.txt" (one
 * "<index>,<label>" record per line). The first line of each file is skipped
 * as a header. A sentence is stored, lower-cased, together with its index when
 * both files agree on the index and the split label is 2.
 *
 * Lines with malformed numeric fields are skipped instead of being parsed into
 * garbage values, and a trailing '\r' (CRLF line endings) is tolerated.
 * If either file cannot be opened the map is left empty and a message is
 * written to stderr.
 */
TestsetSentences::TestsetSentences() {
    // The preprocessed file replaces the raw treebank sentence file:
    //ifstream input("stanfordSentimentTreebank/datasetSentences.txt");
    std::ifstream input("Preprocessing.txt");
    std::ifstream input_datasplit("stanfordSentimentTreebank/datasetSplit.txt");
    if (!input.is_open() || !input_datasplit.is_open()) {
        std::cerr << "TestsetSentences: unable to open the sentence or split file\n";
        return;
    }

    // Label value compared against the second field of datasetSplit.txt.
    const long long kTestLabel = 2;
    // 18 decimal digits always fit in a signed 64-bit integer.
    const std::string::size_type kMaxDigits = 18;

    // Removes a single trailing carriage return left over from CRLF files.
    auto strip_cr = [](std::string &text) {
        if (!text.empty() && text.back() == '\r') {
            text.pop_back();
        }
    };

    // Parses a field made only of decimal digits. An empty field yields 0
    // (as the original parser did); any other character makes it fail.
    auto parse_number = [kMaxDigits](const std::string &field, long long &value) {
        value = 0;
        if (field.size() > kMaxDigits) {
            return false;
        }
        for (char c : field) {
            if (!isdigit(static_cast<unsigned char>(c))) {
                return false;
            }
            value = value * 10 + (c - '0');
        }
        return true;
    };

    std::string line;
    std::string line_dataset;

    // Skip the header line of both files.
    std::getline(input, line);
    std::getline(input_datasplit, line_dataset);

    // Stop as soon as either file runs out so the two never get out of step.
    while (std::getline(input, line) && std::getline(input_datasplit, line_dataset)) {
        strip_cr(line);
        strip_cr(line_dataset);

        // Sentence record: digits up to the first tab, then the sentence text.
        const auto tab = line.find('\t');
        long long number = 0;
        if (!parse_number(line.substr(0, tab), number)) {
            continue;
        }

        std::string word;
        if (tab != std::string::npos) {
            word.reserve(line.size() - tab - 1);
            for (auto i = tab + 1; i < line.size(); ++i) {
                const char c = line[i];
                if (c == '\t') {
                    continue;  // further tabs are dropped from the sentence text
                }
                // tolower requires a value representable as unsigned char.
                word += static_cast<char>(tolower(static_cast<unsigned char>(c)));
            }
        }

        // Split record: "<index>,<label>".
        const auto comma = line_dataset.find(',');
        if (comma == std::string::npos) {
            continue;
        }
        long long number1 = 0;
        long long number2 = 0;
        if (!parse_number(line_dataset.substr(0, comma), number1) ||
            !parse_number(line_dataset.substr(comma + 1), number2)) {
            continue;
        }

        if (number == number1 && number2 == kTestLabel) {
            // Like insert(), emplace() keeps the first entry for a duplicate sentence.
            sentences.emplace(word, number);
        }
    }
}
unordered_map<string, long long> TestsetSentences::getSentencesMap() {
return sentences;
}