-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathValidationsetSentences.cpp
80 lines (72 loc) · 2.48 KB
/
ValidationsetSentences.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include "ValidationsetSentences.h"
#include <iostream>
#include<fstream>
#include<ctype.h>
#include<cstdio>
#include<string.h>
using namespace std;
namespace {

// Split label in datasetSplit.txt that marks the rows kept by this class.
const long long kValidationSplitLabel = 3;

// Removes a trailing '\r' so that files saved with CRLF line endings are
// parsed the same way as files with plain LF line endings.
void stripTrailingCarriageReturn(std::string& text) {
    if (!text.empty() && text.back() == '\r') {
        text.pop_back();
    }
}

// Appends the decimal digit `c` to `value`. Any non-digit character is
// ignored so stray bytes cannot corrupt the parsed number.
void appendDigit(long long& value, char c) {
    // The cast avoids undefined behaviour in isdigit for negative char values.
    if (isdigit(static_cast<unsigned char>(c))) {
        value = value * 10 + (c - '0');
    }
}

}  // namespace

/**
 * Builds the map of validation (dev) sentences.
 *
 * Reads the sentence file ("Preprocessing.txt"; the original
 * "stanfordSentimentTreebank/datasetSentences.txt" is kept below as a
 * commented-out alternative) in lockstep with
 * "stanfordSentimentTreebank/datasetSplit.txt".
 *
 * Each sentence line is expected to be "<index>\t<sentence>" and each split
 * line "<index>,<label>". A sentence is stored, lower-cased and with its tab
 * characters removed, together with its index when both indices agree and the
 * label is 3. Every stored entry is also appended to "debug.txt".
 *
 * If either input file cannot be opened, a message is written to stderr and
 * the map is left empty.
 */
ValidationsetSentences::ValidationsetSentences() {
    // Read from the file.
    //ifstream input("stanfordSentimentTreebank/datasetSentences.txt");
    std::ifstream input("Preprocessing.txt");
    std::ifstream input_datasplit("stanfordSentimentTreebank/datasetSplit.txt");
    std::ofstream outputFile;
    outputFile.open("debug.txt", std::ios_base::app);

    if (!input || !input_datasplit) {
        std::cerr << "ValidationsetSentences: could not open \"Preprocessing.txt\" or "
                     "\"stanfordSentimentTreebank/datasetSplit.txt\"\n";
        return;
    }

    std::string line;
    std::string line_dataset;

    // Discard the first line of each file (presumably a header row --
    // TODO confirm against the data files).
    std::getline(input, line);
    std::getline(input_datasplit, line_dataset);

    std::string word;
    // Stop as soon as either file runs out: a sentence without a matching
    // split row can never be selected.
    while (std::getline(input, line) && std::getline(input_datasplit, line_dataset)) {
        stripTrailingCarriageReturn(line);
        stripTrailingCarriageReturn(line_dataset);

        // Sentence line: digits before the first tab form the index; everything
        // after it (minus any further tabs) is the lower-cased sentence text.
        long long number = 0;
        bool in_index = true;
        word.clear();
        for (const char c : line) {
            if (c == '\t') {
                in_index = false;
            } else if (in_index) {
                appendDigit(number, c);
            } else {
                // Cast through unsigned char: tolower is undefined for negative values.
                word += static_cast<char>(tolower(static_cast<unsigned char>(c)));
            }
        }

        // Split line: digits before the first comma are the sentence index,
        // digits after it are the split label.
        long long split_index = 0;
        long long split_label = 0;
        bool before_comma = true;
        for (const char c : line_dataset) {
            if (c == ',') {
                before_comma = false;
            } else if (before_comma) {
                appendDigit(split_index, c);
            } else {
                appendDigit(split_label, c);
            }
        }

        if (number == split_index && split_label == kValidationSplitLabel) {
            sentences.insert(std::make_pair(word, number));
            // '\n' instead of endl: no need to flush the debug file on every row.
            outputFile << word << " " << number << '\n';
        }
    }
}
/* long long DevsetSentences::retrieveSentenceIndex(string phrase) {
unordered_map<string, long long>::const_iterator found_iter = sentences.find(phrase);
if (found_iter == sentences.end()) {
ofstream outputFile;
outputFile.open("debug.txt", std::ios_base::app);
outputFile<<"The given phrase: "<<phrase<<" was not found in the sentence dataset."<<endl;
return -1;
}
return found_iter->second;
} */
/**
 * Returns, by value, a copy of the sentence-to-index map that the
 * constructor populated.
 */
unordered_map<string, long long> ValidationsetSentences::getSentencesMap() {
    return this->sentences;
}
// Destructor: performs no explicit cleanup.
ValidationsetSentences::~ValidationsetSentences() = default;