forked from EricaKrause/StoppingStemming
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstopword.java
104 lines (72 loc) · 2.5 KB
/
stopword.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/**
* Assignment 3 (Text Processing)
*
* @ author: Erica Krause
* @ class: CSC 320 - Information Retrieval
* @ professor: Dr. Cooper
* @ last edit: October 3, 2014
*
* This program implements a text processor using the following specs:
*
* - Implement stopword removal using the following stopword list: http://bit.ly/1bqQWaV
*
* - Use the Porter Stemmer package: http://tartarus.org/~martin/PorterStemmer/
*
**/
import java.io.*;
import java.util.*;
//import Stemmer.class;
/**
 * Stopword-removal step of the text processor.
 *
 * Reads "output.txt" line by line, drops every whitespace-separated token
 * that appears in "stopwords.txt" (one stopword per line, matched
 * case-sensitively), and writes the surviving tokens to "output2.txt".
 * Each kept token is followed by a single space and each input line
 * produces exactly one output line.
 *
 * Stemming is NOT performed here; the program only prints instructions
 * for running the Porter Stemmer on the generated file.
 */
public class stopword{

    /** File whose lines are filtered. */
    private static final String INPUT_FILE = "output.txt";

    /** File listing the stopwords, one per line. */
    private static final String STOPWORD_FILE = "stopwords.txt";

    /** File that receives the filtered text. */
    private static final String OUTPUT_FILE = "output2.txt";

    /**
     * Removes stopwords from output.txt and writes the result to output2.txt.
     *
     * @param argv ignored
     * @throws Exception if an input file cannot be opened or an I/O error
     *                   occurs while reading or writing
     */
    public static void main(String[] argv) throws Exception {
        // Load the stopwords before touching the output file so that a missing
        // input does not leave an empty output2.txt behind.
        Set<String> stopWords = loadStopWords(new File(STOPWORD_FILE));

        // try-with-resources closes both files even if filtering fails midway.
        try (Scanner scanFile = new Scanner(new File(INPUT_FILE));
             PrintWriter print = new PrintWriter(
                     new BufferedWriter(new FileWriter(OUTPUT_FILE)))) {

            while (scanFile.hasNextLine()) {
                print.println(removeStopWords(scanFile.nextLine(), stopWords));
            }

            // Scanner and PrintWriter both swallow I/O errors; surface them
            // instead of reporting success on a truncated result.
            IOException readError = scanFile.ioException();
            if (readError != null) {
                throw readError;
            }
            if (print.checkError()) {
                throw new IOException("Failed to write " + OUTPUT_FILE);
            }
        }

        System.out.println("");
        System.out.println("Stopwords have been removed and output2.txt has been created");
        System.out.println("");

        // The stemmer is a separate program; tell the user how to run it.
        System.out.println("To use the Porter Stemmer compile and run PorterStemmer.java");
        System.out.println("");
        System.out.println("to compile: \t javac PorterStemmer.java");
        System.out.println("to run: \t java Stemmer output2.txt");
    }

    /**
     * Reads the stopword list, one entry per line.
     *
     * Entries are trimmed so that Windows line endings ("\r\n") or stray
     * padding do not prevent a match; blank lines are skipped.
     *
     * @param file the stopword file
     * @return the set of stopwords found in the file
     * @throws IOException if the file cannot be opened or read
     */
    private static Set<String> loadStopWords(File file) throws IOException {
        Set<String> stopWords = new HashSet<String>();
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                String entry = scanner.nextLine().trim();
                if (!entry.isEmpty()) {
                    stopWords.add(entry);
                }
            }
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        }
        return stopWords;
    }

    /**
     * Filters the stopwords out of a single line.
     *
     * @param line      the text to filter; tokens are separated by whitespace
     * @param stopWords the words to drop (exact, case-sensitive match)
     * @return the kept tokens in their original order, each followed by one space
     */
    private static String removeStopWords(String line, Set<String> stopWords) {
        StringBuilder kept = new StringBuilder();
        try (Scanner lineTokenizer = new Scanner(line)) {
            while (lineTokenizer.hasNext()) {
                String word = lineTokenizer.next();
                if (!stopWords.contains(word)) {
                    kept.append(word).append(' ');
                }
            }
        }
        return kept.toString();
    }
}