-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathCausesChallengeLucene.java
128 lines (95 loc) · 3.84 KB
/
CausesChallengeLucene.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import java.io.File;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;

import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.instantiated.*;
import org.apache.lucene.util.Version;
/**
 * Command-line entry point: reads a whitespace-separated word list from a file,
 * builds a {@link WordNetwork} over it and prints the size of the network that
 * contains the start word.
 */
class CausesChallengeLucene {
    /** Start word used when none is supplied on the command line. */
    private static final String DEFAULT_WORD = "causes";

    /**
     * Runs the challenge.
     *
     * @param args {@code args[0]} is the path of the word-list file (required);
     *             {@code args[1]} is the start word (optional, defaults to {@code "causes"})
     * @throws Exception if the word list cannot be read or the index cannot be built or searched
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java CausesChallengeLucene <word-list-file> [start-word]");
            System.exit(1);
        }
        String startWord = args.length > 1 ? args[1] : DEFAULT_WORD;

        ArrayList<String> wordList = new ArrayList<String>(280000);
        Scanner s = new Scanner(new File(args[0]));
        try {
            while (s.hasNext()) {
                wordList.add(s.next());
            }
            // Scanner hides read errors and simply reports "no more tokens";
            // surface them so a truncated word list is not silently accepted.
            IOException readFailure = s.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        } finally {
            // Release the file handle even when reading fails part-way through.
            s.close();
        }

        WordNetwork wordNetwork = new WordNetwork(wordList);
        System.out.println(wordNetwork.discoverNetwork(startWord).size());
    }
}
class WordNetwork {
private IndexSearcher searcher;
static public final int FRIENDSHIP = 1;
static public final String WORD_FIELD = "word";
public WordNetwork(List<String> words) throws Exception {
InstantiatedIndex index = new InstantiatedIndex();
InstantiatedIndexReader reader = new InstantiatedIndexReader(index);
InstantiatedIndexWriter writer = new InstantiatedIndexWriter(index);
Document doc;
for ( String word : words) {
doc = new Document();
doc.add(new StringField(WORD_FIELD, word));
writer.addDocument(doc);
}
writer.commit();
searcher = new IndexSearcher(reader);
}
public ArrayList<String> discoverNetwork(String word) throws Exception {
HashSet<String> network = new HashSet<String>(80000);
ArrayList<String> buffer = new ArrayList<String>(80000);
buffer.add(word);
ArrayList<String> friends;
String friend;
while (!buffer.isEmpty()) {
friend = buffer.get(0);
buffer.remove(0);
if (network.contains(friend)) continue;
friends = discoverFriends(friend);
buffer.addAll(friends);
network.add(friend);
// System.out.println(network.size());
}
return network;
}
public ArrayList<String> discoverFriends(String word) throws Exception {
final ArrayList<String> results = new ArrayList<String>();
FuzzyQuery query = new FuzzyQuery(new Term(WORD_FIELD, word), FRIENDSHIP);
// Search for the query
searcher.search(new ConstantScoreQuery(query), new Collector() {
private int docBase;
// ignore scorer
public void setScorer(Scorer scorer) { }
// accept docs out of order (for a BitSet it doesn't matter)
public boolean acceptsDocsOutOfOrder() {
return true;
}
public void collect(int docId) {
try {
results.add(searcher.doc( docId ).get(WORD_FIELD));
} catch (Exception e) { }
}
public void setNextReader(AtomicReaderContext context) {
this.docBase = context.docBase;
}
});
return results;
}
}