Skip to content

Commit

Permalink
beginning of the dawn of nlp (untested)
Browse files Browse the repository at this point in the history
  • Loading branch information
PtrMan committed Dec 8, 2015
1 parent f788bde commit b16c3c5
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
76 changes: 76 additions & 0 deletions java/ptrman/causalReasoningSystem/nlp/Builder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ptrman.causalReasoningSystem.nlp;

import ptrman.causalReasoningSystem.InputGraph;

import java.util.*;

/**
 * Wires tokenized sentences into an {@link InputGraph}.
 *
 * Every distinct token gets a graph index in the range {@code [0, uniqueTokenCount)}, and every
 * sentence gets a root index following directly after the token indices.
 */
public class Builder {
    /**
     * A distinct token together with the graph index assigned to it.
     */
    public static class TokenWithIndex {
        String token;
        int index;

        public TokenWithIndex(String token, int index) {
            this.token = token;
            this.index = index;
        }
    }

    /**
     * Assigns a root index to each sentence and adds one connection per token occurrence,
     * pointing from the token's index to the root index of the sentence containing it.
     *
     * An empty sentence list (or sentences without any tokens) is valid: no token indices are
     * handed out and the sentence root indices start at 0.
     *
     * @param graph graph whose {@code connections} collection receives the new connections
     * @param sentences sentences to add; {@code causalRootGraphIndex} of each one is overwritten
     * @return the distinct tokens of all sentences together with their assigned indices
     */
    public static List<TokenWithIndex> fillGraphWithSentences(InputGraph graph, List<Sentence> sentences) {
        List<TokenWithIndex> uniqueTokens = calcUniqueTokens(sentences);

        // Build the lookup once instead of scanning the token list for every token occurrence.
        Map<String, Integer> indexByToken = new HashMap<>();
        for (TokenWithIndex uniqueToken : uniqueTokens) {
            indexByToken.put(uniqueToken.token, uniqueToken.index);
        }

        // Token indices are contiguous from 0, so the first free index equals the token count.
        // Reading the index of the last list element instead would fail on an empty token list.
        int iterationSentenceIndex = uniqueTokens.size();

        for (Sentence currentSentence : sentences) {
            currentSentence.causalRootGraphIndex = iterationSentenceIndex;

            // connect from the words to the root
            // we do this in this direction because the sentences bind better to tighter bound words
            for (String iterationToken : currentSentence.tokens) {
                int tokenIndex = getTokenIndex(indexByToken, iterationToken);
                graph.connections.add(new InputGraph.Connection(tokenIndex, currentSentence.causalRootGraphIndex));
            }

            iterationSentenceIndex++;
        }

        return uniqueTokens;
    }

    /**
     * Looks up the index assigned to a token.
     *
     * @throws RuntimeException if the token has no assigned index, which indicates an internal
     *         inconsistency because the lookup is built from the same sentences
     */
    private static int getTokenIndex(final Map<String, Integer> indexByToken, final String token) {
        Integer index = indexByToken.get(token);
        if (index == null) {
            throw new RuntimeException("Internal Error");
        }

        return index;
    }

    /**
     * Collects the distinct tokens of all sentences and numbers them consecutively from 0 in
     * the iteration order of the underlying hash set.
     */
    private static List<TokenWithIndex> calcUniqueTokens(List<Sentence> sentences) {
        Set<String> uniqueSet = new HashSet<>();

        for (Sentence iterationSentence : sentences) {
            uniqueSet.addAll(iterationSentence.tokens);
        }

        List<TokenWithIndex> result = new ArrayList<>(uniqueSet.size());
        int i = 0;

        for (String iterationToken : uniqueSet) {
            result.add(new TokenWithIndex(iterationToken, i));
            i++;
        }

        return result;
    }
}
13 changes: 13 additions & 0 deletions java/ptrman/causalReasoningSystem/nlp/Sentence.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ptrman.causalReasoningSystem.nlp;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by r0b3 on 08.12.2015.
 *
 * A sentence held as a list of tokens, together with the graph index of its root.
 */
public class Sentence {
    /** Tokens of this sentence; a fresh, empty, mutable list for every new instance. */
    public List<String> tokens;

    /** Graph index of this sentence's root; stays 0 until a value is assigned. */
    int causalRootGraphIndex;

    /** Creates a sentence without any tokens. */
    public Sentence() {
        this.tokens = new ArrayList<>();
    }
}

0 comments on commit b16c3c5

Please sign in to comment.