Skip to content

Commit

Permalink
nlp: sequence "compression" example
Browse files Browse the repository at this point in the history
  • Loading branch information
PtrMan committed Dec 9, 2015
1 parent b16c3c5 commit 7612107
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
2 changes: 2 additions & 0 deletions java/ptrman/causalReasoningSystem/EnergyMinimizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ private static void minimizeSingleStep(Random random, State state) throws Except

if( state.workingGraph.energy < state.minimalEnergy )
{
System.out.println("new minimal energy is " + Integer.toString(state.workingGraph.energy));

state.minimalEnergy = state.workingGraph.energy;
state.graphWithMinimalEnergy = state.workingGraph.clone();
state.minimalSequence = potentialMinimalSequence;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ptrman.causalReasoningSystem.demos;

import ptrman.causalReasoningSystem.*;
import ptrman.causalReasoningSystem.nlp.BuilderForTokenstream;

import java.util.*;

/**
 * Demo of the sequence "compression" example: turns a small token stream into a
 * binder graph (see {@link BuilderForTokenstream}) and runs the
 * {@link EnergyMinimizer} on the resulting causal graph.
 *
 * Created by r0b3 on 08.12.2015.
 */
public class UnsupervisedTokenstream {
    /**
     * Builds the graph for the demo token stream and minimizes its energy.
     *
     * @param args ignored
     * @throws Exception propagated from the graph conversion / energy minimizer
     */
    public static void main(String[] args) throws Exception {
        Map<String, Integer> tokenMap = new HashMap<>();
        List<Integer> tokens = new ArrayList<>();

        tokenize("", tokenMap, tokens);

        // Token nodes occupy the graph indices [0, tokenMap.size());
        // the binder nodes are placed directly behind them.
        final int sequenceBeginningGraphIndex = tokenMap.size();

        Map<Integer, Integer> tokensToGraphIndices = new HashMap<>();
        createOneToOneTokenToGraphIndices(tokensToGraphIndices, tokenMap.size());

        InputGraph inputGraph = new InputGraph();
        BuilderForTokenstream.build(inputGraph, tokensToGraphIndices, sequenceBeginningGraphIndex, tokens);

        // One node per *distinct* token plus one binder per adjacent token pair.
        // (Counting tokens.size() here would add unused nodes whenever a token repeats,
        // and would yield -1 for an empty stream.)
        inputGraph.numberOfNodes = tokenMap.size() + Math.max(tokens.size() - 1, 0);

        DecoratedCausalGraph causalGraph = ConvertInputGraphToCausalGraph.convert(inputGraph);

        EnergyMinimizer.State state = new EnergyMinimizer.State();
        state.workingGraph = causalGraph;

        // NOTE(review): 50000 is presumably the number of minimization steps - confirm against EnergyMinimizer.minimize
        EnergyMinimizer.minimize(new Random(), 50000, state);
    }

    /**
     * Fills {@code target} with the identity mapping {@code i -> i} for all {@code i} in {@code [0, number)}.
     *
     * @param target map that receives the entries
     * @param number count of consecutive indices to map, starting at 0
     */
    private static void createOneToOneTokenToGraphIndices(Map<Integer, Integer> target, int number) {
        for (int i = 0; i < number; i++) {
            target.put(i, i);
        }
    }

    /**
     * Translates a token stream into token indices, assigning a new index to every token
     * that is not yet contained in {@code map}.
     *
     * @param text         currently ignored, a fixed demo sentence is used instead
     * @param map          token to token-index dictionary, extended in place
     * @param resultTokens receives the token index of every token in stream order
     */
    private static void tokenize(final String text, Map<String, Integer> map, List<Integer> resultTokens) {
        // TODO< really tokenize >
        final List<String> stringTokens = Arrays.asList("i", "am", "a", "self", "!", "i", "am", "a", "machine", "!");

        // Continue behind the indices already handed out, so that a map which was
        // filled by an earlier call doesn't get colliding indices.
        int tokenIndicesCounter = map.size();

        for (String iterationToken : stringTokens) {
            if (doesntContainsToken(map, iterationToken)) {
                tokenIndicesCounter = addTokenToMap(map, tokenIndicesCounter, iterationToken);
            }

            resultTokens.add(map.get(iterationToken));
        }
    }

    /**
     * Registers {@code iterationToken} under the index {@code tokenIndicesCounter}.
     *
     * @return the next free token index ({@code tokenIndicesCounter + 1})
     */
    private static int addTokenToMap(Map<String, Integer> map, int tokenIndicesCounter, String iterationToken) {
        map.put(iterationToken, tokenIndicesCounter);
        return tokenIndicesCounter + 1;
    }

    /**
     * @return {@code true} if {@code iterationToken} has no entry in {@code map}
     */
    private static boolean doesntContainsToken(Map<String, Integer> map, String iterationToken) {
        return !map.containsKey(iterationToken);
    }
}
35 changes: 35 additions & 0 deletions java/ptrman/causalReasoningSystem/nlp/BuilderForTokenstream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package ptrman.causalReasoningSystem.nlp;

import ptrman.causalReasoningSystem.InputGraph;

import java.util.List;
import java.util.Map;

/**
 * Converts a sequence of tokens (repetitions allowed) into a representation where
 * binders (depicted with []) point at the token before and the token after them.
 *
 * <pre>
 *        []
 *       /  \
 *      /    \
 *     V      V
 *  before  after
 * </pre>
 */
public class BuilderForTokenstream {
    /**
     * Adds one binder per pair of adjacent tokens to {@code graph.connections}.
     * The binder of the pair starting at sequence position {@code n} gets the graph index
     * {@code sequenceBeginningGraphIndex + n} and contributes two connections:
     * first (binder, token before), then (binder, token after).
     * Sequences with fewer than two tokens add nothing.
     *
     * @param graph                       graph whose connection list is extended
     * @param tokenToGraphIndices         maps each token index of {@code sequence} to its graph index
     * @param sequenceBeginningGraphIndex graph index used for the first binder
     * @param sequence                    token indices in stream order
     */
    public static void build(InputGraph graph, Map<Integer, Integer> tokenToGraphIndices, int sequenceBeginningGraphIndex, List<Integer> sequence) {
        for (int pairOffset = 0; pairOffset + 1 < sequence.size(); pairOffset++) {
            final int tokenBefore = sequence.get(pairOffset);
            final int tokenAfter = sequence.get(pairOffset + 1);

            final int nodeBefore = tokenToGraphIndices.get(tokenBefore);
            final int nodeAfter = tokenToGraphIndices.get(tokenAfter);

            // binders are numbered consecutively, one per adjacent pair
            final int binderNode = sequenceBeginningGraphIndex + pairOffset;

            graph.connections.add(new InputGraph.Connection(binderNode, nodeBefore));
            graph.connections.add(new InputGraph.Connection(binderNode, nodeAfter));
        }
    }
}

0 comments on commit 7612107

Please sign in to comment.