From 4bf98facc60bd9ddc1889b8f55deb28ec6194c80 Mon Sep 17 00:00:00 2001 From: Alice Lai Date: Mon, 11 Jan 2021 12:20:39 -0500 Subject: [PATCH] Add files via upload --- DocumentWithCliques.py | 108 +++++++ DocumentWithParagraphs.py | 44 +++ LSTMClique.py | 69 ++++ LSTMParSeq.py | 96 ++++++ LSTMSentAvg.py | 76 +++++ README.md | 142 +++++++++ csv_to_text_files.py | 19 ++ data_loader.py | 505 ++++++++++++++++++++++++++++++ entity_graph.py | 154 +++++++++ entity_grid.py | 153 +++++++++ evaluation.py | 286 +++++++++++++++++ extract_entity_grid.py | 125 ++++++++ extract_entity_grid_perm.py | 153 +++++++++ extract_features_from_grid.py | 104 ++++++ extract_graph_from_grid.py | 115 +++++++ generate_high_coh_permutations.py | 109 +++++++ main.py | 126 ++++++++ train_neural_models.py | 149 +++++++++ 18 files changed, 2533 insertions(+) create mode 100644 DocumentWithCliques.py create mode 100644 DocumentWithParagraphs.py create mode 100644 LSTMClique.py create mode 100644 LSTMParSeq.py create mode 100644 LSTMSentAvg.py create mode 100644 README.md create mode 100644 csv_to_text_files.py create mode 100644 data_loader.py create mode 100644 entity_graph.py create mode 100644 entity_grid.py create mode 100644 evaluation.py create mode 100644 extract_entity_grid.py create mode 100644 extract_entity_grid_perm.py create mode 100644 extract_features_from_grid.py create mode 100644 extract_graph_from_grid.py create mode 100644 generate_high_coh_permutations.py create mode 100644 main.py create mode 100644 train_neural_models.py diff --git a/DocumentWithCliques.py b/DocumentWithCliques.py new file mode 100644 index 0000000..1ad9668 --- /dev/null +++ b/DocumentWithCliques.py @@ -0,0 +1,108 @@ +import torch +from nltk import word_tokenize +import random + +is_cuda = torch.cuda.is_available() + + +class DocumentWithCliques(object): + + def __init__(self, orig_sentences, clique_size, permutation_indices=None, id = '', label=None): + self.id = id + self.clique_size = clique_size + self.orig_sentences = [] + for sent in orig_sentences: + sent = sent.strip() + if sent == "": + continue + if sent == "" or sent == "": + self.orig_sentences.append([sent]) + else: + self.orig_sentences.append(word_tokenize(sent)) + self.permutation_indices = [] + if permutation_indices is not None: + self.permutation_indices = permutation_indices # index into orig_sentences + self.index_sentences = [] # token-indexed version of self.orig_sentences + self.orig_full_sequence = None + self.perm_full_sequences = None + self.label = label + + # turn full doc into flat sequence of word indices + def get_orig_full_sequence(self): + if self.orig_full_sequence is not None: + return self.orig_full_sequence + self.orig_full_sequence = [] + for sent in self.index_sentences: + self.orig_full_sequence.extend(sent) + return self.orig_full_sequence + + # turn all doc permutations into sentence lists of word indices + def get_perm_index_sentences(self): + if self.perm_full_sequences is not None: + return self.get_perm_full_sequences + self.perm_doc_sentences = [] + for perm in self.permutation_indices: + doc_temp = [] + for sent_idx in perm: + doc_temp.append(self.index_sentences[sent_idx]) + self.perm_doc_sentences.append(doc_temp) + return self.perm_doc_sentences + + # turn all doc permutations into flat sequences of word indices + def get_perm_full_sequences(self): + if self.perm_full_sequences is not None: + return self.get_perm_full_sequences + self.perm_full_sequences = [] + for perm in self.permutation_indices: + doc_temp = [] + for sent_idx in 
perm: + doc_temp.extend(self.index_sentences[sent_idx]) + self.perm_full_sequences.append(doc_temp) + return self.perm_full_sequences + + def create_cliques_orig(self): # assume self.index_sentences is non-empty + self.orig_cliques = [] + self.orig_cliques_index = [] + for i in range(len(self.index_sentences) - self.clique_size + 1): + clique = [] + clique_index = [] + for j in range(self.clique_size): + clique.append(self.index_sentences[i + j]) + clique_index.append(i+j) + self.orig_cliques.append(clique) + self.orig_cliques_index.append(clique_index) + + # randomly create negative cliques from the original document sentences + def create_cliques_neg(self): + self.neg_cliques = [] + for orig_clique in self.orig_cliques_index: # negative example for each window: replace center sentence + if len(self.orig_cliques_index) == 1: + break # no possible negative cliques for this doc + valid_sentences = {} + for sent_idx in orig_clique: + valid_sentences[sent_idx] = 1 + valid_sentences[0] = 1 # don't allow pad + valid_sentences[len(self.index_sentences) - 1] = 1 # don't allow pad + if len(valid_sentences) == len(self.index_sentences): + continue # no possible negative cliques for this positive clique + center_idx = int(len(orig_clique) / 2) + new_sent = random.randrange(len(self.index_sentences)) + while new_sent in valid_sentences: + new_sent = random.randrange(len(self.index_sentences)) + neg_clique = [] + for sent_idx in orig_clique: + neg_clique.append(self.index_sentences[sent_idx]) + neg_clique[center_idx] = self.index_sentences[new_sent] + self.neg_cliques.append(neg_clique) + + # create cliques for predefined permutations of this document + def create_cliques_perm(self): + self.perm_cliques = [] + for perm in self.permutation_indices: + cliques = [] + for i in range(len(perm) - self.clique_size + 1): + clique = [] + for j in range(self.clique_size): + clique.append(self.index_sentences[perm[i + j]]) + cliques.append(clique) + self.perm_cliques.append(cliques) diff --git a/DocumentWithParagraphs.py b/DocumentWithParagraphs.py new file mode 100644 index 0000000..9585c84 --- /dev/null +++ b/DocumentWithParagraphs.py @@ -0,0 +1,44 @@ +from nltk import sent_tokenize, word_tokenize + + +class DocumentWithParagraphs(object): + + def __init__(self, text_with_line_breaks, label=None, orig_sentences=None, permutation_indices=None, id=''): + self.id = id + self.text = [] + self.text_indexed = [] + self.label = label + lines = text_with_line_breaks.splitlines() + for line in lines: + line = line.strip() + if line != "": # this is a paragraph + paragraph = [] + sents = sent_tokenize(line) + for sent in sents: + words = word_tokenize(sent) + paragraph.append(words) + self.text.append(paragraph) + self.orig_sentences = [] + if orig_sentences is not None: + self.orig_sentences = orig_sentences + self.permutation_indices = [] + if permutation_indices is not None: + self.permutation_indices = permutation_indices + + def get_paragraphs(self): + return self.text_indexed + + def get_sentences(self): + sentences = [] + for paragraph in self.text_indexed: + for sent in paragraph: + sentences.append(sent) + return sentences + + def get_words(self): + words = [] + for paragraph in self.text_indexed: + for sent in paragraph: + for word in sent: + words.append(word) + return words \ No newline at end of file diff --git a/LSTMClique.py b/LSTMClique.py new file mode 100644 index 0000000..899416b --- /dev/null +++ b/LSTMClique.py @@ -0,0 +1,69 @@ +import torch +import torch.nn as nn +from torch.autograd import 
Variable +import torch.nn.functional as F +from torch.nn.utils.rnn import pack_padded_sequence + +USE_CUDA = torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor +LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor + + +class LSTMClique(nn.Module): + + def __init__(self, params, data_obj): + super(LSTMClique, self).__init__() + self.embedding_dim = params['embedding_dim'] + self.hidden_dim = params['hidden_dim'] + self.lstm_dim = params['lstm_dim'] + self.dropout = params['dropout'] + self.clique_size = params['clique_size'] + self.embeddings = data_obj.word_embeds + self.lstm = nn.LSTM(self.embedding_dim, self.lstm_dim) + self.hidden = None + self.clique_layer = nn.Linear(params['clique_size'] * self.lstm_dim, self.hidden_dim) + nn.init.xavier_uniform(self.clique_layer.weight, gain=nn.init.calculate_gain('tanh')) + self.task = params['task'] + if params['task'] == 'perm': + num_labels = 2 + elif params['task'] == 'minority': + num_labels = 2 + elif params['task'] == 'class': + num_labels = 3 + elif params['task'] == 'score_pred': + num_labels = 1 + self.predict_layer = nn.Linear(self.hidden_dim, num_labels) + nn.init.xavier_uniform(self.predict_layer.weight, gain=nn.init.calculate_gain('sigmoid')) + if USE_CUDA: + self.clique_layer = self.clique_layer.cuda() + self.predict_layer = self.predict_layer.cuda() + + def init_hidden(self, batch_size): + if USE_CUDA: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim).cuda()), + Variable(torch.zeros(1, batch_size, self.lstm_dim)).cuda()) + else: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim)), + Variable(torch.zeros(1, batch_size, self.lstm_dim))) + + def forward(self, inputs, input_lengths, original_index): # now with cliques + lstm_out = None + for i in range(self.clique_size): # send each sentence x batch through LSTM + self.hidden = self.init_hidden(len(input_lengths[i])) + seq_tensor = self.embeddings(inputs[i]) + packed_input = pack_padded_sequence(seq_tensor, input_lengths[i], batch_first=True) + packed_output, (ht, ct) = self.lstm(packed_input, self.hidden) + # reorder + final_output = ht[-1] + odx = original_index[i].view(-1, 1).expand(len(input_lengths[i]), final_output.size(-1)) + output_unsorted = torch.gather(final_output, 0, Variable(odx)) + if lstm_out is None: + lstm_out = output_unsorted + else: + lstm_out = torch.cat([lstm_out, output_unsorted], dim=1) + clique_vector = F.tanh(self.clique_layer(lstm_out)) + clique_vector = F.dropout(clique_vector, p=self.dropout, training=self.training) + coherence_pred = self.predict_layer(clique_vector) + if self.task != 'score_pred': + coherence_pred = F.softmax(coherence_pred, dim=0) + return coherence_pred diff --git a/LSTMParSeq.py b/LSTMParSeq.py new file mode 100644 index 0000000..7c8dc63 --- /dev/null +++ b/LSTMParSeq.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable +import torch.nn.functional as F +from torch.nn.utils.rnn import pack_padded_sequence + +USE_CUDA = torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor +LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor + +# todo this whole class +class LSTMParSeq(nn.Module): + + def __init__(self, params, data_obj): + super(LSTMParSeq, self).__init__() + self.data_obj = data_obj + self.task = params['task'] + self.embedding_dim = params['embedding_dim'] + self.hidden_dim = params['hidden_dim'] + self.lstm_dim = params['lstm_dim'] + self.dropout = 
params['dropout'] + self.embeddings = data_obj.word_embeds + self.word_lstm = nn.LSTM(self.embedding_dim, self.lstm_dim) + self.word_lstm_hidden = None + self.sent_lstm = nn.LSTM(self.lstm_dim, self.lstm_dim) + self.sent_lstm_hidden = None + self.par_lstm = nn.LSTM(self.lstm_dim, self.lstm_dim) + self.par_lstm_hidden = None + self.hidden_layer = nn.Linear(self.lstm_dim, self.hidden_dim) + if params['task'] == 'perm': + num_labels = 2 + elif params['task'] == 'minority': + num_labels = 2 + elif params['task'] == 'class': + num_labels = 3 + elif params['task'] == 'score_pred': + num_labels = 1 + self.predict_layer = nn.Linear(self.hidden_dim, num_labels) + self.bn = nn.BatchNorm1d(self.hidden_dim) + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Linear): + m.bias.data.zero_() + nn.init.xavier_uniform(m.weight) + if USE_CUDA: + self.hidden_layer = self.hidden_layer.cuda() + self.predict_layer = self.predict_layer.cuda() + + def init_hidden(self, batch_size): + if USE_CUDA: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim).cuda()), + Variable(torch.zeros(1, batch_size, self.lstm_dim)).cuda()) + else: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim)), + Variable(torch.zeros(1, batch_size, self.lstm_dim))) + + def forward(self, inputs, input_lengths, original_index): + doc_vecs = None + for i in range(len(inputs)): # loop over docs + par_vecs = None + for j in range(len(inputs[i])): # loop over paragraphs + doc_batch_size = len(inputs[i][j]) # number of sents + self.word_lstm_hidden = self.init_hidden(doc_batch_size) + seq_tensor = self.embeddings(inputs[i][j]) + # pack + packed_input = pack_padded_sequence(seq_tensor, input_lengths[i][j], batch_first=True) + packed_output, (ht, ct) = self.word_lstm(packed_input, self.word_lstm_hidden) + # reorder + final_output = ht[-1] + odx = original_index[i][j].view(-1, 1).expand(len(input_lengths[i][j]), final_output.size(-1)) + output_unsorted = torch.gather(final_output, 0, Variable(odx)) + # LSTM to produce paragraph vector from sentence vectors + output_unsorted = output_unsorted.unsqueeze(1) + self.sent_lstm_hidden = self.init_hidden(output_unsorted.size(1)) # batch size 1 + output_pars, (ht, ct) = self.sent_lstm(output_unsorted, self.sent_lstm_hidden) + final_output = ht[-1] + # append paragraph vector to batch + if par_vecs is None: + par_vecs = final_output + else: + par_vecs = torch.cat([par_vecs, final_output], dim=0) + # LSTM over paragraph vectors to create document vector + par_vecs = par_vecs.unsqueeze(1) + self.par_lstm_hidden = self.init_hidden(par_vecs.size(1)) # batch size 1 + output_doc, (ht, ct) = self.par_lstm(par_vecs, self.par_lstm_hidden) + final_output = ht[-1] + # append doc vector to batch + if doc_vecs is None: + doc_vecs = final_output + else: + doc_vecs = torch.cat([doc_vecs, final_output], dim=0) + doc_vectors = F.dropout(self.bn(F.relu(self.hidden_layer(doc_vecs))), p=self.dropout, training=self.training) + coherence_pred = self.predict_layer(doc_vectors) + if self.task != 'score_pred': + coherence_pred = F.softmax(coherence_pred, dim=0) + return coherence_pred diff --git a/LSTMSentAvg.py b/LSTMSentAvg.py new file mode 100644 index 0000000..a7c0d34 --- /dev/null +++ b/LSTMSentAvg.py @@ -0,0 +1,76 @@ +import torch +import torch.nn as nn +from torch.autograd import Variable +import torch.nn.functional as F +from torch.nn.utils.rnn import pack_padded_sequence + +USE_CUDA = torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor 
+LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor + + +class LSTMSentAvg(nn.Module): + + def __init__(self, params, data_obj): + super(LSTMSentAvg, self).__init__() + self.data_obj = data_obj + self.task = params['task'] + self.embedding_dim = params['embedding_dim'] + self.hidden_dim = params['hidden_dim'] + self.lstm_dim = params['lstm_dim'] + self.dropout = params['dropout'] + self.embeddings = data_obj.word_embeds + self.lstm = nn.LSTM(self.embedding_dim, self.lstm_dim) + self.hidden = None + self.hidden_layer = nn.Linear(self.lstm_dim, self.hidden_dim) + if params['task'] == 'perm': + num_labels = 2 + elif params['task'] == 'minority': + num_labels = 2 + elif params['task'] == 'class': + num_labels = 3 + elif params['task'] == 'score_pred': + num_labels = 1 + self.predict_layer = nn.Linear(self.hidden_dim, num_labels) + self.bn = nn.BatchNorm1d(self.hidden_dim) + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Linear): + m.bias.data.zero_() + nn.init.xavier_uniform(m.weight) + if USE_CUDA: + self.hidden_layer = self.hidden_layer.cuda() + self.predict_layer = self.predict_layer.cuda() + + def init_hidden(self, batch_size): + if USE_CUDA: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim).cuda()), + Variable(torch.zeros(1, batch_size, self.lstm_dim)).cuda()) + else: + return (Variable(torch.zeros(1, batch_size, self.lstm_dim)), + Variable(torch.zeros(1, batch_size, self.lstm_dim))) + + def forward(self, inputs, input_lengths, original_index): + lstm_out = None # document vectors + for i in range(len(inputs)): # loop over docs + doc_batch_size = len(inputs[i]) # number of sents + self.hidden = self.init_hidden(doc_batch_size) + seq_tensor = self.embeddings(inputs[i]) + # pack + packed_input = pack_padded_sequence(seq_tensor, input_lengths[i], batch_first=True) + packed_output, (ht, ct) = self.lstm(packed_input, self.hidden) + # reorder + final_output = ht[-1] + odx = original_index[i].view(-1, 1).expand(len(input_lengths[i]), final_output.size(-1)) + output_unsorted = torch.gather(final_output, 0, Variable(odx)) + # sum sentence vectors + output_sum = torch.sum(output_unsorted, 0).unsqueeze(0) + if lstm_out is None: + lstm_out = output_sum + else: + lstm_out = torch.cat([lstm_out, output_sum], dim=0) + doc_vectors = F.dropout(self.bn(F.relu(self.hidden_layer(lstm_out))), p=self.dropout, training=self.training) + coherence_pred = self.predict_layer(doc_vectors) + if self.task != 'score_pred': + coherence_pred = F.softmax(coherence_pred, dim=0) + return coherence_pred diff --git a/README.md b/README.md new file mode 100644 index 0000000..937f7ca --- /dev/null +++ b/README.md @@ -0,0 +1,142 @@ +# DiscourseCoherenceDev + +## Dependencies + +This code is written in Python. The dependencies are: + +* Python3 (with recent versions of [NumPy](http://www.numpy.org/) and [SciPy](http://www.scipy.org/)) +* [Pytorch](http://pytorch.org/) (tested on Pytorch 0.3.1) +* [scikit-learn](http://scikit-learn.org/stable/) +* NLTK >= 3 +* [pycorenlp](https://github.com/smilli/py-corenlp) +* [progressbar2](https://pypi.org/project/progressbar2/) + +## Evaluation + +All models can be trained for 4 different evaluation tasks: +- 'class': 3-class classification (low, medium, high coherence) +- 'score_pred': mean score prediction +- 'perm': binary ranking of original vs. permuted texts (requires text permutation files) +- 'minority': binary classification of low coherence vs. 
all other texts + + ## Data Directory Structure + + The GCDC data is available by request (see https://github.com/aylai/GCDC-corpus for details). To run the preprocessing scripts, you will have to create a directory for each corpus in 'data/' containing the train and test csv files. For the Yelp data, you will need to download the data separately (https://www.yelp.com/dataset) and add the corresponding review titles and texts to the incomplete csv file (the CSV header should match the fields in the Clinton and Enron CSVs). + + + ## Preprocessing + + 'corpus' refers to the corpus name: {Yahoo, Clinton, Enron, Yelp} + + *1) Extract texts from CSV to separate files.* Required for entity grid and entity graph models, as well as generating text permutations for evaluation. + + Input: data/[corpus]/[corpus]_train.csv and data/[corpus]/[corpus]_test.csv files. Output: data/[corpus]/text/ directory containing all individual text files. + + ``` + python3 csv_to_text_files.py [corpus] + ``` + + *2) Generate permutation text files (20 per text).* Only generates permutations for high-coherence texts (label = 3). Required for evaluating any model on the binary permutation ranking task (can skip this step for all other experiments). + + Input: data/[corpus]/[corpus]_train.csv, data/[corpus]/[corpus]_test.csv, and data/[corpus]/text/ files. Output: data/[corpus]/text_permute directory containing original and permuted text files for all high-coherence texts. + + ``` + python3 generate_high_coh_permutations.py [corpus] + ``` + + *3) Extract entity grid files (requires Stanford CoreNLP for parsing).* Required for entity grid and entity graph models. + + This step requires running the Stanford CoreNLP server (with Java 8, not Java 9). More details here: https://github.com/smilli/py-corenlp and here: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html#getting-started. You will probably need to run the server with -timeout 50000 (or possibly higher) instead of -timeout 15000 in order to process the longest documents in this dataset. + + **Original files only:** + + Input: data/[corpus]/text/ files. Output: data/[corpus]/parsed/ and data/[corpus]/grid/ files. + + + ``` + python3 extract_entity_grid.py [corpus] + ``` + + **Permuted files:** + + Input: data/[corpus]/text_permute/ files. Output: data/[corpus]/parsed_permute/ and data/[corpus]/grid_permute/ files. + + + ``` + python3 extract_entity_grid_perm.py [corpus] + ``` + + *4) Extract entity graph files from entity grid files.* Extracts 6 different types of entity graphs: {unweighted, weighted, and syntax-sensitive} with or without distance discounting. Specify 'true' or 'false' for the 'is_permutation' argument. Required for entity graph model. + + Input: data/[corpus]/grid[_permute]/ files. Output: data/[corpus]/graph[_permute]/ files. + + ``` + python3 extract_graph_from_grid.py [corpus] [is_permutation] + ``` + + *5) Extract features from entity grid files.* Required for entity grid model. Must specify: +- 'seq_len': the number of sequential sentences over which to compute features (e.g. 2, 3, 4) +- 'salience_threshold': the threshold for salient vs. non-salient entities (e.g. 2, 3, 4 occurrences); specify '1' for only one salience class +- 'syntax_opt': 1 to use syntactic roles (s, o, x, -); 0 to ignore syntactic roles (x, -) +- 'is_permutation': 'true' if using permuted text files, 'false' if using original text files only + + Input: data/[corpus]/grid[_permute]/ files. 
Output: data/[corpus]/features[_permute]/[feature_set] + +``` +python3 extract_features_from_grid.py [corpus] [seq_len] [salience_threshold] [syntax_opt] [is_permutation] +``` + +## Models + +### Entity grid + +Train a random forest classifier on entity grid features. 'feature_set' specifies the name of the feature directory in data/[corpus]/features[_permute]. 'evaluation' specifies the task: 'class', 'score_pred', 'minority', 'perm'. + +``` +python3 entity_grid.py [corpus] [feature_set] [evaluation] +``` + +### Entity graph + +Use entity graph outdegree values to evaluate on different tasks. Must specify graph type: [u, u_dist, w, w_dist, syn, syn_dist]. + +Thresholds (any real numbers): +- 'class': must specify 'threshold1' and 'threshold2' +- 'minority': must specify 'threshold1' +- 'perm': no threshold +- 'score_pred': no threshold + +``` +python3 entity_graph.py [corpus] [evaluation] [graph_type] [opt:threshold1] [opt:threshold2] +``` + +### Neural clique + +Train 3-class classification model on Yahoo data with clique size = 7 sentences: +``` +python3 main.py --model_name yahoo_class_model --train_corpus Yahoo --model_type clique --task class --clique 7 +``` + +See main.py for other parameters. + +### Neural SentAvg + +Train 3-class classification model on Yahoo data: +``` +python3 main.py --model_name yahoo_class_model --train_corpus Yahoo --model_type sent_avg --task class +``` + +See main.py for other parameters. + +**Note:** the SentAvg model cannot be trained for the binary permutation ranking task (because all sentence order permutations have the same score). + +### Neural ParSeq + +Train 3-class classification model on Yahoo data: +``` +python3 main.py --model_name yahoo_class_model --train_corpus Yahoo --model_type par_seq --task class +``` + +See main.py for other parameters. + +**Note:** the ParSeq model currently cannot be trained for the binary permutation ranking task. 
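**Binary permutation ranking:** for the 'perm' task, a test pair counts as correct when the model assigns the original document a higher coherence score than its permuted version (this is how evaluation.py scores it). A minimal sketch of that accuracy computation is below; `score_fn` stands in for whatever scoring function a trained model exposes and is not a function defined in this repository.

```python
def permutation_ranking_accuracy(doc_pairs, score_fn):
    """doc_pairs: iterable of (original_doc, [permuted_docs]);
    score_fn maps a document to a coherence score."""
    num_correct, num_total = 0, 0
    for original, permutations in doc_pairs:
        orig_score = score_fn(original)
        for permuted in permutations:
            # a pair counts as correct only when the original strictly outscores its permutation
            if orig_score > score_fn(permuted):
                num_correct += 1
            num_total += 1
    return num_correct / num_total if num_total else 0.0
```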
\ No newline at end of file diff --git a/csv_to_text_files.py b/csv_to_text_files.py new file mode 100644 index 0000000..3c36e87 --- /dev/null +++ b/csv_to_text_files.py @@ -0,0 +1,19 @@ +import csv, os, sys + +corpus = sys.argv[1] +corpus_dir = 'data/' + corpus + '/' +text_dir = corpus_dir + 'text/' +if not os.path.exists(text_dir): + os.makedirs(text_dir) +splits = ['train', 'test'] +for split in splits: + with open(corpus_dir + corpus + '_' + split + '.csv','r') as in_file: + reader = csv.DictReader(in_file) + for row in reader: + text_id = row['text_id'] + filename = text_id + '.txt' + if os.path.exists(text_dir + filename): + continue + out_file = open(text_dir + filename, 'w') + out_file.write(row['text']) + out_file.close() diff --git a/data_loader.py b/data_loader.py new file mode 100644 index 0000000..6412768 --- /dev/null +++ b/data_loader.py @@ -0,0 +1,505 @@ +import torch +import torch.nn as nn +import numpy as np +import os +from DocumentWithCliques import DocumentWithCliques +from DocumentWithParagraphs import DocumentWithParagraphs +import random +from torch.autograd import Variable +from nltk import word_tokenize +from nltk import sent_tokenize +import csv + +USE_CUDA = torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor +LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor +is_cuda = torch.cuda.is_available() + + +class Data(object): + + def __init__(self, params): + self.params = params + self.word_to_idx = {} + self.idx_to_word = {} + self.word_to_idx[''] = 0 + self.idx_to_word[0] = '' + self.word_embeds = None + + def read_orig_doc(self, filename, data_type, for_clique): + sentences = [] + with open(filename, "r") as in_file: + for line in in_file: + line = line.strip() + if data_type == "BL": + line = line.split(None, 1)[1] # remove sent ID + if not self.params['case_sensitive']: + line = line.lower() + sentences.append(line) + if for_clique: + for i in range(int(self.params['clique_size'] / 2)): + sentences.insert(0, "") + sentences.append("") + return sentences + + def read_perm_doc(self, filename, sentences, data_type, for_clique): + sentence_indices = [] + with open(filename, "r") as in_file: + for line in in_file: + line = line.strip() + if data_type == "BL": + line = line.split(None, 1)[1] + if not self.params['case_sensitive']: + line = line.lower() + sentence_indices.append(sentences.index(line)) + if for_clique: + for i in range(int(self.params['clique_size'] / 2)): + sentence_indices.insert(0, 0) # start pad + sentence_indices.append(len(sentences) - 1) + return sentence_indices + + # read my Yahoo/Clinton/Enron data for 3-way classification (full train/test) + def read_data_class(self, params, split): + # corpus = params['data_dir'].rsplit('/', 2)[1] + if split == 'train' or split == 'train_nodev': + corpus = params['train_corpus'] + elif split == 'test': + corpus = params['test_corpus'] + documents = [] + add_new_words = False + if self.word_embeds is None and split == "train": + add_new_words = True + filename = corpus + '_' + split + '.csv' + with open(params['data_dir'] + corpus + '/' + filename,'r') as in_file: + reader = csv.DictReader(in_file) + for row in reader: + text = row['text'] + if not self.params['case_sensitive']: + text = text.lower() + text_id = row['text_id'] + if params['task'] == 'score_pred': + labels = [int(row['ratingA1']), int(row['ratingA2']), int(row['ratingA3'])] + label = np.mean(labels) + # elif params['eval_minority']: + elif params['task'] == 'minority': + 
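# 'minority' label: 1 when at least two of the three annotators gave the lowest coherence rating (1), else 0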
num_low_judgments = 0 + if row['ratingA1'] == '1': + num_low_judgments += 1 + if row['ratingA2'] == '1': + num_low_judgments += 1 + if row['ratingA3'] == '1': + num_low_judgments += 1 + if num_low_judgments >= 2: + label = 1 + else: + label = 0 + else: + label = int(row['labelA']) + label = label - 1 # zero-indexing + if params['model_type'] == 'clique': + orig_sentences = [] + for par in text.splitlines(): + par = par.strip() + if par == "": + continue + orig_sentences.extend(sent_tokenize(par)) + for i in range(int(self.params['clique_size'] / 2)): + orig_sentences.insert(0, "") + orig_sentences.append("") + doc = DocumentWithCliques(orig_sentences, self.params['clique_size'], None, text_id, label) + for sent in doc.orig_sentences: + sent_idx = [] + for token in sent: + idx = self.add_token_to_index(token, add_new_words) + sent_idx.append(idx) + doc.index_sentences.append(sent_idx) + elif params['model_type'] == 'sent_avg' or params['model_type'] == 'par_seq': + doc = DocumentWithParagraphs(text, label, id=text_id) + # index words + doc_indexed = [] + for para in doc.text: + para_indexed = [] + for sent in para: + sent_indexed = [] + for word in sent: + sent_indexed.append(self.add_token_to_index(word, add_new_words)) + para_indexed.append(sent_indexed) + doc_indexed.append(para_indexed) + doc.text_indexed = doc_indexed + documents.append(doc) + return documents + + # read my Yahoo/Clinton/Enron data for binary ranking permutation task (cross-validation fold) + def read_data_perm(self, params, split): + # corpus = params['data_dir'].rsplit('/', 2)[1] + if split == 'train' or split == 'train_nodev': + corpus = params['train_corpus'] + elif split == 'dev': + corpus = params['train_corpus'] + elif split == 'test': + corpus = params['test_corpus'] + documents = [] + add_new_words = False + if self.word_embeds is None and split == "train": + add_new_words = True + # get list of files in this split + filename = corpus + '_' + split + '_perm.csv' + text_ids = [] + with open(params['data_dir'] + corpus + '/' + filename, 'r') as in_file: + reader = csv.DictReader(in_file) + for row in reader: + text_ids.append(row['text_id']) + for text_id in text_ids: + # read orig file + if not os.path.exists(params['data_dir'] + corpus + '/text_permute/' + text_id + '_sent.txt'): + print(text_id + " not found in permutation data.") + continue + orig_sentences = self.read_orig_doc(params['data_dir'] + corpus + '/text_permute/' + text_id + '_sent.txt', "mine", params['model_type']=='clique') + perm_docs = [] + for i in range(1,21): + filename_perm = params['data_dir'] + corpus + '/text_permute/' + text_id + '.perm-' + str(i) + '.txt' + if not os.path.exists(filename_perm): + continue + perm_docs.append(self.read_perm_doc(filename_perm, orig_sentences, "mine", params['model_type']=='clique')) + if len(perm_docs) == 0: + continue # document has no permutations (is only a single sentence) -- remove from data + if params['model_type'] == 'clique': + doc = DocumentWithCliques(orig_sentences, self.params['clique_size'], perm_docs, text_id) + for sent in doc.orig_sentences: + sent_idx = [] + for token in sent: + idx = self.add_token_to_index(token, add_new_words) + sent_idx.append(idx) + doc.index_sentences.append(sent_idx) + elif params['model_type'] == 'sent_avg' or params['model_type'] == 'par_seq': + # note this loses paragraph info (not useful for permutations task) + doc = DocumentWithParagraphs("\n".join(orig_sentences), None, orig_sentences, perm_docs, text_id) + # index words + doc_indexed = [] + for para in 
doc.text: + para_indexed = [] + for sent in para: + sent_indexed = [] + for word in sent: + sent_indexed.append(self.add_token_to_index(word, add_new_words)) + para_indexed.append(sent_indexed) + doc_indexed.append(para_indexed) + doc.text_indexed = doc_indexed + documents.append(doc) + return documents + + def add_token_to_index(self, token, add_new_words): + if token not in self.word_to_idx and add_new_words: # add to vocab + idx = len(self.word_to_idx) + self.word_to_idx[token] = idx + self.idx_to_word[idx] = token + elif token not in self.word_to_idx and not add_new_words: # replace with UNK token + if 'unk' not in self.word_to_idx: + idx = len(self.word_to_idx) + self.word_to_idx['unk'] = idx + self.idx_to_word[idx] = 'unk' + return self.word_to_idx['unk'] + return self.word_to_idx[token] + + def create_cliques(self, documents, task, limit=None): # create cliques of k sentences + items = [] + labels = [] + for doc in documents: + doc.create_cliques_orig() + for clique in doc.orig_cliques: + temp_item = [] + for sent in clique: + # temp_item.append(Variable(LongTensor(list(sent))).view(1, -1)) + temp_item.append(list(sent)) + items.append(temp_item) + if task == 'perm': + labels.append(1) # coherent clique + elif task == 'class' or task == 'score_pred' or task == 'minority': + labels.append(doc.label) + if task == 'perm': + doc.create_cliques_neg() + for clique in doc.neg_cliques: + temp_item = [] + for sent in clique: + temp_item.append(list(sent)) + items.append(temp_item) + labels.append(0) # incoherent clique + doc.create_cliques_perm() + if limit is not None and limit < len(items): + indices = list(range(len(items))) + random.shuffle(indices) + indices = indices[:limit] + new_items = [] + new_labels = [] + for i in indices: + new_items.append(items[i]) + new_labels.append(labels[i]) + items = new_items + labels = new_labels + return items, labels + + def retrieve_doc_cliques_by_label(self, document, task, limit=None): # create cliques of k sentences + items_pos = [] + items_neg = [] + document.create_cliques_orig() + document.create_cliques_neg() + for clique in document.orig_cliques: + temp_item = [] + for sent in clique: + # temp_item.append(Variable(LongTensor(list(sent))).view(1, -1)) + temp_item.append(list(sent)) + items_pos.append(temp_item) + if task == 'perm': + for perm_doc in document.perm_cliques: + perm_temp = [] + for clique in perm_doc: + temp_item = [] + for sent in clique: + # temp_item.append(Variable(LongTensor(list(sent))).view(1, -1)) + temp_item.append(list(sent)) + perm_temp.append(temp_item) + items_neg.append(perm_temp) + return items_pos, items_neg + + def retrieve_doc_sents_by_label(self, document, limit=None): # create cliques of k sentences + items_pos = [] + items_neg = [] + orig_sentences = document.get_sentences() + for sent in orig_sentences: + # items_pos.append(Variable(LongTensor(list(sent))).view(1, -1)) + items_pos.append(list(sent)) + for perm_doc in document.permutation_indices: + doc_neg = [] + for sent_idx in perm_doc: + # doc_neg.append(Variable(LongTensor(list(orig_sentences[sent_idx]))).view(1, -1)) + doc_neg.append(list(orig_sentences[sent_idx])) + items_neg.append(doc_neg) + return [items_pos], items_neg + + def create_doc_sents(self, documents, split_type, task, limit=-1): + items = [] + labels = [] + ids = [] + for doc in documents: + doc_items = [] + if split_type == 'paragraph': + for paragraph in doc.get_paragraphs(): + par_sentences = [] + for sent in paragraph: + par_sentences.append(sent) + doc_items.append(par_sentences) + if 
split_type == 'sentence': + if task == 'class' or task == 'score_pred' or task == 'minority': + for sent in doc.get_sentences(): + doc_items.append(sent) + elif task == 'perm': + orig_sentences = doc.get_sentences() + perm_count = 1 + for perm in doc.permutation_indices: + # create permuted doc + doc_items = [] + for sent_idx in perm: + doc_items.append(orig_sentences[sent_idx]) + items.append(doc_items) + labels.append(0) # permuted + ids.append(doc.id+".0") + # create orig doc for each permuted doc + doc_items = [] + for sent in orig_sentences: + doc_items.append(sent) + items.append(doc_items) + labels.append(1) + ids.append(doc.id+"."+str(perm_count)) + perm_count += 1 + if task != "perm": + items.append(doc_items) + labels.append(doc.label) + ids.append(doc.id) + if -1 < limit < len(items): + indices = list(range(len(items))) + random.shuffle(indices) + indices = indices[:limit] + new_items = [] + new_labels = [] + new_ids = [] + for i in indices: + new_items.append(items[i]) + new_labels.append(labels[i]) + new_ids.append(ids[i]) + items = new_items + labels = new_labels + ids = new_ids + return items, labels, ids + + def load_vectors(self): + print("\nLoading vectors:") + if self.params['vector_type'] == 'glove': + data = [] + for line in open(self.params['vector_path']): + tokens = line.split() + if len(tokens) != 301: + continue + word = tokens[0] + vector_len = len(tokens) - 1 + for t in tokens[1:]: + data.append(float(t)) + idx = len(self.word_to_idx) + self.word_to_idx[word] = idx + self.idx_to_word[idx] = word + data_arr = np.reshape(data, newshape=(int(len(data)/vector_len), vector_len)) + # add pad array at index 0 + data_arr = np.concatenate((np.random.rand(1, vector_len), data_arr), 0) + # add OOV array + data_arr = np.concatenate((data_arr, np.random.rand(1, vector_len)), 0) + idx = len(self.word_to_idx) + self.word_to_idx['unk'] = idx + self.idx_to_word[idx] = 'unk' + # add doc start pad array + data_arr = np.concatenate((data_arr, np.random.rand(1, vector_len)), 0) + idx = len(self.word_to_idx) + self.word_to_idx[''] = idx + self.idx_to_word[idx] = '' + # add doc end pad array + data_arr = np.concatenate((data_arr, np.random.rand(1, vector_len)), 0) + idx = len(self.word_to_idx) + self.word_to_idx[''] = idx + self.idx_to_word[idx] = '' + self.word_embeds = nn.Embedding(data_arr.shape[0], data_arr.shape[1]) + if USE_CUDA: + self.word_embeds = self.word_embeds.cuda() + self.word_embeds.weight.data.copy_(torch.from_numpy(data_arr)) + self.word_embeds.weight.requires_grad = False + print("loading: done") + return self.word_embeds, vector_len + else: + print("unrecognized vector type") + + def rand_vectors(self, vocab_size): + if 'unk' not in self.word_to_idx: + idx = len(self.word_to_idx) + self.word_to_idx['unk'] = idx + self.idx_to_word[idx] = 'unk' + if '' not in self.word_to_idx: + # add doc start pad + idx = len(self.word_to_idx) + self.word_to_idx[''] = idx + self.idx_to_word[idx] = '' + if '' not in self.word_to_idx: + # add doc end pad array + idx = len(self.word_to_idx) + self.word_to_idx[''] = idx + self.idx_to_word[idx] = '' + self.word_embeds = nn.Embedding(len(self.word_to_idx), self.params['embedding_dim']) + if is_cuda: + self.word_embeds = self.word_embeds.cuda() + return self.word_embeds + + def get_batch(self, data, labels, indices, model_type, clique_size=0): + batch = [] + batch_labels = [] + if model_type == 'clique': + for i in range(clique_size): + batch.append([]) + for idx in indices: + batch_labels.append(labels[idx]) + if model_type == 'sent_avg' 
or model_type == 'par_seq': + batch.append(data[idx]) + elif model_type == 'clique': + for i in range(clique_size): + batch[i].append(data[idx][i]) + return batch, batch_labels + + def reverse_index(self, sorted_index): + rev_index = [] + for val in sorted_index: + rev_index.append(0) + for idx, val in enumerate(sorted_index): + rev_index[val] = idx + return rev_index + + def reorder_list(self, data_list, reorder_idx): + new_data_list = [] + for idx in reorder_idx: + new_data_list.append(data_list[idx]) + return new_data_list + + def pad_to_batch(self, batch, word_to_idx, model_type, clique_size=0): # batch is list of (sequence, label) + if model_type == 'par_seq': + input_var = [] + input_len = [] + reverse_index = [] + for doc in batch: + doc_var = [] + doc_len = [] + doc_index = [] + for par in doc: + # batch_lengths = LongTensor([seq[0].size(0) for seq in par]) + batch_lengths = LongTensor([len(seq) for seq in par]) + sorted_lengths, original_index = torch.sort(batch_lengths, 0, descending=True) + doc_index.append(LongTensor(self.reverse_index(original_index))) + sorted_batch = sorted(par, key=lambda b: len(b), reverse=True) + x = sorted_batch + max_x = max([len(s) for s in x]) + x_p = [] + for i in range(len(par)): + if len(x[i]) < max_x: + x_p.append(torch.cat([Variable(LongTensor(x[i])).view(1,-1), + Variable( + LongTensor([word_to_idx['']] * (max_x - len(x[i])))).view( + 1, -1)], 1)) + else: + x_p.append(Variable(LongTensor(x[i])).view(1,-1)) + input_var_temp = torch.cat(x_p) + doc_var.append(input_var_temp) + doc_len.append([list(map(lambda s: s == 0, t.data)).count(False) for t in input_var_temp]) + input_var.append(doc_var) + input_len.append(doc_len) + reverse_index.append(doc_index) + if model_type == 'sent_avg': + input_var = [] + input_len = [] + reverse_index = [] + for doc in batch: + batch_lengths = LongTensor([len(seq) for seq in doc]) + sorted_lengths, original_index = torch.sort(batch_lengths, 0, descending=True) + reverse_index.append(LongTensor(self.reverse_index(original_index))) + sorted_batch = sorted(doc, key=lambda b: len(b), reverse=True) + x = sorted_batch + max_x = max([len(s) for s in x]) + x_p = [] + for i in range(len(doc)): + if len(x[i]) < max_x: + x_p.append( + torch.cat([Variable(LongTensor(x[i])).view(1,-1), + Variable(LongTensor([word_to_idx['']] * (max_x - len(x[i])))).view(1, + -1)], + 1)) + else: + x_p.append(Variable(LongTensor(x[i])).view(1,-1)) + input_var_temp = torch.cat(x_p) + input_var.append(input_var_temp) + input_len.append([list(map(lambda s: s == 0, t.data)).count(False) for t in input_var_temp]) + elif model_type == 'clique': + # list of lists for each sentence-batch in a clique + input_var = [] + input_len = [] + reverse_index = [] + for i in range(clique_size): + batch_lengths = LongTensor([len(seq) for seq in batch[i]]) + sorted_lengths, original_index = torch.sort(batch_lengths, 0, descending=True) + + reverse_index.append(LongTensor(self.reverse_index(original_index))) + x = sorted(batch[i], key=lambda b: len(b), reverse=True) + max_x = max([len(s) for s in x]) + x_p = [] + for i in range(len(batch[i])): + if len(x[i]) < max_x: + x_p.append( + torch.cat( + [Variable(LongTensor(x[i])).view(1, -1), Variable(LongTensor([word_to_idx['']] * (max_x - len(x[i])))).view(1, -1)], + 1)) + else: + x_p.append(Variable(LongTensor(x[i])).view(1, -1)) + input_var.append(torch.cat(x_p)) + input_len.append(list(sorted_lengths)) + return input_var, input_len, reverse_index \ No newline at end of file diff --git a/entity_graph.py b/entity_graph.py 
new file mode 100644 index 0000000..bc28135 --- /dev/null +++ b/entity_graph.py @@ -0,0 +1,154 @@ +import os, csv, sys +import numpy as np +from sklearn.metrics import mean_squared_error +from scipy.stats import spearmanr + +corpus = sys.argv[1] +evaluation = sys.argv[2] +graph_type = sys.argv[3] +if evaluation == 'class': + threshold1 = float(sys.argv[4]) + threshold2 = float(sys.argv[5]) +if evaluation == 'minority': + threshold1 = float(sys.argv[4]) + + +def compute_corr(test_labels, test_scores): + all_labels = [] + all_scores = [] + for test_id in test_labels: + all_labels.append(test_labels[test_id]) + all_scores.append(test_scores[test_id]) + mse = mean_squared_error(all_labels, all_scores) + corr = spearmanr(all_labels, all_scores)[0] + return mse, corr + + +def compute_fscore(threshold, train_labels, train_scores): + tp = 0 + fp = 0 + fn = 0 + for train_id in train_labels: + label = train_labels[train_id] + score = train_scores[train_id] + if score < threshold: + pred = 1 + else: + pred = 0 + if pred == label: + if label == 1: + tp += 1 + else: # incorrect prediction + if pred == 1: + fp += 1 + else: + fn += 1 + precision = 0 + if (tp + fp) > 0: + precision = tp / (tp + fp) + recall = 0 + if (tp + fn) > 0: + recall = tp / (tp + fn) + f05 = 0 # compute F0.5 score + if (precision + recall) > 0: + f05 = (1.25 * precision * recall) / (1.25 * precision + recall) + return precision, recall, f05 + +def evaluate_perm(test_scores_orig, test_scores_perm): + num_correct = 0 + num_total = 0 + for test_id in test_scores_orig: + orig_score = test_scores_orig[test_id] + for perm_id in test_scores_perm[test_id]: + perm_score = test_scores_perm[test_id][perm_id] + if orig_score > perm_score: + num_correct += 1 + num_total += 1 + return num_correct, num_total + + +in_dir = 'data/' + corpus + '/' +# read all test data +test_ids = [] +test_labels = {} +if evaluation == 'perm': + in_filename = in_dir + corpus + '_test_perm.csv' +else: + in_filename = in_dir + corpus + '_test.csv' +with open(in_filename,'r') as in_file: + reader = csv.DictReader(in_file) + for row in reader: + test_ids.append(row['text_id']) + label = None + if evaluation == 'class': + label = int(row['labelA']) + elif evaluation == 'score_pred': + labels = [int(row['ratingA1']), int(row['ratingA2']), int(row['ratingA3'])] + label = np.mean(labels) + elif evaluation == 'minority': + num_low_judgments = 0 + if row['ratingA1'] == '1': + num_low_judgments += 1 + if row['ratingA2'] == '1': + num_low_judgments += 1 + if row['ratingA3'] == '1': + num_low_judgments += 1 + if num_low_judgments >= 2: + label = 1 + else: + label = 0 + test_labels[row['text_id']] = label +test_scores = {} +test_scores_perm = {} +test_scores_orig = {} +num_correct = 0 +num_total = 0 +for test_id in test_ids: + if evaluation == 'perm': + orig_filename = in_dir + 'graph_permute/' + test_id + '.0.graph_' + graph_type + if not os.path.exists(orig_filename): + continue # no valid permutations + with open(in_dir + 'graph_permute/' + test_id + '.0.graph_' + graph_type, 'r') as in_file: + for line in in_file: + score = float(line.strip()) + test_scores_orig[test_id] = score + test_scores_perm[test_id] = {} + break + # read permutations + for i in range(1, 21): + perm_filename = in_dir + 'graph_permute/' + test_id + '.perm-' + str(i) + '.graph_' + graph_type + if not os.path.exists(perm_filename): + continue + with open(perm_filename, 'r') as in_file: + for line in in_file: + score = float(line.strip()) + test_scores_perm[test_id][i] = score + break + else: + with 
open(in_dir + 'graph/' + test_id + '.graph_' + graph_type, 'r') as in_file: + for line in in_file: + score = float(line.strip()) + test_scores[test_id] = score + if evaluation == 'class': + if score < threshold1: + pred_label = 1 + elif score < threshold2: + pred_label = 2 + else: + pred_label = 3 + gold_label = test_labels[test_id] + if gold_label == pred_label: + num_correct += 1 + num_total += 1 + break +if evaluation == 'class': + print("Results on test:\nAccuracy: %0.2f" % (100 * (num_correct / num_total))) +elif evaluation == 'minority': + precision, recall, fscore = compute_fscore(threshold1, test_labels, test_scores) + print("Results on test:\nPrecision: %0.2f Recall: %0.2f F0.5: %0.2f" % (precision, recall, fscore)) +elif evaluation == 'score_pred': + mse, corr = compute_corr(test_labels, test_scores) + print("Results on test:\nSpearman corr: %0.3f MSE: %0.3f" % (corr, mse)) +elif evaluation == 'perm': + num_correct, num_total = evaluate_perm(test_scores_orig, test_scores_perm) + print("Results on test:\nAccuracy: %0.2f" % (100 * (num_correct / num_total))) diff --git a/entity_grid.py b/entity_grid.py new file mode 100644 index 0000000..30a75d6 --- /dev/null +++ b/entity_grid.py @@ -0,0 +1,153 @@ +from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import RandomForestRegressor +import os, csv, random, sys +import numpy as np +from sklearn.metrics import mean_squared_error +from scipy.stats import spearmanr + +corpus = sys.argv[1] +feature_dirname = sys.argv[2] +evaluation = sys.argv[3] + + +def evaluate_fscore(labels, predictions): + tp = 0 + fp = 0 + fn = 0 + for idx, label in enumerate(labels): + pred = predictions[idx] + if pred == label: + if label == 1: + tp += 1 + else: # incorrect prediction + if pred == 1: + fp += 1 + else: + fn += 1 + precision = 0 + if (tp + fp) > 0: + precision = tp / (tp + fp) + recall = 0 + if (tp + fn) > 0: + recall = tp / (tp + fn) + f05 = 0 # compute F0.5 score + if (precision + recall) > 0: + f05 = (1.25 * precision * recall) / (1.25 * precision + recall) + return precision, recall, f05 + +def read_features(text_ids, labels_dict): + instances = [] + labels = [] + for text_id in text_ids: + if evaluation == 'perm': + orig_instance = [] + orig_filename = in_dir + 'features_permute/' + feature_dirname + '/' + text_id + '.0.feat' + if not os.path.exists(orig_filename): + continue # file without valid permutations + with open(in_dir + 'features_permute/' + feature_dirname + '/' + text_id + '.0.feat', 'r') as in_file: + for line in in_file: + line = line.strip().split() + for val in line: + orig_instance.append(float(val)) + for j in range(1, 21): + other_doc_instance = [] + filename = in_dir + 'features_permute/' + feature_dirname + '/' + text_id + '.perm-' + str(j) + '.feat' + if not os.path.exists(filename): + continue + with open(in_dir + 'features_permute/' + feature_dirname + '/' + text_id + '.perm-' + str(j) + '.feat', + 'r') as in_file: + for line in in_file: + line = line.strip().split() + for val in line: + other_doc_instance.append(float(val)) + # randomly order documents + doc_order = random.randint(1, 2) + if doc_order == 1: # doc1 = orig document + feat = np.asarray(orig_instance) - np.asarray(other_doc_instance) + label = 1 + else: + feat = np.asarray(other_doc_instance) - np.asarray(orig_instance) + label = 2 + instances.append(feat) + labels.append(label) + else: + instance = [] + with open(in_dir + 'features/' + feature_dirname + '/' + text_id + '.feat','r') as in_file: + for line in in_file: + line = 
line.strip().split() + for val in line: + instance.append(float(val)) + labels.append(labels_dict[text_id]) + instances.append(instance) + return instances, labels + + +in_dir = 'data/' + corpus + '/' +train_ids = [] +train_labels_dict = {} +eval_ids = [] +eval_labels_dict = {} +splits = ['train', 'test'] +for split in splits: + if evaluation == 'perm': + in_filename = in_dir + corpus + '_' + split + '_perm.csv' + else: + in_filename = in_dir + corpus + '_' + split + '.csv' + with open(in_filename, 'r') as in_file: + reader = csv.DictReader(in_file) + for row in reader: + text_id = row['text_id'] + label = None + if evaluation == 'class': + label = int(row['labelA']) + elif evaluation == 'score_pred': + labels = [int(row['ratingA1']), int(row['ratingA2']), int(row['ratingA3'])] + label = np.mean(labels) + elif evaluation == 'minority': + num_low_judgments = 0 + if row['ratingA1'] == '1': + num_low_judgments += 1 + if row['ratingA2'] == '1': + num_low_judgments += 1 + if row['ratingA3'] == '1': + num_low_judgments += 1 + if num_low_judgments >= 2: + label = 1 + else: + label = 0 + if split == 'train': + train_ids.append(text_id) + train_labels_dict[text_id] = label + elif split == 'test': + eval_ids.append(text_id) + eval_labels_dict[text_id] = label +# read features +train_instances, train_labels = read_features(train_ids, train_labels_dict) +train_arr = np.array(train_instances) +eval_instances, eval_labels = read_features(eval_ids, eval_labels_dict) +eval_arr = np.array(eval_instances) +# shuffle training data +indices = [idx for idx in range(len(train_instances))] +random.shuffle(indices) +shuffle_train_instances = [train_instances[idx] for idx in indices] +shuffle_train_labels = [train_labels[idx] for idx in indices] +# train and evaluate model +if evaluation == 'class' or evaluation == 'minority' or evaluation == 'perm': + clf = RandomForestClassifier() +elif evaluation == 'score_pred': + clf = RandomForestRegressor() +clf.fit(np.array(shuffle_train_instances), np.array(shuffle_train_labels)) +# predictions = clf.predict(np.array(eval_instances)) +if evaluation == 'class' or evaluation == 'perm': + accuracy = clf.score(np.array(eval_instances), np.array(eval_labels)) + print("Results on test:\nAccuracy: %0.2f" % (accuracy * 100)) +elif evaluation == 'score_pred': + predictions = clf.predict(np.array(eval_instances)) + mse = mean_squared_error(eval_labels, predictions) + corr = spearmanr(eval_labels, predictions)[0] + print("Results on test:\nSpearman corr: %0.3f MSE: %0.3f" % (corr, mse)) +if evaluation == 'minority': + predictions = clf.predict(np.array(eval_instances)) + eval_precision, eval_recall, eval_fscore = evaluate_fscore(eval_labels, predictions) + print("Results on test:\nPrecision: %0.2f Recall: %0.2f F0.5: %0.2f" % (eval_precision, eval_recall, eval_fscore)) + diff --git a/evaluation.py b/evaluation.py new file mode 100644 index 0000000..b6ee57d --- /dev/null +++ b/evaluation.py @@ -0,0 +1,286 @@ +import numpy as np +from torch.autograd import Variable +import torch +USE_CUDA = torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor +LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor +from scipy.stats import spearmanr +import csv + +def eval_docs(model, loss_fn, eval_data, labels, data_obj, params): + steps = int(len(eval_data) / params['batch_size']) + if len(eval_data) % params['batch_size'] != 0: + steps += 1 + eval_indices = list(range(len(eval_data))) + eval_pred = [] + eval_labels = [] + loss = 0 + 
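# switch the model to evaluation mode so dropout is disabled while scoring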
model.eval() + for step in range(steps): + end_idx = (step + 1) * params['batch_size'] + if end_idx > len(eval_data): + end_idx = len(eval_data) + batch_ind = eval_indices[(step * params['batch_size']):end_idx] + sentences, orig_batch_labels = data_obj.get_batch(eval_data, labels, batch_ind, params['model_type']) + batch_padded, batch_lengths, original_index = data_obj.pad_to_batch( + sentences, data_obj.word_to_idx, params['model_type']) + batch_pred = model(batch_padded, batch_lengths, original_index) + if params['task'] == 'score_pred': + loss += loss_fn(batch_pred, Variable(FloatTensor(orig_batch_labels))).cpu().data.numpy() + eval_pred.extend(list(batch_pred.cpu().data.numpy())) + else: + loss += loss_fn(batch_pred, Variable(LongTensor(orig_batch_labels))).cpu().data.numpy() + eval_pred.extend(list(np.argmax(batch_pred.cpu().data.numpy(), axis=1))) + eval_labels.extend(orig_batch_labels) + if params['task'] == 'score_pred': + mse = np.square(np.subtract(np.array(eval_pred), np.expand_dims(np.array(eval_labels), 1))).mean() + corr = spearmanr(np.array(eval_pred), np.expand_dims(np.array(eval_labels), 1))[0] + accuracy = corr + elif params['task'] == 'minority': + f05, precision, recall = evaluate(eval_pred, eval_labels, "f05") + else: + accuracy, num_correct, num_total = evaluate(eval_pred, eval_labels, "accuracy") + if params['task'] == 'minority': + return f05, precision, recall, loss + else: + return accuracy, loss + + +def eval_docs_rank(model, eval_docs, data_obj, params): + num_correct = 0 + num_total = 0 + loss = 0 + model.eval() + eval_pred = [] + eval_ids_perm = [] + for doc in eval_docs: + orig_doc, perm_docs = data_obj.retrieve_doc_sents_by_label(doc) + batch_padded_orig, batch_lengths_orig, original_index_orig = data_obj.pad_to_batch(orig_doc, data_obj.word_to_idx, params['model_type']) + orig_pred = model(batch_padded_orig, batch_lengths_orig, original_index_orig) + orig_coh_score = orig_pred.cpu().data.numpy()[0][1] # probability that doc is coherent + for idx, perm_doc in enumerate(perm_docs): + perm_doc = [perm_doc] + batch_padded_perm, batch_lengths_perm, original_index_perm = data_obj.pad_to_batch(perm_doc, data_obj.word_to_idx, params['model_type']) + perm_pred = model(batch_padded_perm, batch_lengths_perm, original_index_perm) + pred_coh_score = perm_pred.cpu().data.numpy()[0][1] # probability that doc is coherent + if orig_coh_score > pred_coh_score: + num_correct += 1 + eval_pred.append(1) + else: + eval_pred.append(0) + eval_ids_perm.append(doc.id + "#" + str(idx+1)) + num_total += 1 + accuracy = num_correct / num_total + return accuracy, loss + + +def evaluate(pred_labels, labels, type): + num_correct = 0 + num_total = 0 + tp = 0 + fp = 0 + fn = 0 + for index, pred_val in enumerate(pred_labels): + gold_val = labels[index] + if type == "accuracy": + if pred_val == gold_val: + num_correct += 1 + elif type == "f05": + if pred_val == gold_val: + if gold_val == 1: + tp += 1 + else: + if pred_val == 1: + fp += 1 + else: + fn += 1 + num_total += 1 + if type == "f05": + precision = 0 + if (tp + fp) > 0: + precision = tp / (tp + fp) + recall = 0 + if (tp + fn) > 0: + recall = tp / (tp + fn) + f05 = 0 + if (precision + recall) > 0: + f05 = (1.25 * precision * recall) / (1.25 * precision + recall) + return f05, precision, recall + return np.sum(np.array(pred_labels) == np.array(labels)) / float( + len(pred_labels)), num_correct, num_total + + +def eval_cliques(model, loss_fn, clique_data, clique_labels, batch_size, clique_size, data_obj, model_type, task): + steps = 
int(len(clique_data) / batch_size) + if len(clique_data) % batch_size != 0: + steps += 1 + dev_indices = list(range(len(clique_data))) + eval_pred = [] + eval_labels = [] + loss = 0 + model.eval() + for step in range(steps): + end_idx = (step + 1) * batch_size + if end_idx > len(clique_data): + end_idx = len(clique_data) + batch_ind = dev_indices[(step * batch_size):end_idx] + sentences, orig_batch_labels = data_obj.get_batch(clique_data, clique_labels, batch_ind, model_type, clique_size) + batch_padded, batch_lengths, original_index = data_obj.pad_to_batch(sentences, data_obj.word_to_idx, model_type, clique_size) + batch_pred = model(batch_padded, batch_lengths, original_index) + if task == 'score_pred': + loss += loss_fn(batch_pred, Variable(FloatTensor(orig_batch_labels))).cpu().data.numpy() + eval_pred.extend(list(batch_pred.cpu().data.numpy())) + else: + loss += loss_fn(batch_pred, Variable(LongTensor(orig_batch_labels))).cpu().data.numpy() + eval_pred.extend(list(np.argmax(batch_pred.cpu().data.numpy(), axis=1))) + eval_labels.extend(orig_batch_labels) + if task == 'score_pred': + mse = np.square(np.subtract(np.array(eval_pred), np.expand_dims(np.array(eval_labels), 1))).mean() + corr = spearmanr(np.array(eval_pred), np.expand_dims(np.array(eval_labels), 1))[0] + accuracy = corr + else: + accuracy, num_correct, num_total = evaluate(eval_pred, eval_labels, "accuracy") + return accuracy, loss + + +def eval_doc_cliques(model, docs, data_obj, params): + num_correct = 0 + num_total = 0 + tp = 0 + fp = 0 + fn = 0 + model.eval() + eval_ids = [] + eval_pred = [] + eval_labels = [] + for doc in docs: + if params['task'] == 'perm': + orig_doc_cliques, perm_doc_cliques = data_obj.retrieve_doc_cliques_by_label(doc, params['task']) + orig_doc_score = score_doc(model, orig_doc_cliques, params['batch_size'], params['clique_size'], data_obj, params['model_type']) + for perm_count, cliques in enumerate(perm_doc_cliques): + perm_doc_score = score_doc(model, cliques, params['batch_size'], params['clique_size'], data_obj, params['model_type']) + eval_ids.append(doc.id + "#" + str(perm_count)) + if orig_doc_score > perm_doc_score: + num_correct += 1 + eval_pred.append(1) + else: + eval_pred.append(0) + num_total += 1 + elif params['task'] == 'class': + orig_doc_cliques, _ = data_obj.retrieve_doc_cliques_by_label(doc, params['task']) + pred_label = label_doc(model, orig_doc_cliques, params['batch_size'], params['clique_size'], data_obj, params['model_type']) + eval_pred.append(pred_label) + if pred_label == doc.label: + num_correct += 1 + num_total += 1 + elif params['task'] == 'minority': + orig_doc_cliques, _ = data_obj.retrieve_doc_cliques_by_label(doc, params['task']) + pred_label = label_doc(model, orig_doc_cliques, params['batch_size'], params['clique_size'], data_obj, + params['model_type']) + eval_pred.append(pred_label) + if pred_label == doc.label: + num_correct += 1 + if pred_label == doc.label: + if doc.label == 1: + tp = 1 + else: + if pred_label == 1: + fp += 1 + else: + fn += 1 + num_total += 1 + elif params['task'] == 'score_pred': + orig_doc_cliques, _ = data_obj.retrieve_doc_cliques_by_label(doc, params['task']) + pred_score = score_doc_regression(model, orig_doc_cliques, params['batch_size'], params['clique_size'], data_obj, params['model_type']) + eval_pred.append(pred_score) + eval_labels.append(doc.label) + precision = 0 + recall = 0 + f05 = 0 + if params['task'] == 'score_pred': + mse = np.square(np.subtract(eval_pred, eval_labels)).mean() + corr = spearmanr(eval_pred, 
eval_labels)[0] + accuracy = corr + else: + accuracy = num_correct / num_total + if (tp + fp) > 0: + precision = tp / (tp + fp) + if (tp + fn) > 0: + recall = tp / (tp + fn) + if (precision + recall) > 0: + f05 = (1.25 * precision * recall) / (1.25 * precision + recall) + return accuracy, precision, recall, f05 + + +# average scores of all cliques for a single document (3-class task) +def label_doc(model, doc_cliques, batch_size, clique_size, data_obj, model_type): + steps = int(len(doc_cliques) / batch_size) + labels = [-1 for clique in doc_cliques] + if len(doc_cliques) % batch_size != 0: + steps += 1 + clique_indices = list(range(len(doc_cliques))) + pred_distributions = None + model.eval() + for step in range(steps): + end_idx = (step + 1) * batch_size + if end_idx > len(doc_cliques): + end_idx = len(doc_cliques) + batch_ind = clique_indices[(step * batch_size):end_idx] + sentences, orig_batch_labels = data_obj.get_batch(doc_cliques, labels, batch_ind, model_type, clique_size) + batch_padded, batch_lengths, original_index = data_obj.pad_to_batch(sentences, data_obj.word_to_idx, model_type, clique_size) + batch_pred = model(batch_padded, batch_lengths, original_index) + batch_data = batch_pred.cpu().data.numpy() + if pred_distributions is None: + pred_distributions = batch_data + else: + pred_distributions = np.concatenate([pred_distributions, batch_data]) + pred_label = np.argmax(np.mean(pred_distributions, axis=0)) + return pred_label + + +# average scores of all cliques for a single document (binary task) +def score_doc(model, doc_cliques, batch_size, clique_size, data_obj, model_type): + steps = int(len(doc_cliques) / batch_size) + labels = [-1 for clique in doc_cliques] + if len(doc_cliques) % batch_size != 0: + steps += 1 + clique_indices = list(range(len(doc_cliques))) + prob_list = [] + model.eval() + for step in range(steps): + end_idx = (step + 1) * batch_size + if end_idx > len(doc_cliques): + end_idx = len(doc_cliques) + batch_ind = clique_indices[(step * batch_size):end_idx] + sentences, orig_batch_labels = data_obj.get_batch(doc_cliques, labels, batch_ind, model_type, clique_size) + batch_padded, batch_lengths, original_index = data_obj.pad_to_batch(sentences, data_obj.word_to_idx, model_type, clique_size) + batch_pred = model(batch_padded, batch_lengths, original_index) + batch_data = batch_pred.cpu().data.numpy() + for row in batch_data: + prob_list.append(row[1]) # probability that the clique is coherent + score = np.mean(prob_list) + return score + + +# average scores of all cliques for a single document (score prediction task) +def score_doc_regression(model, doc_cliques, batch_size, clique_size, data_obj, model_type): + steps = int(len(doc_cliques) / batch_size) + labels = [-1 for clique in doc_cliques] + if len(doc_cliques) % batch_size != 0: + steps += 1 + clique_indices = list(range(len(doc_cliques))) + prob_list = [] + model.eval() + for step in range(steps): + end_idx = (step + 1) * batch_size + if end_idx > len(doc_cliques): + end_idx = len(doc_cliques) + batch_ind = clique_indices[(step * batch_size):end_idx] + sentences, orig_batch_labels = data_obj.get_batch(doc_cliques, labels, batch_ind, model_type, clique_size) + batch_padded, batch_lengths, original_index = data_obj.pad_to_batch(sentences, data_obj.word_to_idx, model_type, clique_size) + batch_pred = model(batch_padded, batch_lengths, original_index) + batch_data = batch_pred.cpu().data.numpy() + for row in batch_data: + prob_list.append(row[0]) # regression score + score = np.mean(prob_list) + return 
score \ No newline at end of file diff --git a/extract_entity_grid.py b/extract_entity_grid.py new file mode 100644 index 0000000..9339b13 --- /dev/null +++ b/extract_entity_grid.py @@ -0,0 +1,125 @@ +# takes csv files, parses them, and extracts entity grid +from pycorenlp import StanfordCoreNLP +import os, json, sys + +corpus = sys.argv[1] +in_dir = 'data/' + corpus + '/' +nlp = StanfordCoreNLP('http://localhost:9000') # requires you have the Stanford CoreNLP server running: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html#getting-started + +if not os.path.exists(in_dir + 'parsed/'): + os.makedirs(in_dir + 'parsed/') +if not os.path.exists(in_dir + 'grid/'): + os.makedirs(in_dir + 'grid/') + + +def update_noun_types(dep_type, np_words, curr_nouns_type): + for word in np_words: + if word not in curr_nouns_type: + curr_nouns_type[word] = dep_type + if curr_nouns_type[word] == "x" or curr_nouns_type[word] == "o": + curr_nouns_type[word] = dep_type + return curr_nouns_type + + +def get_np(dependency, const_parse): + target_id = dependency['dependent'] + index = 0 + nouns = [] + for line in const_parse.splitlines(): + if ")" not in line: + continue + tokens = line.strip().split(") (") + num_tokens = len(tokens) # remove phrase label + phrase_start_idx = index + 1 + index += num_tokens + phrase_end_idx = index + 1 + if target_id <= index and tokens[0].startswith("(NP"): + for token in tokens: + if token.startswith("(NP"): + token = token[3:].strip() + while token.startswith("("): + token = token[1:] + while token.endswith(")"): + token = token[:-1].strip() + word = token.split(None, 1)[1] # remove POS tag + if token.startswith("NN"): + nouns.append(word.lower()) + elif token.startswith("PRP "): + nouns.append(word.lower()) + elif token.startswith("DT") and len(tokens) == 1: + nouns.append(word.lower()) # is noun phrase, only one DT word (this, all) in the phrase + break + return nouns, phrase_start_idx, phrase_end_idx + + +# read all text files, parse and extract entity grid +for filename in os.listdir(in_dir + "text/"): + with open(in_dir + "text/" + filename,'r') as in_file: + if not filename.endswith(".txt"): + continue + nouns_list = [] + nouns_dict = {} + sent_annotations = [] + text_id = filename.rsplit(".", 1)[0] + const_out = open(in_dir + "parsed/" + text_id + ".const_parse", "w") + dep_out = open(in_dir + "parsed/" + text_id + ".dep_parse", "w") + grid_out = open(in_dir + "grid/" + text_id + ".grid", "w") + # read text document + document_lines = [] + for line in in_file: + line = line.strip() + if line == "": + continue + if isinstance(line, str): + document_lines.append(line) + document = " ".join(document_lines) + try: + output = nlp.annotate(document, properties={ + 'annotators': 'tokenize,ssplit,pos,depparse,parse', + 'outputFormat': 'json' + }) + except: + print('Failed to parse file %s' % filename) + continue + if output == 'CoreNLP request timed out. 
Your document may be too long.': + print('Timed out when attempting to parse file %s' % filename) + continue + for sent in output['sentences']: + sent_idx = sent['index'] + 1 + const_out.write(sent['parse'] + "\n") + json.dump(sent['basicDependencies'], dep_out) + dep_out.write("\n") + curr_nouns_type = {} + for token in sent['tokens']: + # collect all nouns and pronouns + if token['pos'].startswith("NN") or token['pos'] == 'PRP': + token_str = token['word'].lower() + curr_nouns_type[token_str] = "x" + if token_str not in nouns_dict: + nouns_list.append(token_str) + nouns_dict[token_str] = 0 + nouns_dict[token_str] += 1 + # find highest-ranked role of entity in this sentence (subj > obj > other) + for dep in sent['basicDependencies']: + dep_type = "" + if dep['dep'] == 'nsubj' or dep['dep'] == 'nsubjpass': + dep_type = "s" + elif dep['dep'] == 'dobj': + dep_type = "o" + if dep_type != "": + np, phrase_start_idx, phrase_end_idx = get_np(dep, sent['parse']) + curr_nouns_type = update_noun_types(dep_type, np, curr_nouns_type) + sent_annotations.append(curr_nouns_type) + + # output entity grid + for noun in nouns_list: + grid_out.write(noun + " ") + for sent_ann in sent_annotations: + if noun in sent_ann: + grid_out.write(sent_ann[noun] + " ") + else: + grid_out.write("- ") + grid_out.write(str(nouns_dict[noun]) + "\n") # entity frequency (salience count) + grid_out.close() + const_out.close() + dep_out.close() diff --git a/extract_entity_grid_perm.py b/extract_entity_grid_perm.py new file mode 100644 index 0000000..4b4d61d --- /dev/null +++ b/extract_entity_grid_perm.py @@ -0,0 +1,153 @@ +# takes csv files, parses them, and extracts entity grid +from pycorenlp import StanfordCoreNLP +import os, json, sys + +nlp = StanfordCoreNLP('http://localhost:9000') +corpus = sys.argv[1] + +in_dir = 'data/' + corpus + '/' + +if not os.path.exists(in_dir + 'parsed_permute/'): + os.makedirs(in_dir + 'parsed_permute/') +if not os.path.exists(in_dir + 'grid_permute/'): + os.makedirs(in_dir + 'grid_permute/') + +def update_noun_types(dep_type, np_words, curr_nouns_type): + for word in np_words: + if word not in curr_nouns_type: + curr_nouns_type[word] = dep_type + if curr_nouns_type[word] == "x" or curr_nouns_type[word] == "o": + curr_nouns_type[word] = dep_type + return curr_nouns_type + + +def get_np(dependency, const_parse): + target_id = dependency['dependent'] + index = 0 + nouns = [] + for line in const_parse.splitlines(): + if ")" not in line: + continue + tokens = line.strip().split(") (") + num_tokens = len(tokens) # remove phrase label + index += num_tokens + if target_id <= index and tokens[0].startswith("(NP"): + for token in tokens: + if token.startswith("(NP"): + token = token[3:].strip() + while token.startswith("("): + token = token[1:] + while token.endswith(")"): + token = token[:-1].strip() + word = token.split(None, 1)[1] # remove POS tag + if token.startswith("NN"): + nouns.append(word.lower()) + elif token.startswith("PRP "): + nouns.append(word.lower()) + elif token.startswith("DT") and len(tokens) == 1: + nouns.append(word.lower()) # is noun phrase, only one DT word (this, all) in the phrase + break + return nouns + +# read all text files, parse and extract entity grid +for filename in os.listdir(in_dir + "text_permute/"): + if not filename.endswith("_sent.txt"): + continue # original files only + with open(in_dir + "text_permute/" + filename, 'r') as in_file: + # process original sentence order file + nouns_list = [] + nouns_dict = {} + sent_annotations = [] + text_id = 
filename.rsplit("_", 1)[0] + const_out_filename = in_dir + "parsed_permute/" + text_id + ".0.const_parse" + dep_out_filename = in_dir + "parsed_permute/" + text_id + ".0.dep_parse" + grid_out_filename = in_dir + "parsed_permute/" + text_id + ".0.grid" + if os.path.exists(const_out_filename) and os.path.exists(dep_out_filename) and os.path.exists( + grid_out_filename): + continue + const_out = open(in_dir + "parsed_permute/" + text_id + ".0.const_parse", "w") + const_lines = {} + dep_out = open(in_dir + "parsed_permute/" + text_id + ".0.dep_parse", "w") + dep_lines = {} + grid_out = open(in_dir + "grid_permute/" + text_id + ".0.grid", "w") + grid_lines = {} + for line in in_file: # sentences in original order + line = line.strip() + const_lines[line] = [] + dep_lines[line] = [] + grid_lines[line] = [] + if line.strip() == "": # not sure if this ever fires (I might have removed line breaks in these files -- for entity grid only) + const_out.write("\n\n") + dep_out.write("\n\n") + continue + output = nlp.annotate(line, properties={ + 'annotators': 'tokenize,ssplit,pos,depparse,parse', + 'outputFormat': 'json' + }) + for sent in output['sentences']: + const_out.write(sent['parse'] + "\n") + const_lines[line].append(sent['parse']) + json.dump(sent['basicDependencies'], dep_out) + dep_out.write("\n") + dep_lines[line].append(sent['basicDependencies']) + curr_nouns_type = {} + for token in sent['tokens']: + if token['pos'].startswith("NN") or token['pos'] == 'PRP': + token_str = token['word'].lower() + curr_nouns_type[token_str] = "x" + if token_str not in nouns_dict: + nouns_list.append(token_str) + nouns_dict[token_str] = 0 + nouns_dict[token_str] += 1 + for dep in sent['basicDependencies']: + dep_type = "" + if dep['dep'] == 'nsubj' or dep['dep'] == 'nsubjpass': + dep_type = "s" + elif dep['dep'] == 'dobj': + dep_type = "o" + if dep_type != "": + np = get_np(dep, sent['parse']) + curr_nouns_type = update_noun_types(dep_type, np, curr_nouns_type) + sent_annotations.append(curr_nouns_type) + grid_lines[line].append(curr_nouns_type) + + for noun in nouns_list: + grid_out.write(noun + " ") + for sent_ann in sent_annotations: + if noun in sent_ann: + grid_out.write(sent_ann[noun] + " ") + else: + grid_out.write("- ") + grid_out.write(str(nouns_dict[noun]) + "\n") # frequency for salience feature + grid_out.close() + const_out.close() + dep_out.close() + for i in range(1, 21): + filename_perm = text_id + ".perm-" + str(i) + if not os.path.exists(in_dir + "text_permute/" + filename_perm + ".txt"): + continue + const_out = open(in_dir + "parsed_permute/" + filename_perm + ".const_parse", "w") + dep_out = open(in_dir + "parsed_permute/" + filename_perm + ".dep_parse", "w") + grid_out = open(in_dir + "grid_permute/" + filename_perm + ".grid", "w") + sent_annotations = [] + with open(in_dir + "text_permute/" + filename_perm + ".txt", "r") as in_file: + for line in in_file: + line = line.strip() + for parse in const_lines[line]: + const_out.write(parse + "\n") + for parse in dep_lines[line]: + json.dump(parse, dep_out) + dep_out.write("\n") + for grid_line in grid_lines[line]: + sent_annotations.append(grid_line) + for noun in nouns_list: + grid_out.write(noun + " ") + for sent_ann in sent_annotations: + if noun in sent_ann: + grid_out.write(sent_ann[noun] + " ") + else: + grid_out.write("- ") + grid_out.write(str(nouns_dict[noun]) + "\n") # saliance frequency feature + grid_out.close() + const_out.close() + dep_out.close() \ No newline at end of file diff --git a/extract_features_from_grid.py 
b/extract_features_from_grid.py new file mode 100644 index 0000000..6e73a39 --- /dev/null +++ b/extract_features_from_grid.py @@ -0,0 +1,104 @@ +import itertools +import os, sys + +corpus = sys.argv[1] +seq_len = int(sys.argv[2]) +salience_threshold = int(sys.argv[3]) +syntax_opt = int(sys.argv[4]) +is_permute_arg = sys.argv[5] +is_permute = False +if is_permute_arg == 'true': + is_permute = True +append_str = '' +if is_permute: + append_str = '_permute' + +in_dir = 'data/'+corpus+'/' +if not os.path.isdir(in_dir + 'features' + append_str + '/'): + os.mkdir(in_dir + 'features' + append_str + '/') +feat_dir = in_dir + 'features' + append_str + '/seq_' + str(seq_len) + '_sal_' + str(salience_threshold) + '_syn_' + str(syntax_opt) + '/' +if not os.path.isdir(feat_dir): + os.mkdir(feat_dir) +print(feat_dir) +for filename in os.listdir(in_dir + 'grid' + append_str + '/'): + if not filename.endswith("grid"): + continue + filename_base = filename.rsplit(".", 1)[0] + out_file = open(feat_dir + filename_base + ".feat", "w") + with open(in_dir + 'grid' + append_str + '/' + filename, "r") as in_file: + # read grid + sequences = [] + frequencies = [] + for line in in_file: + line = line.strip() + tokens = line.split() + try: + frequency = int(tokens[-1]) + except ValueError: + print(line) + frequency = 0 + frequencies.append(frequency) + sequence = "".join(tokens[1:-1]) + sequence = "<" + sequence + ">" # add start and end tokens + sequences.append(sequence) + in_file.close() + + # compute feature vector + if syntax_opt == 1: # syntax on + labels = ['s', 'o', 'x', '-'] + else: # syntax off (ignore entity roles) + labels = ['x', '-'] + feature_vector = [] + for salience_class in [0, 1]: + if salience_threshold == 1 and salience_class == 1: # only one salience class + break + for i in range(seq_len): # over possible sequence lengths + seq_len = i + 1 # shortest seq is length 2 + num_total_sequences = 0 + for sent_index, sentence in enumerate(sequences): + if salience_class == 0 and frequencies[sent_index] >= salience_threshold: + num_total_sequences += len(sentence) - seq_len + 1 + elif salience_class == 1 and frequencies[sent_index] < salience_threshold: + num_total_sequences += len(sentence) - seq_len + 1 + total_prob = 0 + seq_minus_one = {} + for possible_seq in itertools.product(labels, repeat=seq_len): + possible_seq_tok = "".join(possible_seq) + seq_minus_one[possible_seq_tok[:-1]] = 1 + num_occurrences = 0 + for sent_index, sentence in enumerate(sequences): + sentence_temp = sentence + if syntax_opt == 0: + sentence_temp = sentence_temp.replace('s', 'x') + sentence_temp = sentence_temp.replace('o', 'x') + if salience_class == 0 and frequencies[sent_index] >= salience_threshold: + num_occurrences += sum(sentence_temp[j:].startswith(possible_seq_tok) for j in range(len(sentence_temp))) + elif salience_class == 1 and frequencies[sent_index] < salience_threshold: + num_occurrences += sum(sentence_temp[j:].startswith(possible_seq_tok) for j in range(len(sentence_temp))) + feature_prob = 0 + if num_total_sequences > 0: + feature_prob = float(num_occurrences) / num_total_sequences + feature_vector.append(feature_prob) + total_prob += feature_prob + # add start and end tokens + for shorter_seq in seq_minus_one: + possible_seq_toks = ["<" + shorter_seq, shorter_seq + ">"] + for possible_seq_tok in possible_seq_toks: + num_occurrences = 0 + for sent_index, sentence in enumerate(sequences): + sentence_temp = sentence + if syntax_opt == 0: + sentence_temp = sentence_temp.replace('s', 'x') + sentence_temp = 
sentence_temp.replace('o', 'x')
+                        if salience_class == 0 and frequencies[sent_index] >= salience_threshold:
+                            num_occurrences += sum(sentence_temp[j:].startswith(possible_seq_tok) for j in range(len(sentence_temp)))
+                        elif salience_class == 1 and frequencies[sent_index] < salience_threshold:
+                            num_occurrences += sum(sentence_temp[j:].startswith(possible_seq_tok) for j in range(len(sentence_temp)))
+                    feature_prob = 0
+                    if num_total_sequences > 0:
+                        feature_prob = float(num_occurrences) / num_total_sequences
+                    feature_vector.append(feature_prob)
+                    total_prob += feature_prob
+    for val in feature_vector:
+        out_file.write(str(val) + " ")
+    out_file.close()
diff --git a/extract_graph_from_grid.py b/extract_graph_from_grid.py
new file mode 100644
index 0000000..d0daeb6
--- /dev/null
+++ b/extract_graph_from_grid.py
@@ -0,0 +1,115 @@
+import itertools
+import os, sys
+import numpy as np
+
+role_weights = {'s': 3, 'o': 2, 'x': 1}
+
+
+def compute_avg_outdeg(matrix):
+    out_degree_list = []
+    for sent in matrix:
+        out_degree = 0
+        for weight in sent:
+            out_degree += weight
+        out_degree_list.append(out_degree)
+    return np.mean(out_degree_list)
+
+
+corpus = sys.argv[1]
+is_permute_arg = sys.argv[2]
+is_permute = False
+if is_permute_arg == 'true':
+    is_permute = True
+append_str = ''
+if is_permute:
+    append_str = '_permute'
+
+root_dir = 'data/'+corpus+'/'
+in_dir = root_dir + 'grid' + append_str + '/'
+out_dir = root_dir + 'graph' + append_str + '/'
+if not os.path.exists(out_dir):
+    os.makedirs(out_dir)
+
+# process all grid files (*.grid)
+for filename in os.listdir(in_dir):
+    if not filename.endswith("grid"):
+        continue
+    filename_base = filename.rsplit(".", 1)[0]  # assumes no periods '.' in grid name
+    out_file_u = open(out_dir + filename_base + ".graph_u", "w")
+    out_file_u_dist = open(out_dir + filename_base + ".graph_u_dist", "w")
+    out_file_w = open(out_dir + filename_base + ".graph_w", "w")
+    out_file_w_dist = open(out_dir + filename_base + ".graph_w_dist", "w")
+    out_file_syn = open(out_dir + filename_base + ".graph_syn", "w")
+    out_file_syn_dist = open(out_dir + filename_base + ".graph_syn_dist", "w")
+    with open(in_dir + filename, "r") as in_file:
+        matrix_u = []
+        matrix_u_dist = []
+        matrix_w = []
+        matrix_w_dist = []
+        matrix_syn = []
+        matrix_syn_dist = []
+        for line in in_file:  # for all entities in text
+            line = line.strip()
+            tokens = line.split()
+            try:
+                count = int(tokens[-1])
+                sentence_roles = tokens[1:-1]
+            except ValueError:
+                sentence_roles = tokens[1:]  # remove frequency count and word
+            while sentence_roles[0] not in {'-', 'x', 's', 'o'}:
+                sentence_roles = sentence_roles[1:]
+            num_sentences = len(sentence_roles)
+            if matrix_u == []:
+                # initialize adjacency matrices
+                for i in range(num_sentences):
+                    list_i = []
+                    for j in range(num_sentences):
+                        list_i.append(0)
+                    matrix_u.append(list(list_i))  # copy list
+                    matrix_u_dist.append(list(list_i))
+                    matrix_w.append(list(list_i))
+                    matrix_w_dist.append(list(list_i))
+                    matrix_syn.append(list(list_i))
+                    matrix_syn_dist.append(list(list_i))
+            # find sentences that contain this entity
+            sentence_indices = []
+            for index, role in enumerate(sentence_roles):
+                if role != "-":
+                    sentence_indices.append(index)
+            for pair in itertools.combinations(sentence_indices, 2):  # get all sentence pairs
+                first_sent = min(pair)
+                second_sent = max(pair)
+                matrix_u[first_sent][second_sent] = 1  # binary
+                matrix_u_dist[first_sent][second_sent] = 1 / (second_sent - first_sent)
+                matrix_w[first_sent][second_sent] += 1  # count
matrix_w_dist[first_sent][second_sent] += 1 / (second_sent - first_sent) + matrix_syn[first_sent][second_sent] += role_weights[sentence_roles[first_sent]] * role_weights[sentence_roles[second_sent]] + matrix_syn_dist[first_sent][second_sent] += role_weights[sentence_roles[first_sent]] * role_weights[sentence_roles[second_sent]] / (second_sent - first_sent) + # print graph score to files + out_file_u.write(str(compute_avg_outdeg(matrix_u)) + "\n") + out_file_u_dist.write(str(compute_avg_outdeg(matrix_u_dist)) + "\n") + out_file_w.write(str(compute_avg_outdeg(matrix_w)) + "\n") + out_file_w_dist.write(str(compute_avg_outdeg(matrix_w_dist)) + "\n") + out_file_syn.write(str(compute_avg_outdeg(matrix_syn)) + "\n") + out_file_syn_dist.write(str(compute_avg_outdeg(matrix_syn_dist)) + "\n") + # print graph adjacency matrix + for i in range(num_sentences): + for j in range(num_sentences): + out_file_u.write(str(matrix_u[i][j]) + " ") + out_file_u_dist.write(str(matrix_u_dist[i][j]) + " ") + out_file_w.write(str(matrix_w[i][j]) + " ") + out_file_w_dist.write(str(matrix_w_dist[i][j]) + " ") + out_file_syn.write(str(matrix_syn[i][j]) + " ") + out_file_syn_dist.write(str(matrix_syn_dist[i][j]) + " ") + out_file_u.write("\n") + out_file_u_dist.write("\n") + out_file_w.write("\n") + out_file_w_dist.write("\n") + out_file_syn.write("\n") + out_file_syn_dist.write("\n") + out_file_u.close() + out_file_u_dist.close() + out_file_w.close() + out_file_w_dist.close() + out_file_syn.close() + out_file_syn_dist.close() diff --git a/generate_high_coh_permutations.py b/generate_high_coh_permutations.py new file mode 100644 index 0000000..fcc1dc8 --- /dev/null +++ b/generate_high_coh_permutations.py @@ -0,0 +1,109 @@ +import os, random, csv, sys +from nltk.tokenize import sent_tokenize +import itertools +import numpy as np + +corpus = sys.argv[1] +root_dir = 'data/' + corpus + '/' +in_dir = root_dir + 'text/' +out_dir = root_dir + 'text_permute/' + +if not os.path.exists(out_dir): + os.makedirs(out_dir) + +def is_orig_permutation(orig_sents, perm_sents): + for index, sent in enumerate(orig_sents): + if sent != perm_sents[index]: + return False + return True + +# which texts to permute +if corpus == 'Clinton' or corpus == 'Enron' or corpus == 'Yelp' or corpus == 'Dummy': + title_row = ["text_id","subject","text","ratingA1","ratingA2","ratingA3","labelA","ratingM1","ratingM2","ratingM3","ratingM4","ratingM5","labelM"] +elif corpus == 'Yahoo': + title_row = ["text_id","question_title","question","text","ratingA1","ratingA2","ratingA3","labelA","ratingM1","ratingM2","ratingM3","ratingM4","ratingM5","labelM"] +splits = ['train','test'] +high_coh_texts = {} +total = 0 +for split in splits: + in_file = open(root_dir + corpus + '_' + split + '.csv','r') + out_file = open(root_dir + corpus + '_' + split + '_perm.csv', 'w') + writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting = csv.QUOTE_ALL) + writer.writerow(title_row) + reader = csv.DictReader(in_file) + for row in reader: + if row['labelA'] == '3': + high_coh_texts[row['text_id']] = 1 + # print(row) + writer.writerow([row[key] for key in row]) + total += 1 + out_file.close() +print(len(high_coh_texts)) +print("total %d" %total) + +# read orig texts +count = 0 +num_files = 0 +for filename in os.listdir(in_dir): + if not filename.endswith(".txt"): + continue + # read sentences and tokenize at sentence boundaries + sentences = [] + text_id = filename.split(".")[0] + if text_id not in high_coh_texts: + continue + with open(in_dir + filename, 'r') as in_file: 
+ orig_lines = in_file.readlines() + for line in orig_lines: + sentences.extend(sent_tokenize(line)) + # remove empty lines (don't matter for permutations) + new_sentences = [] + for sent in sentences: + sent = sent.strip() + if sent != "": + new_sentences.append(sent) + sentences = new_sentences + if len(sentences) == 1: # no possible permutations + continue + out_file_orig = open(out_dir + text_id + "_sent.txt", "w") + num_files += 1 + count += 1 + for sent in sentences: + out_file_orig.write(sent + "\n") + out_file_orig.close() + # create 20 permutations + num_permutations = 0 + used_permutations = {} + found_duplicate = False + if len(sentences) < 6: # generate all permutations + all_permutations = list(itertools.permutations(sentences)) + random.shuffle(all_permutations) + for perm in all_permutations: + if num_permutations >= 20: + break + if not found_duplicate: + if is_orig_permutation(sentences, perm): + found_duplicate = True + continue + out_file_perm = open(out_dir + text_id + ".perm-" + str(num_permutations+1) + ".txt", "w") + num_files += 1 + for sent in perm: + out_file_perm.write(sent + "\n") + num_permutations += 1 + out_file_perm.close() + else: # need to sample permutations + while num_permutations < 20: + permutation = np.random.permutation(len(sentences)) + permutation_str = [str(num) for num in permutation] + permutation_idx_str = ",".join(permutation_str) + if permutation_idx_str not in used_permutations: + out_file_perm = open(out_dir + text_id + ".perm-" + str(num_permutations+1) + ".txt", "w") + num_files += 1 + for sent_idx in permutation: + out_file_perm.write(sentences[sent_idx] + "\n") + out_file_perm.close() + num_permutations += 1 + used_permutations[permutation_idx_str] = 1 + +print(count) +print(num_files) diff --git a/main.py b/main.py new file mode 100644 index 0000000..491786a --- /dev/null +++ b/main.py @@ -0,0 +1,126 @@ +import argparse +import sys +from data_loader import * +from LSTMClique import LSTMClique +from LSTMSentAvg import LSTMSentAvg +from LSTMParSeq import LSTMParSeq +from train_neural_models import * + +sys.path.insert(0,os.getcwd()) + +dirname, filename = os.path.split(os.path.abspath(__file__)) +root_dir = "/".join(dirname.split("/")[:-1]) + +run_dir = os.path.join(root_dir, "runs") + +parser = argparse.ArgumentParser() + +# data +parser.add_argument("--task", type=str, default="class") # class [classification], perm [binary permutation], score_pred [mean score prediction], minority [minority binary classification] + +# model params +parser.add_argument("--model_type", type=str, default="clique") # clique, doc_seq +parser.add_argument("--learning_rate", type=float, default=0.001) +parser.add_argument("--dropout", type=float, default=0) +parser.add_argument("--lstm_dim", type=int, default=100) +parser.add_argument("--hidden_dim", type=int, default=200, help="hidden layer dimension") +parser.add_argument("--clique", type=int, default=3) # number of sentences in each clique (clique model only) +parser.add_argument("--l2_reg", type=float, default=0) + +# training +parser.add_argument("--batch_size", type=int, default=32) +parser.add_argument("--num_epochs", type=int, default=10) +parser.add_argument("--train_data_limit", type=int, default=-1) # for debugging with subset of data +parser.add_argument("--lr_decay", type=str, default="none") + +# vectors +parser.add_argument("--vector_type", default="glove", help="specify vector type glove/word2vec/none") +parser.add_argument("--glove_path", type=str, default="data/GloVe/glove.840B.300d.txt") 
+parser.add_argument("--embedding_dim", type=int, default=300, help="vector dimension") +parser.add_argument("--case_sensitive", action="store_true", help="activate this flag if vectors are case-sensitive (don't lower-case the data)") + +# per-experiment settings +parser.add_argument("--model_name", type=str) +parser.add_argument("--data_dir", default="data/", help="path to the data directory") +parser.add_argument("--train_corpus", type=str) +parser.add_argument("--test_corpus", type=str) + + +args = parser.parse_args() +if args.model_name is None: + print("Specify name of experiment") + sys.exit(0) +if args.train_corpus is None: + print("Specify train corpus") + sys.exit(0) +if args.test_corpus is None: + args.test_corpus = args.train_corpus + +params = { + 'top_dir': root_dir, + 'run_dir': run_dir, + 'model_name': args.model_name, + 'data_dir': args.data_dir, + 'train_corpus': args.train_corpus, + 'test_corpus': args.test_corpus, + 'task': args.task, + 'train_data_limit': args.train_data_limit, + 'lr_decay': args.lr_decay, + 'model_type': args.model_type, + 'glove_file': args.glove_path, + 'vector_type': args.vector_type, + 'embedding_dim': args.embedding_dim, # word embedding dim + 'case_sensitive': args.case_sensitive, + 'learning_rate': args.learning_rate, + 'dropout': args.dropout, # 1 = no dropout, 0.5 = dropout + 'hidden_dim': args.hidden_dim, + 'lstm_dim': args.lstm_dim, + 'clique_size': args.clique, + 'l2_reg': args.l2_reg, + 'batch_size': args.batch_size, + 'num_epochs': args.num_epochs, +} + +if not os.path.exists(params['run_dir']): + os.mkdir(params['run_dir']) +model_dir = os.path.join(params['run_dir'], params["model_name"]) +if not os.path.exists(model_dir): + os.mkdir(model_dir) +params['model_dir'] = model_dir + +# save parameters +with open(os.path.join(model_dir, params['model_name'] + '.params'), 'w') as param_file: + for key, parameter in params.items(): + param_file.write("{}: {}".format(key, parameter) + "\n") + print((key, parameter)) + +start = time.time() +if params['vector_type'] == 'glove': + params['vector_path'] = params['glove_file'] + +# load data +data = Data(params) +vectors = None +if params['vector_type'] != 'none': + vectors, vector_dim = data.load_vectors() + params['embedding_dim'] = vector_dim + +if params['task'] == 'class' or params['task'] == 'score_pred' or params['task'] == 'minority': + training_docs = data.read_data_class(params, 'train') + test_docs = data.read_data_class(params, 'test') +else: + training_docs = data.read_data_perm(params, 'train') + test_docs = data.read_data_perm(params, 'test') +# dev_docs = None +if params['vector_type'] == 'none': # init random vectors + vectors = data.rand_vectors(len(data.word_to_idx)) + +if params['model_type'] == 'clique': + model = LSTMClique(params, data) + train(params, training_docs, test_docs, data, model) +elif params['model_type'] == 'sent_avg': + model = LSTMSentAvg(params, data) + train(params, training_docs, test_docs, data, model) +elif params['model_type'] == 'par_seq': + model = LSTMParSeq(params, data) + train(params, training_docs, test_docs, data, model) diff --git a/train_neural_models.py b/train_neural_models.py new file mode 100644 index 0000000..03c2b15 --- /dev/null +++ b/train_neural_models.py @@ -0,0 +1,149 @@ +import torch +import torch.optim as optim +import time +import random +from torch.autograd import Variable +from evaluation import * +import progressbar +from torch.optim.lr_scheduler import LambdaLR +from torch.optim.lr_scheduler import StepLR +USE_CUDA = 
torch.cuda.is_available() +FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor +LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor + + +def train(params, training_docs, test_docs, data, model): + if params['model_type'] == 'clique': + training_data, training_labels = data.create_cliques(training_docs, params['task'], params['train_data_limit']) + test_data, test_labels = data.create_cliques(test_docs, params['task'], params['train_data_limit']) + elif params['model_type'] == 'sent_avg': + training_data, training_labels, train_ids = data.create_doc_sents(training_docs, 'sentence', params['task'], params['train_data_limit']) + test_data, test_labels, test_ids = data.create_doc_sents(test_docs, 'sentence', params['task'], params['train_data_limit']) + elif params['model_type'] == 'par_seq': + training_data, training_labels, train_ids = data.create_doc_sents(training_docs, 'paragraph', params['task'], + params['train_data_limit']) + test_data, test_labels, test_ids = data.create_doc_sents(test_docs, 'paragraph', params['task'], params['train_data_limit']) + if USE_CUDA: + model.cuda() + if params['train_data_limit'] != -1: + training_docs = training_docs[:10] + test_docs = test_docs[:10] + parameters = filter(lambda p: p.requires_grad, model.parameters()) + optimizer = optim.Adam(parameters, weight_decay=params['l2_reg']) + scheduler = None + if params['lr_decay'] == 'step': + scheduler = StepLR(optimizer, step_size=30, gamma=0.1) + elif params['lr_decay'] == 'lambda': + lambda1 = lambda epoch: 0.95 ** epoch + scheduler = LambdaLR(optimizer, lr_lambda=[lambda1]) + if params['task'] == 'class' or params['task'] == 'perm' or params['task'] == 'minority': + loss_fn = torch.nn.CrossEntropyLoss() + elif params['task'] == 'score_pred': + loss_fn = torch.nn.MSELoss() + timestamp = time.time() + best_test_acc = 0 + for epoch in range(params['num_epochs']): + if params['lr_decay'] == 'lambda' or params['lr_decay'] == 'step': + scheduler.step() + print(optimizer.param_groups[0]['lr']) + print("EPOCH "+str(epoch)) + total_loss = 0 + steps = int(len(training_data) / params['batch_size']) + indices = list(range(len(training_data))) + random.shuffle(indices) + bar = progressbar.ProgressBar() + model.train() + for step in bar(range(steps)): + batch_ind = indices[(step * params["batch_size"]):((step + 1) * params["batch_size"])] + sentences, orig_batch_labels = data.get_batch(training_data, training_labels, batch_ind, params['model_type'], params['clique_size']) + batch_padded, batch_lengths, original_index = data.pad_to_batch(sentences, data.word_to_idx, params['model_type'], params['clique_size']) + model.zero_grad() + pred_coherence = model(batch_padded, batch_lengths, original_index) + if params['task'] == 'score_pred': + loss = loss_fn(pred_coherence, Variable(FloatTensor(orig_batch_labels))) + else: + loss = loss_fn(pred_coherence, Variable(LongTensor(orig_batch_labels))) + mean_loss = loss / params["batch_size"] + mean_loss.backward() + total_loss += loss.cpu().data.numpy() + optimizer.step() + current_time = time.time() + print("Time %-5.2f min" % ((current_time - timestamp) / 60.0)) + print("Train loss: " + str(total_loss[0])) + output_name = params['model_name'] + '_epoch' + str(epoch) + if params['model_type'] == 'sent_avg' or params['model_type'] == 'par_seq': + if params['task'] == 'minority': + test_f05, test_precision, test_recall, test_loss = eval_docs(model, loss_fn, test_data, test_labels, + data, params) + elif params['task'] == 'class' or 
params['task'] == 'score_pred':
+                test_accuracy, test_loss = eval_docs(model, loss_fn, test_data, test_labels, data, params)
+            elif params['task'] == 'perm':
+                test_accuracy, test_loss = eval_docs_rank(model, test_docs, data, params)
+            print("Test loss: %0.3f" % test_loss)
+            if params['task'] == 'score_pred':
+                print("Test correlation: %0.5f" % (test_accuracy))
+            elif params['task'] == 'minority':
+                print("Test F0.5: %0.2f Precision: %0.2f Recall: %0.2f" % (test_f05, test_precision, test_recall))
+            else:
+                print("Test accuracy: %0.2f%%" % (test_accuracy * 100))
+        elif params['model_type'] == 'clique':
+            train_accuracy, train_loss = eval_cliques(model, loss_fn, training_data,
+                                                      training_labels,
+                                                      params['batch_size'],
+                                                      params['clique_size'], data,
+                                                      params['model_type'], params['task'])
+            if params['task'] == 'score_pred':
+                print("Train clique corr: %0.5f" % (train_accuracy))
+            else:
+                print("Train clique accuracy: %0.2f%%" % (train_accuracy * 100))
+            test_clique_accuracy, test_loss = eval_cliques(model, loss_fn, test_data,
+                                                           test_labels,
+                                                           params['batch_size'],
+                                                           params['clique_size'], data, params['model_type'], params['task'])
+            print("Test loss: %0.3f" % test_loss)
+            if params['task'] == 'score_pred':
+                print("Test clique corr: %0.5f" % (test_clique_accuracy))
+            else:
+                print("Test clique accuracy: %0.2f%%" % (test_clique_accuracy * 100))
+            doc_accuracy, test_precision, test_recall, test_f05 = eval_doc_cliques(model, test_docs, data, params)
+            if params['task'] == 'score_pred':
+                print("Test document corr: %0.5f" % (doc_accuracy))
+            elif params['task'] == 'minority':
+                print("Test F0.5: %0.2f Precision: %0.2f Recall: %0.2f" % (test_f05, test_precision, test_recall))
+            else:
+                print("Test document ranking accuracy: %0.2f%%" % (doc_accuracy * 100))
+            test_accuracy = doc_accuracy
+        if params['task'] == 'minority':
+            if test_f05 > best_test_acc:
+                best_test_acc = test_f05
+                # save best model
+                torch.save(model.state_dict(), params['model_dir'] + '/' + params['model_name'] + '_best')
+                print('saved model ' + params['model_dir'] + '/' + params['model_name'] + '_best')
+        else:
+            if test_accuracy > best_test_acc:
+                best_test_acc = test_accuracy
+                # save best model
+                torch.save(model.state_dict(), params['model_dir'] + '/' + params['model_name'] + '_best')
+                print('saved model ' + params['model_dir'] + '/' + params['model_name'] + '_best')
+        print()
+    return best_test_acc
+
+
+def test(params, test_docs, data, model):
+    if params['model_type'] == 'clique':
+        test_data, test_labels = data.create_cliques(test_docs, params['task'])
+    elif params['model_type'] == 'sent_avg':
+        test_data, test_labels, test_ids = data.create_doc_sents(test_docs, 'sentence', params['task'], params['train_data_limit'])
+    elif params['model_type'] == 'par_seq':
+        test_data, test_labels, test_ids = data.create_doc_sents(test_docs, 'paragraph', params['task'], params['train_data_limit'])
+
+    if USE_CUDA:
+        model.cuda()
+    loss_fn = torch.nn.CrossEntropyLoss()
+    # output_name = params['model_name'] + '_test'
+    if params['model_type'] == 'par_seq' or params['model_type'] == 'sent_avg':
+        test_accuracy, test_loss = eval_docs(model, loss_fn, test_data, test_labels, data, params)
+        print("Test accuracy: %0.2f%%" % (test_accuracy * 100))
+    elif params['model_type'] == 'clique':
+        # eval_doc_cliques returns (accuracy, precision, recall, f05); only accuracy is reported here
+        doc_accuracy, _, _, _ = eval_doc_cliques(model, test_docs, data, params)
+        print("Test document ranking accuracy: %0.2f%%" % (doc_accuracy * 100))
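
A note on the entity-grid features above: the vector written by extract_features_from_grid.py is, at its core, the relative frequency of each role subsequence of a fixed length over the rows of an entity grid ('s' subject, 'o' object, 'x' other mention, '-' absent), optionally split by a salience threshold and with s/o collapsed to x when syntax is off. The snippet below is a minimal, self-contained sketch of that core computation on a toy grid; the grid values and the transition_probs helper are illustrative only and not part of this patch, and the salience split, syntax option, and "<"/">" boundary markers are deliberately omitted.

from itertools import product

# Toy grid: one role string per entity, one character per sentence.
grid_rows = ["s-o", "xx-", "-so"]

def transition_probs(rows, seq_len=2, labels=("s", "o", "x", "-")):
    # Count every contiguous role subsequence of length seq_len across all
    # rows and normalize by the total number of such subsequences.
    total = sum(len(row) - seq_len + 1 for row in rows if len(row) >= seq_len)
    probs = {}
    for seq in product(labels, repeat=seq_len):
        pattern = "".join(seq)
        count = sum(row[i:i + seq_len] == pattern
                    for row in rows
                    for i in range(len(row) - seq_len + 1))
        probs[pattern] = count / total if total else 0.0
    return probs

print(transition_probs(grid_rows)["s-"])  # 1/6: one "s-" among the six length-2 subsequences

With seq_len and the role alphabet left as parameters, the same counting generalizes to the longer subsequences, reduced x/- alphabet, and boundary-marked variants that the script iterates over.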