From 2cb78bd990b69760d0183d429dfc4b066866f9d2 Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Mon, 27 Aug 2018 12:47:02 -0400
Subject: [PATCH 1/8] add status rationale detection

---
 .../lib/disease_status_extractor.rb           | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/info-extraction/lib/disease_status_extractor.rb b/info-extraction/lib/disease_status_extractor.rb
index 0290848..011a69d 100644
--- a/info-extraction/lib/disease_status_extractor.rb
+++ b/info-extraction/lib/disease_status_extractor.rb
@@ -22,7 +22,7 @@ def analyze_text (text)
                                           :start => match.begin(0),
                                           :end => match.end(0) )
     end
-    
+
     annotated.signal.to_enum(:scan, /(not )?(stable|progressing|getting worse|worsening|getting better|improving)/).map{ Regexp.last_match }.each do |match|
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "status_value",
@@ -30,11 +30,25 @@ def analyze_text (text)
                                           :end => match.end(0) )
     end
 
+    annotated.signal.to_enum(:scan, /(CT scan|imaging|lab)( results)?/).map{ Regexp.last_match }.each do |match|
+      annotated.tags << Standoff::Tag.new(:content => match[0],
+                                          :name => "status_rationale",
+                                          :start => match.begin(0),
+                                          :end => match.end(0) )
+    end
+
+    
+
     disease_status_assertions = []
+
     annotated.tags.select{|tag| tag.name == "disease_status_key"}.each do |key_tag|
       next_tag = annotated.next_tag key_tag
-      if next_tag.name == "status_value"
-        disease_status_assertions << next_tag.content
+      if next_tag && (next_tag.name == "status_value")
+        disease_status_assertions << {
+          :disease => nil,
+          :rationale => annotated.tags.select{|tag| tag.name == "status_rationale"}.map{|tag| tag.content}.uniq,
+          :status => next_tag.content
+        }
       end
     end
 

From 9ea48d9aa08bbf92cc995886cd36fc947d337bb6 Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Mon, 27 Aug 2018 13:02:26 -0400
Subject: [PATCH 2/8] rely on whitelist of DB concept categories; no longer
 need blacklist of specific concept names

---
 info-extraction/lib/watson4fluxnotes.rb | 26 +++----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/info-extraction/lib/watson4fluxnotes.rb b/info-extraction/lib/watson4fluxnotes.rb
index e0d6b53..b38af3b 100644
--- a/info-extraction/lib/watson4fluxnotes.rb
+++ b/info-extraction/lib/watson4fluxnotes.rb
@@ -32,7 +32,6 @@ def filter_to_desired_categories (results_hash)
     # we only care about entities of type "HealthCondition" (though that may be expanded later)
     results_hash["entities"].select!{|e| e["type"] == "HealthCondition"}
     # and concepts of type "Disease" (though that may be expanded later)
-    #TODO: when we have real DBpedia type checking available, has_relevant_dbpedia_concept_type should be deleted and replaced
     results_hash["concepts"].select!{|c| has_relevant_dbpedia_concept_type c}
     results_hash
   end
@@ -45,35 +44,16 @@ def has_relevant_dbpedia_concept_type ( concept )
       "http://umbel.org/umbel/rc/AilmentCondition"
     ]
 
-    
     types = DBPedia.loadDBPediaDataType(concept['dbpedia_resource'])
 
     if types == nil 
       return false
     end
-    # require 'byebug'
-    #     byebug
-
-    if (types.map{|t| t['value']} & types_we_care_about).length == 0
-       return false
-    end
 
-    # this is a manually curated, example-specific list based on checking DBpedia pages. will be replaced with automated DBpedia queries.
-    concepts_known_not_to_be_disease = [
-      "Chemotherapy",
-      "Pharmacology",
-      "2006 albums",
-      "2008 singles",
-      "HIV",
-      "Pain", # probably we want to catch this one. following the rules for now though.
-      "Pharmaceutical drug",
-      "Prescription drug"
-    ]
-    
-    if concepts_known_not_to_be_disease.include? concept["text"]
-      return false
+    if (types.map{|t| t['value']} & types_we_care_about).length > 0
+       return true
     else
-      return true
+      return false
     end
   end
 

From 8e1babc4f07d1951ccfe3892cb1f35fdc7ca4b56 Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Mon, 27 Aug 2018 13:08:00 -0400
Subject: [PATCH 3/8] fixing two things: 1) the previous version was only
 returning the first concept from an array of concepts detected in each chunk
 of text, rather than possible multiple findings per chunk; 2) disease status
 extractor now returns a key-value hash for each detected concept, so we
 should interpolate it as a json literal rather than a string

---
 info-extraction/server.rb | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/info-extraction/server.rb b/info-extraction/server.rb
index f838fa1..f0a5453 100644
--- a/info-extraction/server.rb
+++ b/info-extraction/server.rb
@@ -33,8 +33,10 @@
     end
     content_type :json
     flux_notes_messages = []
-    diseaseResults.each do |res| 
-        flux_notes_messages << "flux_command('insert-structured-phrase', {phrase:'disease status', fields: [{name:'status', value: '#{res[0]}'}]})"
+    diseaseResults.each do |res|
+        res.each do |concept|
+          flux_notes_messages << "flux_command('insert-structured-phrase', {phrase:'disease status', fields: [{name:'status', value: #{concept.to_json}}]})"
+        end
     end
     toxicityResults.each do |tox| 
         tox['concepts'].each do |concept| 
@@ -46,4 +48,4 @@
         toxicity: toxicityResults,
         fluxCommands: flux_notes_messages.uniq
     }.to_json
-end
\ No newline at end of file
+end

From a6670a9ece2d6a6343b9a612de2a385bc512f7a3 Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Mon, 27 Aug 2018 13:18:46 -0400
Subject: [PATCH 4/8] expand rationale keywords

---
 info-extraction/lib/disease_status_extractor.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/info-extraction/lib/disease_status_extractor.rb b/info-extraction/lib/disease_status_extractor.rb
index 011a69d..3f5a12e 100644
--- a/info-extraction/lib/disease_status_extractor.rb
+++ b/info-extraction/lib/disease_status_extractor.rb
@@ -30,7 +30,7 @@ def analyze_text (text)
                                           :end => match.end(0) )
     end
 
-    annotated.signal.to_enum(:scan, /(CT scan|imaging|lab)( results)?/).map{ Regexp.last_match }.each do |match|
+    annotated.signal.to_enum(:scan, /((CT scan|imaging|lab|pathology)( results)?)|symptoms?|exam|markers/).map{ Regexp.last_match }.each do |match|
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "status_rationale",
                                           :start => match.begin(0),

From 6ed7437e25f9c58c6be6ae7799f9e957c40a403f Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Tue, 4 Sep 2018 17:00:52 -0400
Subject: [PATCH 5/8] detect more rationale expressions; normalize both
 rationale assertions and status assertions; new data schema for output

---
 info-extraction/lib/chunker.rb                | 10 +--
 .../lib/disease_status_extractor.rb           | 80 +++++++++++++++++--
 2 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/info-extraction/lib/chunker.rb b/info-extraction/lib/chunker.rb
index c7e0419..6e06fe7 100644
--- a/info-extraction/lib/chunker.rb
+++ b/info-extraction/lib/chunker.rb
@@ -19,14 +19,8 @@ def run(document)
                             /side effects?/i
                           ]
                         when :disease_status
-                          [
-                            /status/i,
-                            /progressing/i,
-                            /stable/i,
-                            /getting worse/i,
-                            /worsening/i,
-                            /improving/i,
-                            /getting better/i
+                          [ # TODO: this is dopey. duplication of search expressions between here and the status_value patterns in DiseaseStatusExtractor should be abstracted and merged.
+                            /((not? )|(complete ))?(stable|progressing|responding|response( to treatment)?|resection|inevaluable|changed?|getting worse|worsening|getting better|improving)/i
                           ]
                         else
                           raise "Unknown chunking target: #{target}"
diff --git a/info-extraction/lib/disease_status_extractor.rb b/info-extraction/lib/disease_status_extractor.rb
index 3f5a12e..b7f55d0 100644
--- a/info-extraction/lib/disease_status_extractor.rb
+++ b/info-extraction/lib/disease_status_extractor.rb
@@ -11,31 +11,97 @@
 
 =end
 
+=begin
+Example output:
+[
+    {
+	"disease": null,
+	"status": {
+	    "mention text": "not changing",
+	    "normalized": "stable"
+	},
+	"rationale": [
+	    {
+		"mention text": "CT scans",
+		"normalized": "Imaging"
+	    }
+	]
+    },
+    {
+	"disease": null,
+	"status": {
+	    "mention text": "progressing",
+	    "normalized": "progressing"
+	},
+	"rationale": [
+	    {
+		"mention text": "CT scans",
+		"normalized": "Imaging"
+	    },
+	    {
+		"mention text": "physical exam",
+		"normalized": "Physical Exam"
+	    }
+	]
+    }
+]
+=end
+
 class DiseaseStatusExtractor
   def analyze_text (text)
     annotated = Standoff::AnnotatedString.new( :signal => text, :tags => [])
 
     # this would be just key.match document, but we want MatchData for possible multiple keyword matches, not just one
-    annotated.signal.to_enum(:scan, /status|disease|cancer/).map{ Regexp.last_match }.each do |match|
+    annotated.signal.to_enum(:scan, /status|disease|cancer/i).map{ Regexp.last_match }.each do |match|
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "disease_status_key",
                                           :start => match.begin(0),
                                           :end => match.end(0) )
     end
 
-    annotated.signal.to_enum(:scan, /(not )?(stable|progressing|getting worse|worsening|getting better|improving)/).map{ Regexp.last_match }.each do |match|
+    # TODO: this is dopey. duplication of search expressions between here and the disease_status patterns in Chunker should be abstracted and merged.
+    annotated.signal.to_enum(:scan, /((not? )|(complete ))?(stable|progressing|responding|response( to treatment)?|resection|inevaluable|changed?|getting worse|worsening|getting better|improving)/i).map{ Regexp.last_match }.each do |match|
+      mention_text = match[0]
+      mapped_for_normalization = case mention_text # be very careful with this. it evaluates greedily, and as such the order of expressions here matters a lot.
+                                 when /^not? /i
+                                   "stable"
+                                 when /getting worse|worsening|progress/i
+                                   "progressing"
+                                 when /complete resection/i, /complete response/i
+                                   $&
+                                 when /getting better|improving|response to treatment/i
+                                   "responding"
+                                 when /inevaluable/i, /stable/i, /progressing/i
+                                   $&
+                                 else
+                                   nil
+                                 end
+      normalized = mapped_for_normalization.split(/ |\_/).map(&:capitalize).join(" ") #cap each word
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "status_value",
+                                          :attributes => {:normalized => normalized},
                                           :start => match.begin(0),
                                           :end => match.end(0) )
     end
 
-    annotated.signal.to_enum(:scan, /((CT scan|imaging|lab|pathology)( results)?)|symptoms?|exam|markers/).map{ Regexp.last_match }.each do |match|
+    annotated.signal.to_enum(:scan, /((ca?t scan|mri|x-ray|x ray|imaging)( results)?)/i).map{ Regexp.last_match }.each do |match|
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "status_rationale",
+                                          :attributes => {:normalized => "Imaging"},
+                                          :start => match.begin(0),
+                                          :end => match.end(0) )
+    end
+    annotated.signal.to_enum(:scan, /(pathology|symptoms|(physical )?exam|markers)/i).map{ Regexp.last_match }.each do |match|
+      mention_text = match[0] # block variable is `match`; the previous `m` was undefined (NameError)
+      mention_text = "physical exam" if mention_text.downcase == "exam" # pattern is /i, so compare case-insensitively
+      normalized = mention_text.split(/ |\_/).map(&:capitalize).join(" ") #cap each word
+      annotated.tags << Standoff::Tag.new(:content => mention_text,
+                                          :name => "status_rationale",
+                                          :attributes => {:normalized => normalized},
                                           :start => match.begin(0),
                                           :end => match.end(0) )
     end
+    
 
     
 
@@ -46,8 +112,12 @@ def analyze_text (text)
       if next_tag && (next_tag.name == "status_value")
         disease_status_assertions << {
           :disease => nil,
-          :rationale => annotated.tags.select{|tag| tag.name == "status_rationale"}.map{|tag| tag.content}.uniq,
-          :status => next_tag.content
+          :rationale => annotated.tags.select{|tag| tag.name == "status_rationale"}.map do|tag|
+            {:mention_text => tag.content, :normalized => tag.attributes[:normalized]}
+          end.uniq,
+          :status => { :mention_text => next_tag.content,
+                       :normalized => next_tag.attributes[:normalized]}
+            
         }
       end
     end

From 889aece2981b0be55c1b7ca6ab9d559c83978c5d Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Tue, 11 Sep 2018 10:17:52 -0400
Subject: [PATCH 6/8] bug fix: avoid calling split on nil when a status mention has no normalization

---
 info-extraction/lib/disease_status_extractor.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/info-extraction/lib/disease_status_extractor.rb b/info-extraction/lib/disease_status_extractor.rb
index b7f55d0..7a4969b 100644
--- a/info-extraction/lib/disease_status_extractor.rb
+++ b/info-extraction/lib/disease_status_extractor.rb
@@ -76,7 +76,7 @@ def analyze_text (text)
                                  else
                                    nil
                                  end
-      normalized = mapped_for_normalization.split(/ |\_/).map(&:capitalize).join(" ") #cap each word
+      normalized = mapped_for_normalization ? mapped_for_normalization.split(/ |\_/).map(&:capitalize).join(" ") : nil#cap each word
       annotated.tags << Standoff::Tag.new(:content => match[0],
                                           :name => "status_value",
                                           :attributes => {:normalized => normalized},

From 57c5d492f2ce8aa9e9b88f6c1d4a2c044f772170 Mon Sep 17 00:00:00 2001
From: David Tresner-Kirsch <dwkirsch@gmail.com>
Date: Thu, 13 Sep 2018 13:01:15 -0400
Subject: [PATCH 7/8] update documentation

---
 info-extraction/lib/disease_status_extractor.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/info-extraction/lib/disease_status_extractor.rb b/info-extraction/lib/disease_status_extractor.rb
index 7a4969b..260214d 100644
--- a/info-extraction/lib/disease_status_extractor.rb
+++ b/info-extraction/lib/disease_status_extractor.rb
@@ -3,7 +3,7 @@
 =begin
 This is a really naive information extractor to detect assertions about disease statuses.
 
-All it does is tag mentions of a list of disease-status-related keywords (disease, cancer, status), tag mentions of disease-status-related value words (e.g. stable, improving). For each keyword, it checks the next tag to the right, and if that tag is a status value, it returns the value.
+All it does is tag mentions of a list of disease-status-related keywords (disease, cancer, status), tag mentions of disease-status-related value words (e.g. stable, improving), and tag mentions of rationale concepts (e.g. imaging, scans, symptoms). For each keyword, it checks the next tag to the right, and if that tag is a status value, it returns the value. It also returns and normalizes all rationale mentions found within the chunk along with that status assertion.
 
 So, it will assert "stable" for the phrase "your cancer is stable."
 

From 7390d5c2061e8d93ec3a58f41aeb5c17cdc23cee Mon Sep 17 00:00:00 2001
From: Andrew Schreiber <schreiaj@gmail.com>
Date: Mon, 17 Sep 2018 13:52:02 -0400
Subject: [PATCH 8/8] Clean up FluxNotes integration

---
 info-extraction/Gemfile                      |  2 +-
 info-extraction/Gemfile.lock                 |  4 +++-
 info-extraction/config.ru                    |  3 +++
 info-extraction/lib/fluxnotes_integration.rb | 11 +++++++++++
 info-extraction/server.rb                    |  8 +++++---
 5 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 info-extraction/config.ru
 create mode 100644 info-extraction/lib/fluxnotes_integration.rb

diff --git a/info-extraction/Gemfile b/info-extraction/Gemfile
index 3bc00e2..4f0a537 100644
--- a/info-extraction/Gemfile
+++ b/info-extraction/Gemfile
@@ -5,4 +5,4 @@ gem 'json'
 gem 'trollop'
 gem 'standoff'
 
-# gem 'byebug'
\ No newline at end of file
+gem 'byebug'
\ No newline at end of file
diff --git a/info-extraction/Gemfile.lock b/info-extraction/Gemfile.lock
index d8d7629..2e599ba 100644
--- a/info-extraction/Gemfile.lock
+++ b/info-extraction/Gemfile.lock
@@ -1,6 +1,7 @@
 GEM
   remote: https://rubygems.org/
   specs:
+    byebug (10.0.2)
     json (2.1.0)
     mustermann (1.0.2)
     rack (2.0.5)
@@ -19,10 +20,11 @@ PLATFORMS
   ruby
 
 DEPENDENCIES
+  byebug
   json
   sinatra
   standoff
   trollop
 
 BUNDLED WITH
-   1.16.1
+   1.16.4
diff --git a/info-extraction/config.ru b/info-extraction/config.ru
new file mode 100644
index 0000000..1fac37d
--- /dev/null
+++ b/info-extraction/config.ru
@@ -0,0 +1,3 @@
+$:.unshift(File.dirname(__FILE__))
+require 'server'
+run Sinatra::Application
\ No newline at end of file
diff --git a/info-extraction/lib/fluxnotes_integration.rb b/info-extraction/lib/fluxnotes_integration.rb
new file mode 100644
index 0000000..ef2c55b
--- /dev/null
+++ b/info-extraction/lib/fluxnotes_integration.rb
@@ -0,0 +1,11 @@
+require 'json'
+require 'byebug'
+class FluxNotes # Builds FluxNotes command strings from extraction results.
+    def self.build_structured_phrase(phrase, fields) # phrase: String; fields: Array whose elements are read via [:name] and [:value]; returns the command String
+        raise ArgumentError.new("Phrase needs to be of type 'String'") if(!phrase.instance_of? String)
+        raise ArgumentError.new("fields needs to be of type 'Array'") if(!fields.instance_of? Array)
+        fields_data = fields.map{ |field| "{name: '#{field[:name]}', value: '#{field[:value]}'}"}
+        data = "{phrase: '#{phrase}', fields: [#{fields_data.join(", ")}]}" # quote the phrase like the name/value fields so it stays a string literal
+        return "flux_command('insert-structured-phrase', #{data})"
+    end
+end
\ No newline at end of file
diff --git a/info-extraction/server.rb b/info-extraction/server.rb
index f0a5453..cafae05 100644
--- a/info-extraction/server.rb
+++ b/info-extraction/server.rb
@@ -1,12 +1,13 @@
 require 'sinatra'
 require 'json'
-# require 'byebug'
+require 'byebug'
 
 require_relative "lib/watson4fluxnotes.rb"
 require_relative "lib/meddra4fluxnotes.rb"
 require_relative "lib/chunker.rb"
 require_relative "lib/findings_collector.rb"
 require_relative "lib/disease_status_extractor.rb"
+require_relative "lib/fluxnotes_integration.rb"
 
 
 get '/' do 
@@ -35,12 +36,13 @@
     flux_notes_messages = []
     diseaseResults.each do |res|
         res.each do |concept|
-          flux_notes_messages << "flux_command('insert-structured-phrase', {phrase:'disease status', fields: [{name:'status', value: #{concept.to_json}}]})"
+          flux_notes_messages << FluxNotes.build_structured_phrase('disease status', [{name: 'status', value: concept[:status][:normalized]}])
         end
     end
     toxicityResults.each do |tox| 
         tox['concepts'].each do |concept| 
-            flux_notes_messages << "flux_command('insert-structured-phrase', {phrase:'toxicity', fields: [{name:'adverseEvent', value: '#{concept['text']}'}]})"
+            flux_notes_messages << FluxNotes.build_structured_phrase('toxicity', [{name: 'adverseEvent', value: concept['text']}])
+            # "flux_command('insert-structured-phrase', {phrase:'toxicity', fields: [{name:'adverseEvent', value: '#{concept['text']}'}]})"
         end
     end
     return {