NEO4J/snomed_g_graphdb_cypher_update.template

// -----------------------------------------------------------------------------------------
// Module:  snomed_g_graphdb_update.cypher
// Author: Jay Pedersen, University of Nebraska, August 2015
// Concept: Update a SNOMED_G Graph Database from input CSV files which describe the changes
//          to concepts, descriptions, ISA relationships and defining relationships.
// Input Files:
//          concept_chg.csv
//          concept_new.csv
//          rconcept_chg.csv
//          rconcept_new.csv
//          descrip_chg.csv
//          descrip_new.csv
//          isa_rel_chg.csv
//          isa_rel_new.csv
//          defining_rel_chg.csv
//          defining_rel_new.csv
//          defining_rel_edge_rem.csv
//          rolegroups.csv

// PREP/CLEANUP
//   -- remove any SNOMED_G_UPDATE_FAILURE node from previous run,
//   -- so any such node afterwards came from the current run.

// Purge every SNOMED_G_UPDATE_FAILURE marker node that already exists in the graph.
MATCH (stale:SNOMED_G_UPDATE_FAILURE)
DELETE stale;

// NEXT STEP
//    -- Remove defining-relationships that have changed role-group numbers,
//    -- NOTE: This is the only removal operation that is applied during the update,
//       all other changes are either new additions or in-place modifications.

RETURN 'Removing defining-relationships (DRs) edges for DRs which changed role-group.';
RETURN 'A CREATE operation later in the update will insert the replacement edge.';
RETURN 'Note: Cant simply modify the endpoint of the existing edge to point to a';
RETURN 'different role-group.  An edge removal+recreation is required by NEO4J.';

// For each row of defining_rel_edge_rem.csv, delete the edge (of any type) whose id
// property equals line.id and which runs from the matching RoleGroup node to the
// matching ObjectConcept node.  Rows are committed in batches of 200.
// The subquery starts with an importing WITH because a CALL subquery cannot see
// variables of the enclosing query (here: line) unless they are imported.
// NOTE(review): this step keys the RoleGroup on line.sourceId, while the
// defining_rel_chg.csv step keys it on line.sctid -- confirm the column name
// against the header of defining_rel_edge_rem.csv.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>defining_rel_edge_rem.csv" AS line
CALL {
    WITH line
    MATCH (rg:RoleGroup { sctid: line.sourceId, rolegroup: line.rolegroup })-[r {id: line.id}]->(c:ObjectConcept { sctid: line.destinationId })
    DELETE r
} IN TRANSACTIONS OF 200 ROWS;


// NEXT STEP -- Concept modifications -- new and updated

RETURN 'Creating NEW ObjectConcept nodes';

// Create one ObjectConcept node per row of concept_new.csv, committing every 200 rows.
// Both id and sctid are populated from the CSV id column.
// The importing WITH makes the outer variable line visible inside the subquery;
// without it the subquery cannot reference line.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>concept_new.csv" AS line
CALL {
    WITH line
    CREATE (:ObjectConcept
        { nodetype:           'concept',
          id:                 line.id,
          sctid:              line.id,
          active:             line.active,
          effectiveTime:      line.effectiveTime,
          moduleId:           line.moduleId,
          definitionStatusId: line.definitionStatusId,
          FSN:                line.FSN,
          history:            line.history } )
} IN TRANSACTIONS OF 200 ROWS;

RETURN 'Modifying existing ObjectConcept nodes which were updated';

// For each row of concept_chg.csv, find the ObjectConcept with the same sctid and
// replace its whole property map (SET n = {...} drops any property not listed here).
// Rows whose concept is not found are silently skipped by the MATCH.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>concept_chg.csv" AS line
CALL {
    WITH line
    MATCH (n:ObjectConcept { sctid: line.id })
    SET n = { nodetype:           'concept',
              id:                 line.id,
              sctid:              line.id,
              active:             line.active,
              effectiveTime:      line.effectiveTime,
              moduleId:           line.moduleId,
              definitionStatusId: line.definitionStatusId,
              FSN:                line.FSN,
              history:            line.history }
} IN TRANSACTIONS OF 200 ROWS;


// Combined Language+Description file ==> defines Descriptions
RETURN 'Creating NEW Description nodes';

// Create one Description node per row of descrip_new.csv, committing every 200 rows.
// The edge to the owning ObjectConcept is added by a separate HAS_DESCRIPTION step.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>descrip_new.csv" AS line
CALL {
    WITH line
    CREATE (:Description
        { nodetype:           'description',
          id:                 line.id,
          sctid:              line.sctid,
          active:             line.active,
          typeId:             line.typeId,
          moduleId:           line.moduleId,
          descriptionType:    line.descriptionType,
          id128bit:           line.id128bit,
          term:               line.term,
          effectiveTime:      line.effectiveTime,
          acceptabilityId:    line.acceptabilityId,
          refsetId:           line.refsetId,
          caseSignificanceId: line.caseSignificanceId,
          languageCode:       line.languageCode,
          history:            line.history } )
} IN TRANSACTIONS OF 200 ROWS;


RETURN 'Modifying existing Description nodes which were updated';

// For each row of descrip_chg.csv, find the Description with the same id and
// replace its whole property map (SET n = {...} drops any property not listed here).
// Rows whose description is not found are silently skipped by the MATCH.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>descrip_chg.csv" AS line
CALL {
    WITH line
    MATCH (n:Description { id: line.id })
    SET n = { nodetype:           'description',
              id:                 line.id,
              sctid:              line.sctid,
              active:             line.active,
              typeId:             line.typeId,
              moduleId:           line.moduleId,
              descriptionType:    line.descriptionType,
              id128bit:           line.id128bit,
              term:               line.term,
              effectiveTime:      line.effectiveTime,
              acceptabilityId:    line.acceptabilityId,
              refsetId:           line.refsetId,
              caseSignificanceId: line.caseSignificanceId,
              languageCode:       line.languageCode,
              history:            line.history }
} IN TRANSACTIONS OF 200 ROWS;


// Description edges for new and modified descriptions
RETURN 'Creating HAS_DESCRIPTION edges for new Description nodes related to ObjectConcept nodes';

// Second pass over descrip_new.csv: link each Description (matched by id) to the
// ObjectConcept whose sctid equals the row's sctid.  MERGE keeps the step idempotent,
// so re-running it does not duplicate edges.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>descrip_new.csv" AS line
CALL {
    WITH line
    MATCH (c:ObjectConcept { sctid: line.sctid })
    MATCH (f:Description { id: line.id })
    MERGE (c)-[:HAS_DESCRIPTION]->(f)
} IN TRANSACTIONS OF 200 ROWS;


RETURN 'Updating existing HAS_DESCRIPTION edges for updated Description nodes related to ObjectConcept nodes';

// Second pass over descrip_chg.csv: make sure each changed Description (matched by id)
// has a HAS_DESCRIPTION edge from the ObjectConcept whose sctid equals the row's sctid.
// MERGE only creates the edge when it is missing; existing edges are left untouched.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>descrip_chg.csv" AS line
CALL {
    WITH line
    MATCH (c:ObjectConcept { sctid: line.sctid })
    MATCH (f:Description { id: line.id })
    MERGE (c)-[:HAS_DESCRIPTION]->(f)
} IN TRANSACTIONS OF 200 ROWS;


// --------------------------------------------------------------------------------------
// NEXT STEP -- ISA relationships
// --------------------------------------------------------------------------------------

RETURN 'Creating NEW ISA edges';

// For each row of isa_rel_new.csv, merge an ISA edge from the source concept to the
// destination concept (both matched on the ObjectConcept id property).
// MERGE compares the full property map, so an edge is only reused when every
// listed property already matches; otherwise a new edge is created.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>isa_rel_new.csv" AS line
CALL {
    WITH line
    MATCH (c1:ObjectConcept { id: line.sourceId })
    MATCH (c2:ObjectConcept { id: line.destinationId })
    MERGE (c1)-[:ISA
            { id:                   line.id,
              active:               line.active,
              effectiveTime:        line.effectiveTime,
              moduleId:             line.moduleId,
              relationshipGroup:    line.relationshipGroup,
              typeId:               line.typeId,
              characteristicTypeId: line.characteristicTypeId,
              sourceId:             line.sourceId,
              destinationId:        line.destinationId,
              history:              line.history }]->(c2)
} IN TRANSACTIONS OF 200 ROWS;

RETURN 'Modifying existing ISA edges which were updated';

// Start from a clean slate: drop SNOMED_G_ISA_UPDATE_ISSUE nodes that already exist.
MATCH (a:SNOMED_G_ISA_UPDATE_ISSUE) DELETE a;

// For each row of isa_rel_chg.csv:
//   * if an ISA edge with this id exists between the given source and destination
//     concepts, overwrite its whole property map with the row's values;
//   * otherwise record the row as a SNOMED_G_ISA_UPDATE_ISSUE node.
// FOREACH over a one-element or empty list is used as a conditional, since Cypher
// has no IF statement for updating clauses.
// The subquery is closed before the follow-up statements below; they are separate
// statements and must not sit inside the CALL block.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>isa_rel_chg.csv" AS line
CALL {
    WITH line
    OPTIONAL MATCH (c1:ObjectConcept { id: line.sourceId })-[r:ISA { id: line.id }]->(c2:ObjectConcept { id: line.destinationId })
    FOREACH (o IN (CASE WHEN r IS NOT NULL THEN [1] ELSE [] END) |
        SET r = { id:                   line.id,
                  active:               line.active,
                  effectiveTime:        line.effectiveTime,
                  moduleId:             line.moduleId,
                  relationshipGroup:    line.relationshipGroup,
                  typeId:               line.typeId,
                  characteristicTypeId: line.characteristicTypeId,
                  sourceId:             line.sourceId,
                  destinationId:        line.destinationId,
                  history:              line.history }
    )
    FOREACH (o IN (CASE WHEN r IS NULL THEN [1] ELSE [] END) |
        CREATE (a:SNOMED_G_ISA_UPDATE_ISSUE
            { snomedct_object:      "ISA",
              id:                   line.id,
              active:               line.active,
              effectiveTime:        line.effectiveTime,
              moduleId:             line.moduleId,
              relationshipGroup:    line.relationshipGroup,
              typeId:               line.typeId,
              characteristicTypeId: line.characteristicTypeId,
              sourceId:             line.sourceId,
              destinationId:        line.destinationId,
              history:              line.history })
    )
} IN TRANSACTIONS OF 200 ROWS;

// Create one SNOMED_G_UPDATE_FAILURE node for every issue node recorded above.
MATCH (a:SNOMED_G_ISA_UPDATE_ISSUE)
CREATE (b:SNOMED_G_UPDATE_FAILURE);

// For every issue whose id matches an ISA edge between any two concepts, delete both
// the issue node and that edge.  Issue nodes without such an edge remain in the graph.
// NOTE(review): presumably this clears edges whose endpoints changed so that a later
// step can recreate them -- confirm against the rest of the update script.
MATCH (a:SNOMED_G_ISA_UPDATE_ISSUE)
MATCH (b:ObjectConcept)-[r:ISA {id: a.id}]->(c:ObjectConcept)
DELETE a, r;


// --------------------------------------------------------------------------------------
// NEXT STEP -- Create RoleGroup nodes
// --------------------------------------------------------------------------------------
RETURN 'Creating RoleGroup nodes';

// Ensure a RoleGroup node exists for every (sctid, rolegroup) row of rolegroups.csv.
// MERGE reuses a node when one with exactly these three properties already exists.
// Rows are committed in batches of 500.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>rolegroups.csv" AS line
CALL {
    WITH line
    MERGE (rg:RoleGroup
        { nodetype:  'rolegroup',
          sctid:     line.sctid,
          rolegroup: line.rolegroup })
} IN TRANSACTIONS OF 500 ROWS;


// The HAS_ROLE_GROUP edges are added in a second pass over rolegroups.csv, separate
// from the RoleGroup node creation, because of a Java memory issue.
RETURN 'Creating HAS_ROLE_GROUP edges';

// Link each ObjectConcept to its RoleGroup nodes (same sctid, given rolegroup).
// MERGE only creates the edge when it is missing.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>rolegroups.csv" AS line
CALL {
    WITH line
    MATCH (c:ObjectConcept { sctid: line.sctid })
    MATCH (rg:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })
    MERGE (c)-[:HAS_ROLE_GROUP]->(rg)
} IN TRANSACTIONS OF 200 ROWS;

// --------------------------------------------------------------------------------------
// NEXT STEP -- Modify existing defining relationships
// --------------------------------------------------------------------------------------

RETURN 'Modify updated Defining relationships';

// Start from a clean slate: drop SNOMED_G_DEFREL_UPDATE_ISSUE nodes that already exist.
MATCH (a:SNOMED_G_DEFREL_UPDATE_ISSUE) DELETE a;

// For each row of defining_rel_chg.csv:
//   * if an edge (of any type) with this id exists from the row's RoleGroup to the
//     destination concept, overwrite its whole property map with the row's values;
//   * otherwise record the row as a SNOMED_G_DEFREL_UPDATE_ISSUE node.
// FOREACH over a one-element or empty list is used as a conditional, since Cypher
// has no IF statement for updating clauses.
// The subquery is closed before the follow-up statements below; they are separate
// statements and must not sit inside the CALL block.
// The importing WITH makes the outer variable line visible inside the subquery.
LOAD CSV WITH HEADERS FROM "<<<file_protocol>>><<<output_dir>>>defining_rel_chg.csv" AS line
CALL {
    WITH line
    OPTIONAL MATCH (rg:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })-[r { id: line.id} ]->(c:ObjectConcept { sctid: line.destinationId })
    FOREACH (o IN (CASE WHEN r IS NOT NULL THEN [1] ELSE [] END) |
        SET r = { id:                   line.id,
                  active:               line.active,
                  sctid:                line.sctid,
                  typeId:               line.typeId,
                  rolegroup:            line.rolegroup,
                  effectiveTime:        line.effectiveTime,
                  moduleId:             line.moduleId,
                  characteristicTypeId: line.characteristicTypeId,
                  modifierId:           line.modifierId,
                  history:              line.history }
    )
    FOREACH (o IN (CASE WHEN r IS NULL THEN [1] ELSE [] END) |
        CREATE (a:SNOMED_G_DEFREL_UPDATE_ISSUE
            { snomedct_object:      "DEFINING_REL",
              id:                   line.id,
              active:               line.active,
              sctid:                line.sctid,
              typeId:               line.typeId,
              rolegroup:            line.rolegroup,
              effectiveTime:        line.effectiveTime,
              moduleId:             line.moduleId,
              characteristicTypeId: line.characteristicTypeId,
              modifierId:           line.modifierId,
              history:              line.history })
    )
} IN TRANSACTIONS OF 200 ROWS;

// Create one SNOMED_G_UPDATE_FAILURE node for every issue node recorded above.
MATCH (a:SNOMED_G_DEFREL_UPDATE_ISSUE)
CREATE (b:SNOMED_G_UPDATE_FAILURE);

// For every issue whose id matches an edge from any RoleGroup to any ObjectConcept,
// delete both the issue node and that edge.  Issue nodes without such an edge remain.
// NOTE(review): presumably this clears edges whose endpoints changed so that the
// "create new defining relationships" step can recreate them -- confirm.
MATCH (a:SNOMED_G_DEFREL_UPDATE_ISSUE)
MATCH (b:RoleGroup)-[r {id: a.id}]->(c:ObjectConcept)
DELETE a, r;


// --------------------------------------------------------------------------------------
// NEXT STEP -- Create new defining relationships
// --------------------------------------------------------------------------------------