Skip to content

Commit

Permalink
Merge pull request #70 from tdt/development
Browse files Browse the repository at this point in the history
Version 1.0.0
  • Loading branch information
coreation committed Feb 5, 2014
2 parents 6e5bb18 + ec5026c commit a4d0dd6
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 28 deletions.
7 changes: 6 additions & 1 deletion src/tdt/input/controllers/InputController.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace tdt\input\controllers;

use tdt\core\ContentNegotiator;
use tdt\core\auth\Auth;

class InputController extends \Controller{

Expand All @@ -14,16 +15,20 @@ public function handle(){
switch($method){
case "PUT":

$uri = self::getUri();
Auth::requirePermissions('tdt.input.create');

$uri = self::getUri();
return self::createJob($uri);
break;
case "GET":

Auth::requirePermissions('tdt.input.view');

return self::getJob();
break;
case "DELETE":

Auth::requirePermissions('tdt.input.delete');
return self::deleteJob();
break;
default:
Expand Down
2 changes: 1 addition & 1 deletion src/tdt/input/emlp/load/ALoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function __construct($loader){
abstract public function execute(&$chunk);

/**
* Clean up is called after the loading is executed.
* Clean up is called after the execute() function is performed.
*/
public function cleanUp(){

Expand Down
117 changes: 91 additions & 26 deletions src/tdt/input/emlp/load/Sparql.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

namespace tdt\input\emlp\load;

/**
* The Sparql class loads triples into a triplestore.
*/
class Sparql extends ALoader {


Expand All @@ -17,6 +20,7 @@ public function __construct($model) {

// Create the graph name
$graph_name = $model->graph_name;

$this->log("Preparing the Sparql loader, the graph that will be used is named $graph_name.");

// Store the graph to counter dirty reads
Expand Down Expand Up @@ -48,7 +52,7 @@ public function cleanUp(){
$this->log("Found $count remaining triples in the buffer, preparing them to load into the store.");

$triples_to_send = array_slice($this->buffer, 0, $count);
$this->addTriples(implode(' ', $triples_to_send));
$this->addTriples($triples_to_send);

$this->buffer = array_slice($this->buffer, $count);

Expand All @@ -66,9 +70,16 @@ public function cleanUp(){
$this->graph->save();
}

/**
* Perform the load.
*
* @param EasyRDF_Graph $chunk
* @return void
*/
public function execute(&$chunk){

if (!$chunk->isEmpty()) {
if(!$chunk->isEmpty()){

preg_match_all("/(<.*\.)/", $chunk->serialise('ntriples'), $matches);

if($matches[0])
Expand All @@ -84,7 +95,7 @@ public function execute(&$chunk){
$buffer_size = $this->loader->buffer_size;

$triples_to_send = array_slice($this->buffer, 0, $buffer_size);
$this->addTriples(implode(' ', $triples_to_send));
$this->addTriples($triples_to_send);
$this->buffer = array_slice($this->buffer, $buffer_size);

$duration = round((microtime(true) - $start) * 1000, 2);
Expand All @@ -93,36 +104,88 @@ public function execute(&$chunk){
}
}

/**
* Insert triples into the triple store
* @param array $triples
*
* @return void
*/
private function addTriples($triples) {

$serialized = preg_replace_callback('/(?:\\\\u[0-9a-fA-Z]{4})+/', function ($v) {
$v = strtr($v[0], array('\\u' => ''));
return mb_convert_encoding(pack('H*', $v), 'UTF-8', 'UTF-16BE');
},
$triples);
$triples_string = implode(' ', $triples);

$serialized = $this->serialize($triples_string);

$query = $this->createInsertQuery($serialized);

// If the insert fails, insert every triple one by one
if(!$this->performInsertQuery($query)){

$this->log("Inserting triple by triple to avoid good triples not getting inserted because of the presence of a bad triple.");

$totalTriples = count($triples);
$this->log("Total triples to be inserted one by one is $totalTriples.");

// Insert every triple one by one
foreach($triples as $triple){

$serialized = $this->serialize($triple);
$query = $this->createInsertQuery($serialized);

if(!$this->performInsertQuery($query)){
$this->log("ERROR: failed to insert the following triple: " . $triple);
}else{
$this->log("Succesfully inserted a triple to the triplestore.");
}
}
}
}

/**
* Create an insert SPARQL query based on the graph id
* @param string $triples Triples that have already been serialized, i.e. properly encoded
*
* @return string Insert query
*/
private function createInsertQuery($triples){

$graph_id = $this->graph->graph_id;

$query = "INSERT DATA INTO <$graph_id> {";
$query .= $serialized;
$query .= $triples;
$query .= ' }';

if ($this->execSPARQL($query) !== false)
$this->log("The triples were succesfully inserted into the store.");
else
$this->log("The triples were not successfully inserted into the store.");
return $query;
}

/**
 * Serialize triples to a format acceptable for a triplestore endpoint.
 *
 * Every run of consecutive \uXXXX escape sequences is replaced by the UTF-8
 * characters it encodes. A run is decoded as a whole (as UTF-16BE), so a
 * surrogate pair written as two consecutive escapes becomes one character.
 *
 * @param string $triples Triples in which \uXXXX escape sequences may occur
 *
 * @return string The triples with the \uXXXX escape sequences decoded to UTF-8
 */
private function serialize($triples){

    // Only hexadecimal digits (0-9, a-f, A-F) may follow \u: the matched digits
    // are handed to pack('H*'), which rejects any other character.
    return preg_replace_callback('/(?:\\\\u[0-9a-fA-F]{4})+/', function ($match) {

        // Strip the \u markers so only the hexadecimal code units remain
        $hex = strtr($match[0], array('\\u' => ''));

        return mb_convert_encoding(pack('H*', $hex), 'UTF-8', 'UTF-16BE');
    }, $triples);
}

/**
* Send a POST requst using cURL
* Send a POST request to the triplestore endpoint using cURL
* @param string $query SPARQL query to send to the endpoint
* @param string $method HTTP method used for the request (defaults to POST)
* @return string
* @return boolean
*/
private function execSPARQL($query, $method = "POST") {
private function performInsertQuery($query, $method = "POST") {

if (!function_exists('curl_init')) {
$this->log("cURL could not be retrieved as a command, make sure the CLI cURL is installed. Aborting the emlp sequence.");
$this->log("cURL could not be retrieved as a command, make sure the CLI cURL is installed because it is necessary to perform the load. Aborting EML sequence.");
exit();
}

Expand All @@ -133,6 +196,7 @@ private function execSPARQL($query, $method = "POST") {
$url = $this->loader->endpoint . "?query=" . urlencode($query);

$defaults = array(

CURLOPT_CUSTOMREQUEST => $method,
CURLOPT_HEADER => 0,
CURLOPT_URL => $url,
Expand All @@ -143,13 +207,11 @@ private function execSPARQL($query, $method = "POST") {
CURLOPT_FORBID_REUSE => 1,
CURLOPT_TIMEOUT => 4,
CURLOPT_POSTFIELDS => http_build_query($post)

);

// Get curl handle and initiate the request
$ch = curl_init();
curl_setopt_array($ch, $defaults);
//curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: text/plain"));

$response = curl_exec($ch);

Expand All @@ -159,15 +221,18 @@ private function execSPARQL($query, $method = "POST") {
curl_close($ch);

if ($response_code >= 400) {
$this->log("The query failed with code " . $response_code . " and response: " . $response);
$this->log("The executed query that failed was the following: " . $query);
}

return $response;
$this->log("The query failed with code " . $response_code);
return false;
}else{

$this->log("The triples were succesfully inserted into the store.");
return true;
}
}

/**
* Clear the old associated graphs with the given eml sequence
* Clear the old associated graphs with the given EML sequence based on the graph name.
*/
private function deleteOldGraphs() {

Expand All @@ -182,7 +247,7 @@ private function deleteOldGraphs() {

$query = "CLEAR GRAPH <$graph->graph_id>";

$result = $this->execSPARQL($query);
$result = $this->performInsertQuery($query);

// If all went ok, delete the graph entry
if($result !== false){
Expand All @@ -208,7 +273,7 @@ private function addTimestamp($datetime){
$query .= "<" . $graph_id . "> <http://purl.org/dc/terms/created> \"$datetime\"^^<http://www.w3.org/2001/XMLSchema#dateTime> .";
$query .= ' }';

if ($this->execSPARQL($query) !== false)
if ($this->performInsertQuery($query) !== false)
$this->log("Added the datetime ($datetime) meta-data to graph identified by " . $graph_id);
else
$this->log("Failed adding the datetime ($datetime) meta-data to graph identified by " . $graph_id);
Expand Down

0 comments on commit a4d0dd6

Please sign in to comment.