diff --git a/Classes/Common/Document.php b/Classes/Common/Document.php
index dec043cf3..bdfd6a969 100644
--- a/Classes/Common/Document.php
+++ b/Classes/Common/Document.php
@@ -570,8 +570,8 @@ public static function &getInstance($uid, $pid = 0, $forceReload = false)
if (!empty($extConf['caching'])) {
Helper::saveToSession(self::$registry, get_class($instance));
}
+ $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
}
- $instance->logger = GeneralUtility::makeInstance(LogManager::class)->getLogger(get_class($instance));
// Return new instance.
return $instance;
}
@@ -638,10 +638,8 @@ public function getPhysicalPage($logicalPage)
}
/**
- * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas. Text might be
- * given as ALTO for METS or as annotations or ALTO for IIIF resources. If IIIF plain text annotations
- * with the motivation "painting" should be treated as full text representations, the extension has to be
- * configured accordingly.
+ * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas. Text might be
+ * given as ALTO for METS or as annotations or ALTO for IIIF resources.
*
* @access public
*
@@ -650,23 +648,23 @@ public function getPhysicalPage($logicalPage)
* @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
* of the Manifest / Range (IIIF)
*
- * @return string The physical structure node's / IIIF resource's raw text
+ * @return string The OCR full text
*/
- public abstract function getRawText($id);
+ public abstract function getFullText($id);
/**
- * This extracts the raw text for a physical structure node / IIIF Manifest / Canvas from an
- * XML fulltext representation (currently only ALTO). For IIIF manifests, ALTO documents have
+ * This extracts the OCR full text for a physical structure node / IIIF Manifest / Canvas from an
+ * XML full text representation (currently only ALTO). For IIIF manifests, ALTO documents have
* to be given in the Canvas' / Manifest's "seeAlso" property.
*
* @param string $id: The @ID attribute of the physical structure node (METS) or the @id property
* of the Manifest / Range (IIIF)
*
- * @return string The physical structure node's / IIIF resource's raw text from XML
+ * @return string The OCR full text
*/
- protected function getRawTextFromXml($id)
+ protected function getFullTextFromXml($id)
{
- $rawText = '';
+ $fullText = '';
// Load available text formats, ...
$this->loadFormats();
// ... physical structure ...
@@ -677,54 +675,87 @@ protected function getRawTextFromXml($id)
if (!empty($this->physicalStructureInfo[$id])) {
while ($fileGrpFulltext = array_shift($fileGrpsFulltext)) {
if (!empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
- // Get fulltext file.
- $file = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
- if ($file !== false) {
- // Turn off libxml's error logging.
- $libxmlErrors = libxml_use_internal_errors(true);
- // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
- $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
- // Load XML from file.
- $rawTextXml = simplexml_load_string($file);
- // Reset entity loader setting.
- libxml_disable_entity_loader($previousValueOfEntityLoader);
- // Reset libxml's error logging.
- libxml_use_internal_errors($libxmlErrors);
- // Get the root element's name as text format.
- $textFormat = strtoupper($rawTextXml->getName());
+ // Get full text file.
+ $fileContent = GeneralUtility::getUrl($this->getFileLocation($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext]));
+ if ($fileContent !== false) {
+ $textFormat = $this->getTextFormat($fileContent);
} else {
- $this->logger->warning('Couldn\'t load fulltext file for structure node @ID "' . $id . '"');
- return $rawText;
+ $this->logger->warning('Couldn\'t load full text file for structure node @ID "' . $id . '"');
+ return $fullText;
}
break;
}
}
} else {
$this->logger->warning('Invalid structure node @ID "' . $id . '"');
- return $rawText;
+ return $fullText;
}
// Is this text format supported?
- if (
- !empty($rawTextXml)
- && !empty($this->formats[$textFormat])
- ) {
- if (!empty($this->formats[$textFormat]['class'])) {
- $class = $this->formats[$textFormat]['class'];
- // Get the raw text from class.
- if (
- class_exists($class)
- && ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
- ) {
- $rawText = $obj->getRawText($rawTextXml);
- $this->rawTextArray[$id] = $rawText;
- } else {
- $this->logger->warning('Invalid class/method "' . $class . '->getRawText()" for text format "' . $textFormat . '"');
- }
- }
+ // This part actually differs from previous version of indexed OCR
+ if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
+ $fullText = $this->getFullTextWithoutImages($fileContent);
} else {
$this->logger->warning('Unsupported text format "' . $textFormat . '" in physical node with @ID "' . $id . '"');
}
- return $rawText;
+ return $fullText;
+ }
+
+ /**
+  * Get content of the OCR full text file without images.
+  *
+  * The XML is parsed, every Illustration element found below
+  * Layout/Page/PrintSpace is removed and the document is serialized again.
+  *
+  * @access private
+  *
+  * @param string $fileContent: content of the XML file
+  *
+  * @return string The content of the OCR full text file without images,
+  *                or an empty string if the content is not parsable XML
+  */
+ private function getFullTextWithoutImages($fileContent)
+ {
+     $objectXml = $this->getFullTextAsObjectXML($fileContent);
+     // Parsing yields false for malformed XML; calling asXML() on false
+     // would be a fatal error, so bail out with an empty full text.
+     if (!$objectXml instanceof \SimpleXMLElement) {
+         return '';
+     }
+     if (isset($objectXml->Layout->Page->PrintSpace->Illustration)) {
+         unset($objectXml->Layout->Page->PrintSpace->Illustration);
+     }
+     $xml = $objectXml->asXML();
+     // asXML() returns false on serialization failure; keep the string contract.
+     return is_string($xml) ? $xml : '';
+ }
+
+ /**
+  * Get format of the OCR full text
+  *
+  * The format is the upper-cased name of the XML root element.
+  *
+  * @access private
+  *
+  * @param string $fileContent: content of the XML file
+  *
+  * @return string The format of the OCR full text, or an empty string
+  *                if the content is not parsable XML
+  */
+ private function getTextFormat($fileContent)
+ {
+     $objectXml = $this->getFullTextAsObjectXML($fileContent);
+     // Parsing yields false for malformed XML; calling getName() on false
+     // would be a fatal error, so report "no format" instead.
+     if (!$objectXml instanceof \SimpleXMLElement) {
+         return '';
+     }
+     // Get the root element's name as text format.
+     return strtoupper($objectXml->getName());
+ }
+
+ /**
+  * Get content of the OCR full text file
+  *
+  * @access private
+  *
+  * @param string $fileContent: content of the XML file
+  *
+  * @return \SimpleXMLElement|false content of the XML file as object,
+  *                                 or false if the content could not be parsed
+  */
+ private function getFullTextAsObjectXML($fileContent)
+ {
+     // Turn off libxml's error logging.
+     $libxmlErrors = libxml_use_internal_errors(true);
+     // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
+     // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where external
+     // entity loading is already disabled by default, so it is only called on older versions.
+     $toggleEntityLoader = \PHP_VERSION_ID < 80000;
+     if ($toggleEntityLoader) {
+         $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
+     }
+     // Load XML from string.
+     $objectXml = simplexml_load_string($fileContent);
+     // Reset entity loader setting.
+     if ($toggleEntityLoader) {
+         libxml_disable_entity_loader($previousValueOfEntityLoader);
+     }
+     // Reset libxml's error logging.
+     libxml_use_internal_errors($libxmlErrors);
+     // Return the parsed document (false on parse failure).
+     return $objectXml;
}
/**
@@ -1306,9 +1337,14 @@ public function save($pid = 0, $core = 0, $owner = null)
}
// Add document to index.
if ($core) {
- Indexer::add($this, $core);
+ //TODO: change return of this method to true on success and false on failure
+ $hasErrors = Indexer::add($this, $core);
+ if ($hasErrors) {
+ return false;
+ }
} else {
$this->logger->notice('Invalid UID "' . $core . '" for Solr core');
+ return false;
}
return true;
}
diff --git a/Classes/Common/DocumentList.php b/Classes/Common/DocumentList.php
index 246c4547b..acaa7c1b9 100644
--- a/Classes/Common/DocumentList.php
+++ b/Classes/Common/DocumentList.php
@@ -12,8 +12,10 @@
namespace Kitodo\Dlf\Common;
+use Kitodo\Dlf\Common\SolrSearchResult\ResultDocument;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerAwareTrait;
+use Solarium\QueryType\Select\Result\Result;
use TYPO3\CMS\Core\SingletonInterface;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Utility\GeneralUtility;
@@ -237,74 +239,8 @@ protected function getRecord($element)
&& $this->metadata['options']['source'] == 'search'
) {
if ($this->solrConnect()) {
- $fields = Solr::getFields();
- $params = [];
- // Restrict the fields to the required ones
- $params['fields'] = $fields['uid'] . ',' . $fields['id'] . ',' . $fields['toplevel'] . ',' . $fields['thumbnail'] . ',' . $fields['page'];
- foreach ($this->solrConfig as $solr_name) {
- $params['fields'] .= ',' . $solr_name;
- }
- // If it is a fulltext search, enable highlighting.
- if ($this->metadata['fulltextSearch']) {
- $params['component'] = [
- 'highlighting' => [
- 'query' => Solr::escapeQuery($this->metadata['searchString']),
- 'field' => $fields['fulltext'],
- 'usefastvectorhighlighter' => true
- ]
- ];
- }
- // Set additional query parameters.
- $params['start'] = 0;
- // Set reasonable limit for safety reasons.
- // We don't expect to get more than 10.000 hits per UID.
- $params['rows'] = 10000;
- // Take over existing filter queries.
- $params['filterquery'] = isset($this->metadata['options']['params']['filterquery']) ? $this->metadata['options']['params']['filterquery'] : [];
- // Extend filter query to get all documents with the same UID.
- foreach ($params['filterquery'] as $key => $value) {
- if (isset($value['query'])) {
- $params['filterquery'][$key]['query'] = $value['query'] . ' OR ' . $fields['toplevel'] . ':true';
- }
- }
- // Add filter query to get all documents with the required uid.
- $params['filterquery'][] = ['query' => $fields['uid'] . ':' . Solr::escapeQuery($record['uid'])];
- // Add sorting.
- $params['sort'] = $this->metadata['options']['params']['sort'];
- // Set query.
- $params['query'] = $this->metadata['options']['select'] . ' OR ' . $fields['toplevel'] . ':true';
- // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
- $selectQuery = $this->solr->service->createSelect($params);
- $result = $this->solr->service->select($selectQuery);
- // If it is a fulltext search, fetch the highlighting results.
- if ($this->metadata['fulltextSearch']) {
- $highlighting = $result->getHighlighting();
- }
- // Process results.
- foreach ($result as $resArray) {
- // Prepare document's metadata.
- $metadata = [];
- foreach ($this->solrConfig as $index_name => $solr_name) {
- if (!empty($resArray->$solr_name)) {
- $metadata[$index_name] = (is_array($resArray->$solr_name) ? $resArray->$solr_name : [$resArray->$solr_name]);
- }
- }
- // Add metadata to list elements.
- if ($resArray->toplevel) {
- $record['thumbnail'] = $resArray->thumbnail;
- $record['metadata'] = $metadata;
- } else {
- $highlightedDoc = !empty($highlighting) ? $highlighting->getResult($resArray->id) : null;
- $highlight = !empty($highlightedDoc) ? $highlightedDoc->getField($fields['fulltext'])[0] : '';
- $record['subparts'][$resArray->id] = [
- 'uid' => $resArray->uid,
- 'page' => $resArray->page,
- 'preview' => $highlight,
- 'thumbnail' => $resArray->thumbnail,
- 'metadata' => $metadata
- ];
- }
- }
+ $result = $this->getSolrResult($record);
+ $record = $this->getSolrRecord($record, $result);
}
}
// Save record for later usage.
@@ -316,6 +252,120 @@ protected function getRecord($element)
return $record;
}
+ /**
+  * It gets the Solr result for all documents which share the UID of the
+  * given record and either fit the current search or are marked as toplevel.
+  *
+  * @access private
+  *
+  * @param array $record: for searched document
+  *
+  * @return Result
+  */
+ private function getSolrResult($record)
+ {
+     $fields = Solr::getFields();
+
+     $query = $this->solr->service->createSelect();
+     // Restrict the fields to the required ones
+     $query->setFields($fields['uid'] . ',' . $fields['id'] . ',' . $fields['toplevel'] . ',' . $fields['thumbnail'] . ',' . $fields['page']);
+     foreach ($this->solrConfig as $solr_name) {
+         $query->addField($solr_name);
+     }
+     // Set additional query parameters.
+     // Set reasonable limit for safety reasons.
+     // We don't expect to get more than 10.000 hits per UID.
+     $query->setStart(0)->setRows(10000);
+     // Take over existing filter queries.
+     $filterQueries = isset($this->metadata['options']['params']['filterquery']) ? $this->metadata['options']['params']['filterquery'] : [];
+     // Extend filter query to get all documents with the same UID.
+     foreach ($filterQueries as $key => $value) {
+         if (isset($value['query'])) {
+             $query->addFilterQuery([
+                 'key' => $key,
+                 'query' => $value['query'] . ' OR ' . $fields['toplevel'] . ':true'
+             ]);
+         }
+     }
+     // Add filter query to get all documents with the required uid.
+     $query->createFilterQuery('uid')->setQuery($fields['uid'] . ':' . Solr::escapeQuery($record['uid']));
+     // Add sorting.
+     $query->addSort('score', $this->metadata['options']['params']['sort']['score']);
+     // Set query.
+     $query->setQuery($this->metadata['options']['select'] . ' OR ' . $fields['toplevel'] . ':true');
+
+     $isFulltextSearch = !empty($this->metadata['fulltextSearch']);
+
+     // If it is a fulltext search, enable highlighting on the query
+     // before the request is built from it.
+     if ($isFulltextSearch) {
+         $query->getHighlighting();
+     }
+
+     $solrRequest = $this->solr->service->createRequest($query);
+
+     // The OCR highlighting parameters are added to the request directly.
+     if ($isFulltextSearch) {
+         // field for which highlighting is going to be performed,
+         // is required if you want to have OCR highlighting
+         $solrRequest->addParam('hl.ocr.fl', $fields['fulltext']);
+         // return the coordinates of highlighted search as absolute coordinates
+         $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
+         // max amount of snippets for a single page
+         $solrRequest->addParam('hl.snippets', 20);
+     }
+     // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
+     $response = $this->solr->service->executeRequest($solrRequest);
+     return $this->solr->service->createResult($query, $response);
+ }
+
+ /**
+  * It processes SOLR result into record, which is
+  * going to be displayed in the frontend list.
+  *
+  * The toplevel result document supplies the record's thumbnail and metadata;
+  * every other result document becomes an entry in $record['subparts'].
+  *
+  * @access private
+  *
+  * @param array $record: for searched document
+  * @param Result $result: found in the SOLR index
+  *
+  * @return array
+  */
+ private function getSolrRecord($record, $result)
+ {
+     // Field names do not change between result documents, so fetch them once.
+     $fields = Solr::getFields();
+     // If it is a fulltext search, fetch the highlighting results.
+     // The variable stays empty when the response carries no OCR highlighting.
+     $highlighting = [];
+     if (!empty($this->metadata['fulltextSearch'])) {
+         $data = $result->getData();
+         if (isset($data['ocrHighlighting'])) {
+             $highlighting = $data['ocrHighlighting'];
+         }
+     }
+
+     // Process results.
+     foreach ($result as $resArray) {
+         // Prepare document's metadata.
+         $metadata = [];
+         foreach ($this->solrConfig as $index_name => $solr_name) {
+             if (!empty($resArray->$solr_name)) {
+                 $metadata[$index_name] = (is_array($resArray->$solr_name) ? $resArray->$solr_name : [$resArray->$solr_name]);
+             }
+         }
+         // Add metadata to list elements.
+         if ($resArray->toplevel) {
+             $record['thumbnail'] = $resArray->thumbnail;
+             $record['metadata'] = $metadata;
+         } else {
+             $highlight = '';
+             if (!empty($highlighting)) {
+                 $resultDocument = new ResultDocument($resArray, $highlighting, $fields);
+                 $highlight = $resultDocument->getSnippets();
+             }
+
+             $record['subparts'][$resArray->id] = [
+                 'uid' => $resArray->uid,
+                 'page' => $resArray->page,
+                 'preview' => $highlight,
+                 'thumbnail' => $resArray->thumbnail,
+                 'metadata' => $metadata
+             ];
+         }
+     }
+     return $record;
+ }
+
/**
* This returns the current position
* @see \Iterator::key()
diff --git a/Classes/Common/FulltextInterface.php b/Classes/Common/FulltextInterface.php
index 68d10f17a..755b7ae53 100644
--- a/Classes/Common/FulltextInterface.php
+++ b/Classes/Common/FulltextInterface.php
@@ -21,6 +21,7 @@
* @access public
* @abstract
*/
+//TODO: check if this is still needed when actually full text xml is indexed
interface FulltextInterface
{
/**
diff --git a/Classes/Common/IiifManifest.php b/Classes/Common/IiifManifest.php
index 2955cdff7..8f7f50448 100644
--- a/Classes/Common/IiifManifest.php
+++ b/Classes/Common/IiifManifest.php
@@ -786,9 +786,10 @@ protected function getParentDocumentUidForSaving($pid, $core, $owner)
/**
* {@inheritDoc}
- * @see Document::getRawText()
+ * @see Document::getFullText()
*/
- public function getRawText($id)
+ //TODO: rewrite it to get full OCR
+ public function getFullText($id)
{
$rawText = '';
// Get text from raw text array if available.
@@ -805,7 +806,7 @@ public function getRawText($id)
if (!empty($this->physicalStructureInfo[$id])) {
while ($fileGrpFulltext = array_shift($fileGrpsFulltext)) {
if (!empty($this->physicalStructureInfo[$id]['files'][$fileGrpFulltext])) {
- $rawText = parent::getRawTextFromXml($id);
+ $rawText = parent::getFullTextFromXml($id);
break;
}
}
diff --git a/Classes/Common/Indexer.php b/Classes/Common/Indexer.php
index 01ffe73b7..c2cc786ef 100644
--- a/Classes/Common/Indexer.php
+++ b/Classes/Common/Indexer.php
@@ -116,6 +116,7 @@ public static function add(Document &$doc, $core = 0)
$updateQuery = self::$solr->service->createUpdate();
$updateQuery->addDeleteQuery('uid:' . $doc->uid);
self::$solr->service->update($updateQuery);
+
// Index every logical unit as separate Solr document.
foreach ($doc->tableOfContents as $logicalUnit) {
if (!$errors) {
@@ -124,7 +125,7 @@ public static function add(Document &$doc, $core = 0)
break;
}
}
- // Index fulltext files if available.
+ // Index full text files if available.
if ($doc->hasFulltext) {
foreach ($doc->physicalStructure as $pageNumber => $xmlId) {
if (!$errors) {
@@ -315,6 +316,8 @@ protected static function loadIndexConf($pid)
*/
protected static function processLogical(Document &$doc, array $logicalUnit)
{
+ $logger = GeneralUtility::makeInstance('TYPO3\CMS\Core\Log\LogManager')->getLogger(__CLASS__);
+
$errors = 0;
// Get metadata for logical unit.
$metadata = $doc->metadataArray[$logicalUnit['id']];
@@ -364,6 +367,7 @@ protected static function processLogical(Document &$doc, array $logicalUnit)
$solrDoc->setField('terms', $metadata['terms']);
$solrDoc->setField('restrictions', $metadata['restrictions']);
$solrDoc->setField('collection', $doc->metadataArray[$doc->toplevelId]['collection']);
+ $solrDoc->setField('fulltext', '');
$coordinates = json_decode($metadata['coordinates'][0]);
if (is_object($coordinates)) {
$solrDoc->setField('geom', json_encode($coordinates->features[0]));
@@ -413,6 +417,7 @@ protected static function processLogical(Document &$doc, array $logicalUnit)
'core.template.flashMessages'
);
}
+ $logger->error('Apache Solr threw exception: "' . $e->getMessage() . '"');
return 1;
}
}
@@ -443,10 +448,9 @@ protected static function processLogical(Document &$doc, array $logicalUnit)
*/
protected static function processPhysical(Document &$doc, $page, array $physicalUnit)
{
- if (
- $doc->hasFulltext
- && $fulltext = $doc->getRawText($physicalUnit['id'])
- ) {
+ $logger = GeneralUtility::makeInstance('TYPO3\CMS\Core\Log\LogManager')->getLogger(__CLASS__);
+
+ if ($doc->hasFulltext && $fullText = $doc->getFullText($physicalUnit['id'])) {
// Read extension configuration.
$extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
// Create new Solr document.
@@ -470,7 +474,7 @@ protected static function processPhysical(Document &$doc, $page, array $physical
$solrDoc->setField('toplevel', false);
$solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
$solrDoc->setField('collection', $doc->metadataArray[$doc->toplevelId]['collection']);
- $solrDoc->setField('fulltext', htmlspecialchars($fulltext));
+ $solrDoc->setField('fulltext', $fullText);
// Add faceting information to physical sub-elements if applicable.
foreach ($doc->metadataArray[$doc->toplevelId] as $index_name => $data) {
if (
@@ -510,6 +514,7 @@ protected static function processPhysical(Document &$doc, $page, array $physical
true,
'core.template.flashMessages'
);
+ $logger->error('Apache Solr threw exception: "' . $e->getMessage() . '"');
}
return 1;
}
diff --git a/Classes/Common/MetsDocument.php b/Classes/Common/MetsDocument.php
index 70db04396..06900fd81 100644
--- a/Classes/Common/MetsDocument.php
+++ b/Classes/Common/MetsDocument.php
@@ -667,21 +667,18 @@ class_exists($class)
/**
* {@inheritDoc}
- * @see \Kitodo\Dlf\Common\Document::getRawText()
+ * @see \Kitodo\Dlf\Common\Document::getFullText()
*/
- public function getRawText($id)
+ public function getFullText($id)
{
- $rawText = '';
- // Get text from raw text array if available.
- if (!empty($this->rawTextArray[$id])) {
- return $this->rawTextArray[$id];
- }
- // Load fileGrps and check for fulltext files.
+ $fullText = '';
+
+ // Load fileGrps and check for full text files.
$this->_getFileGrps();
if ($this->hasFulltext) {
- $rawText = $this->getRawTextFromXml($id);
+ $fullText = $this->getFullTextFromXml($id);
}
- return $rawText;
+ return $fullText;
}
/**
diff --git a/Classes/Common/Solr.php b/Classes/Common/Solr.php
index 1ba3edb0a..693df5937 100644
--- a/Classes/Common/Solr.php
+++ b/Classes/Common/Solr.php
@@ -257,7 +257,7 @@ public static function escapeQueryKeepField($query, $pid)
*/
public static function getFields()
{
- $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
+ $conf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey);
$fields = [];
$fields['id'] = $conf['solrFieldId'];
diff --git a/Classes/Common/SolrSearchResult/ResultDocument.php b/Classes/Common/SolrSearchResult/ResultDocument.php
index 7a7d7cc7a..53cf4452a 100644
--- a/Classes/Common/SolrSearchResult/ResultDocument.php
+++ b/Classes/Common/SolrSearchResult/ResultDocument.php
@@ -184,6 +184,7 @@ public function getPages()
*
* @return array(Region) All result's regions which contain search phrase
*/
+
public function getRegions()
{
return $this->regions;
diff --git a/Classes/Plugin/Eid/SearchInDocument.php b/Classes/Plugin/Eid/SearchInDocument.php
index 85e7f0425..920b9df65 100644
--- a/Classes/Plugin/Eid/SearchInDocument.php
+++ b/Classes/Plugin/Eid/SearchInDocument.php
@@ -14,6 +14,7 @@
use Kitodo\Dlf\Common\Helper;
use Kitodo\Dlf\Common\Solr;
+use Kitodo\Dlf\Common\SolrSearchResult\ResultDocument;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use TYPO3\CMS\Core\Http\Response;
@@ -58,21 +59,38 @@ public function main(ServerRequestInterface $request)
if ($solr->ready) {
$query = $solr->service->createSelect();
$query->setFields([$fields['id'], $fields['uid'], $fields['page']]);
- $query->setQuery($fields['fulltext'] . ':(' . Solr::escapeQuery((string) $parameters['q']) . ') AND ' . $fields['uid'] . ':' . intval($parameters['uid']));
+ $query->setQuery($this->getQuery($fields, $parameters));
$query->setStart($count)->setRows(20);
- $hl = $query->getHighlighting();
- $hl->setFields([$fields['fulltext']]);
- $hl->setUseFastVectorHighlighter(true);
- $results = $solr->service->select($query);
- $output['numFound'] = $results->getNumFound();
- $highlighting = $results->getHighlighting();
- foreach ($results as $result) {
- $snippet = $highlighting->getResult($result->id)->getField($fields['fulltext']);
+ $query->getHighlighting();
+ $solrRequest = $solr->service->createRequest($query);
+
+ // it is necessary to add the custom parameters to the request
+ // because query object doesn't allow custom parameters
+
+ // field for which highlighting is going to be performed,
+ // is required if you want to have OCR highlighting
+ $solrRequest->addParam('hl.ocr.fl', $fields['fulltext']);
+ // return the coordinates of highlighted search as absolute coordinates
+ $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
+ // max amount of snippets for a single page
+ $solrRequest->addParam('hl.snippets', 20);
+
+ $response = $solr->service->executeRequest($solrRequest);
+ $result = $solr->service->createResult($query, $response);
+ /** @scrutinizer ignore-call */
+ $output['numFound'] = $result->getNumFound();
+ $data = $result->getData();
+ $highlighting = $data['ocrHighlighting'];
+
+ foreach ($result as $record) {
+ $resultDocument = new ResultDocument($record, $highlighting, $fields);
+
$document = [
- 'id' => $result->id,
- 'uid' => $result->uid,
- 'page' => $result->page,
- 'snippet' => !empty($snippet) ? implode(' [...] ', $snippet) : ''
+ 'id' => $resultDocument->getId(),
+ 'uid' => !empty($resultDocument->getUid()) ? $resultDocument->getUid() : $parameters['uid'],
+ 'page' => $resultDocument->getPage(),
+ 'snippet' => $resultDocument->getSnippets(),
+ 'highlight' => $resultDocument->getHighlightsIds()
];
$output['documents'][$count] = $document;
$count++;
@@ -84,4 +102,12 @@ public function main(ServerRequestInterface $request)
$response->getBody()->write(json_encode($output));
return $response;
}
+
+ /**
+  * Build the Solr query string: the escaped search phrase on the
+  * fulltext field, restricted to the requested document.
+  *
+  * @access private
+  *
+  * @param array $fields: Solr field names, 'fulltext' and 'uid' are read
+  * @param array $parameters: request parameters, 'q' and 'uid' are read
+  *
+  * @return string The Solr query
+  */
+ private function getQuery($fields, $parameters)
+ {
+     $fulltextClause = $fields['fulltext'] . ':(' . Solr::escapeQuery((string) $parameters['q']) . ')';
+     $uidClause = $fields['uid'] . ':' . $this->getUid($parameters['uid']);
+     return $fulltextClause . ' AND ' . $uidClause;
+ }
+
+ /**
+  * Prepare the document identifier for use in the Solr query.
+  *
+  * Numeric identifiers are cast to integer. Any other identifier comes
+  * from the request and is escaped, so that query syntax characters in it
+  * are not interpreted by Solr.
+  *
+  * @access private
+  *
+  * @param mixed $uid: the 'uid' request parameter
+  *
+  * @return int|string The identifier, safe to append to the query
+  */
+ private function getUid($uid)
+ {
+     return is_numeric($uid) ? intval($uid) : Solr::escapeQuery((string) $uid);
+ }
}
diff --git a/Classes/Plugin/Tools/SearchInDocumentTool.php b/Classes/Plugin/Tools/SearchInDocumentTool.php
index 90fef9ac2..7bd8eb5cc 100644
--- a/Classes/Plugin/Tools/SearchInDocumentTool.php
+++ b/Classes/Plugin/Tools/SearchInDocumentTool.php
@@ -144,7 +144,9 @@ protected function getActionUrl()
}
/**
- * Get current document id
+ * Get current document id. By default the uid will be used.
+ * If documentIdUrlSchema is defined, then the id will be
+ * extracted from this URL.
*
* @access protected
*
@@ -154,6 +156,7 @@ protected function getCurrentDocumentId()
{
$id = $this->doc->uid;
+ // example: https://host.de/items/*id*/record
if (!empty($this->conf['documentIdUrlSchema'])) {
$arr = explode('*', $this->conf['documentIdUrlSchema']);
diff --git a/Configuration/ApacheSolr/configsets/dlf/conf/schema.xml b/Configuration/ApacheSolr/configsets/dlf/conf/schema.xml
index 517439d40..056a0c59c 100644
--- a/Configuration/ApacheSolr/configsets/dlf/conf/schema.xml
+++ b/Configuration/ApacheSolr/configsets/dlf/conf/schema.xml
@@ -39,8 +39,12 @@ limitations under the License.
-
+
+
+
+
+
@@ -55,6 +59,9 @@ limitations under the License.
+
+
+
@@ -127,7 +134,7 @@ limitations under the License.
-
+
diff --git a/Configuration/ApacheSolr/configsets/dlf/conf/solrconfig.xml b/Configuration/ApacheSolr/configsets/dlf/conf/solrconfig.xml
index 3e2944740..f44bf0870 100644
--- a/Configuration/ApacheSolr/configsets/dlf/conf/solrconfig.xml
+++ b/Configuration/ApacheSolr/configsets/dlf/conf/solrconfig.xml
@@ -83,6 +83,8 @@
+
+
+
+ Instruct the request handlers you want to enable OCR highlighting for to include the
+ search component you defined above. This example uses the standard /select handler.
+
+ CAUTION: Make sure that the OCR highlight component is listed **before** the standard
+ highlighting component, but **after** the query component.
+ -->
+
+
+ query
+ facet
+ ocrHighlight
+ highlight
+
+
+
+
diff --git a/Documentation/Plugins/Index.rst b/Documentation/Plugins/Index.rst
index e840f00c4..bbca8932c 100644
--- a/Documentation/Plugins/Index.rst
+++ b/Documentation/Plugins/Index.rst
@@ -1115,6 +1115,9 @@ This plugin adds an possibility to search all appearances of the phrase in curre
:Data Type:
:ref:`t3tsref:data-type-string`
:Default:
+ empty
+ :Values:
+ https://host.de/items/*id*/record - example value
- :Property:
idInputName
@@ -1157,3 +1160,4 @@ This plugin adds an possibility to search all appearances of the phrase in curre
:ref:`t3tsref:data-type-string`
:Default:
tx_dlf[encrypted]
+
diff --git a/Resources/Public/Javascript/PageView/PageView.js b/Resources/Public/Javascript/PageView/PageView.js
index 3457ff041..9c96875ab 100644
--- a/Resources/Public/Javascript/PageView/PageView.js
+++ b/Resources/Public/Javascript/PageView/PageView.js
@@ -81,6 +81,12 @@ var dlfViewer = function(settings){
* @private
*/
this.highlightKeys = 'tx_dlf[highlight_word]';
+
+ /**
+ * @type {string|undefined}
+ * @private
+ */
+ this.highlightWords = null;
/**
* @type {Object|undefined}
@@ -292,7 +298,10 @@ dlfViewer.prototype.createControls_ = function(controlNames, layers) {
/**
* Displays highlight words
*/
-dlfViewer.prototype.displayHighlightWord = function() {
+dlfViewer.prototype.displayHighlightWord = function(highlightWords = null) {
+ if(highlightWords != null) {
+ this.highlightWords = highlightWords;
+ }
if (!dlfUtils.exists(this.highlightLayer)) {
@@ -349,11 +358,18 @@ dlfViewer.prototype.displayHighlightWord = function() {
}
if (hasOwnProperty && this.fulltexts[0] !== undefined && this.fulltexts[0].url !== '' && this.images.length > 0) {
- var value = urlParams[param],
- values = value.split(';'),
+ var value = undefined,
fulltextData = dlfFullTextUtils.fetchFullTextDataFromServer(this.fulltexts[0].url, this.images[0]),
fulltextDataImageTwo = undefined;
+ if(this.highlightWords != null) {
+ value = this.highlightWords;
+ } else {
+ value = urlParams[param];
+ }
+
+ var values = decodeURIComponent(value).split(';');
+
// check if there is another image / fulltext to look for
if (this.images.length === 2 & this.fulltexts[1] !== undefined && this.fulltexts[1].url !== '') {
var image = $.extend({}, this.images[1]);
@@ -364,7 +380,7 @@ dlfViewer.prototype.displayHighlightWord = function() {
var stringFeatures = fulltextDataImageTwo === undefined ? fulltextData.getStringFeatures() :
fulltextData.getStringFeatures().concat(fulltextDataImageTwo.getStringFeatures());
values.forEach($.proxy(function(value) {
- var features = dlfUtils.searchFeatureCollectionForText(stringFeatures, value);
+ var features = dlfUtils.searchFeatureCollectionForCoordinates(stringFeatures, value);
if (features !== undefined) {
for (var i = 0; i < features.length; i++) {
this.highlightLayer.getSource().addFeatures([features[i]]);
diff --git a/Resources/Public/Javascript/PageView/SearchInDocument.js b/Resources/Public/Javascript/PageView/SearchInDocument.js
index 5ea4a6b8c..1e456e777 100644
--- a/Resources/Public/Javascript/PageView/SearchInDocument.js
+++ b/Resources/Public/Javascript/PageView/SearchInDocument.js
@@ -8,12 +8,12 @@
* LICENSE.txt file that was distributed with this source code.
*/
- /**
- * This function increases the start parameter of the search form and submits
- * the form.
- *
- * @returns void
- */
+/**
+ * This function increases the start parameter of the search form and submits
+ * the form.
+ *
+ * @returns void
+ */
function nextResultPage() {
var currentStart = $("#tx-dlf-search-in-document-form input[id='tx-dlf-search-in-document-start']").val();
var newStart = parseInt(currentStart) + 20;
@@ -43,10 +43,226 @@ function resetStart() {
$("#tx-dlf-search-in-document-form input[id='tx-dlf-search-in-document-start']").val(0);
}
+/**
+ * Add highlight effect for found search phrase.
+ * @param {array} highlightIds
+ *
+ * @returns void
+ */
+function addHighlightEffect(highlightIds) {
+ if (highlightIds.length > 0) {
+ highlightIds.forEach(function (highlightId) {
+ var targetElement = $('#' + highlightId);
+
+ if (targetElement.length > 0 && !targetElement.hasClass('highlight')) {
+ targetElement.addClass('highlight');
+ }
+ });
+ }
+}
+
+/**
+ * Get base URL for snippet links.
+ *
+ * @param {string} id
+ *
+ * @returns {string}
+ */
+function getBaseUrl(id) {
+ // Take the workview baseUrl from the form action.
+ // The URL may be in the following form
+ // - http://example.com/index.php?id=14
+ // - http://example.com/workview (using slug on page with uid=14)
+ var baseUrl = $("form#tx-dlf-search-in-document-form").attr('action');
+
+ // check if action URL contains id, if not, get URL from window
+ if(baseUrl === undefined || baseUrl.split('?')[0].indexOf(id) === -1) {
+ baseUrl = $(location).attr('href');
+ }
+
+ return baseUrl;
+}
+
+function getHighlights(highlight) {
+ var highlights = "";
+
+ for(var i = 0; i < highlight.length; i++) {
+ if (highlights === "") {
+ highlights += highlight[i];
+ } else {
+ if(highlights.indexOf(highlight[i]) === -1) {
+ highlights += ';' + highlight[i];
+ }
+ }
+ }
+
+ return highlights;
+}
+
+/**
+ * Get current URL query parameters.
+ * It returns array of params in form 'param=value' if there are any params supplied in the given url. If there are none it returns empty array
+ *
+ * @param {string} baseUrl
+ *
+ * @returns {array} array with params or empty
+ */
+function getCurrentQueryParams(baseUrl) {
+ if(baseUrl.indexOf('?') > 0) {
+ return baseUrl.slice(baseUrl.indexOf('?') + 1).split('&');
+ }
+
+ return [];
+}
+
+/**
+ * Get all URL query parameters for snippet links.
+ * All means that it includes together params which were already supplied in the page url and params which are returned as search results.
+ *
+ * @param {string} baseUrl
+ * @param {array} queryParams
+ *
+ * @returns {array} array with params in form 'param' => 'value'
+ */
+function getAllQueryParams(baseUrl, queryParams) {
+ var params = getCurrentQueryParams(baseUrl);
+
+ var queryParam;
+ for(var i = 0; i < params.length; i++) {
+ queryParam = params[i].split('=');
+ if(queryParams.indexOf(decodeURIComponent(queryParam[0])) === -1) {
+ queryParams.push(decodeURIComponent(queryParam[0]));
+ queryParams[decodeURIComponent(queryParam[0])] = queryParam[1];
+ }
+ }
+ return queryParams;
+}
+
+/**
+ * Get needed URL query parameters.
+ * It returns array of params as objects 'param' => 'value'. It contains exactly 3 params which are taken out of search result.
+ *
+ * @param {array} element
+ *
+ * @returns {array} array with params in form 'param' => 'value'
+ */
+function getNeededQueryParams(element) {
+ var id = $("input[id='tx-dlf-search-in-document-id']").attr('name');
+ var highlightWord = $("input[id='tx-dlf-search-in-document-highlight-word']").attr('name');
+ var page = $("input[id='tx-dlf-search-in-document-page']").attr('name');
+
+ var queryParams = [];
+
+ if(id && getBaseUrl(element['uid']).split('?')[0].indexOf(element['uid']) === -1) {
+ queryParams.push(id);
+ queryParams[id] = element['uid'];
+ }
+
+ if(highlightWord) {
+ queryParams.push(highlightWord);
+ queryParams[highlightWord] = encodeURIComponent($("input[id='tx-dlf-search-in-document-query']").val());
+ }
+
+ if(page) {
+ queryParams.push(page);
+ queryParams[page] = element['page'];
+ }
+
+ return queryParams;
+}
+
+/**
+ * Get snippet link.
+ *
+ * @param {array} element
+ *
+ * @returns {string}
+ */
+function getLink(element) {
+ var baseUrl = getBaseUrl(element['uid']);
+
+ var queryParams = getNeededQueryParams(element);
+
+ if (baseUrl.indexOf('?') > 0) {
+ queryParams = getAllQueryParams(baseUrl, queryParams);
+ baseUrl = baseUrl.split('?')[0];
+ }
+
+ var link = baseUrl + '?';
+
+ // add query params to result link
+ for(var i = 0; i < queryParams.length; i++) {
+ link += queryParams[i] + '=' + queryParams[queryParams[i]] + '&';
+ }
+ link = link.slice(0, -1);
+ return link;
+}
+
+function getNavigationButtons(start, numFound) {
+ var buttons = "";
+
+ if (start > 0) {
+ buttons += '';
+ }
+
+ if (numFound > (start + 20)) {
+ buttons += '';
+ }
+ return buttons;
+}
+
+function getCurrentPage() {
+ var page = 1;
+ var queryParams = getCurrentQueryParams(getBaseUrl(" "));
+
+ for(var i = 0; i < queryParams.length; i++) {
+ var queryParam = queryParams[i].split('=');
+
+ if(decodeURIComponent(queryParam[0]) === $("input[id='tx-dlf-search-in-document-page']").attr('name')) {
+ page = parseInt(queryParam[1], 10);
+ }
+ }
+
+ return page;
+}
+
+function addImageHighlight(data) {
+ var page = getCurrentPage();
+
+ data['documents'].forEach(function (element, i) {
+ if(element['page'] === page) {
+ if (element['highlight'].length > 0) {
+ if(tx_dlf_viewer.map != null) {
+ tx_dlf_viewer.displayHighlightWord(encodeURIComponent(getHighlights(element['highlight'])));
+ } else {
+ setTimeout(addImageHighlight, 500, data);
+ }
+ }
+ addHighlightEffect(element['highlight']);
+ }
+ });
+}
+
+function triggerSearchAfterHitLoad() {
+ var queryParams = getCurrentQueryParams(getBaseUrl(" "));
+ var searchedQueryParam = $("input[id='tx-dlf-search-in-document-highlight-word']").attr('name');
+
+ for(var i = 0; i < queryParams.length; i++) {
+ var queryParam = queryParams[i].split('=');
+
+ if(searchedQueryParam && decodeURIComponent(queryParam[0]).indexOf(searchedQueryParam) !== -1) {
+ $("input[id='tx-dlf-search-in-document-query']").val(decodeURIComponent(queryParam[1]));
+ $("#tx-dlf-search-in-document-form").submit();
+ break;
+ }
+ }
+}
+
$(document).ready(function() {
$("#tx-dlf-search-in-document-form").submit(function(event) {
// Stop form from submitting normally
event.preventDefault();
+
$('#tx-dlf-search-in-document-loading').show();
$('#tx-dlf-search-in-document-clearing').hide();
$('#tx-dlf-search-in-document-button-next').hide();
@@ -66,35 +282,16 @@ $(document).ready(function() {
var resultList = '';
var start = -1;
if (data['numFound'] > 0) {
- // Take the workview baseUrl from the form action.
- // The URL may be in the following form
- // - http://example.com/index.php?id=14
- // - http://example.com/workview (using slug on page with uid=14)
- var baseUrl = $("form#tx-dlf-search-in-document-form").attr('action');
-
- if (baseUrl.indexOf('?') > 0) {
- baseUrl += '&';
- } else {
- baseUrl += '?';
- }
data['documents'].forEach(function (element, i) {
if (start < 0) {
start = i;
}
- var searchWord = element['snippet'];
- searchWord = searchWord.substring(searchWord.indexOf('') + 4, searchWord.indexOf(''));
-
- var link = baseUrl
- + 'tx_dlf[id]=' + element['uid']
- + '&tx_dlf[highlight_word]=' + encodeURIComponent(searchWord)
- + '&tx_dlf[page]=' + element['page'];
-
if (element['snippet'].length > 0) {
resultItems[element['page']] = ''
+ $('#tx-dlf-search-in-document-label-page').text() + ' ' + element['page']
+ '
'
+ ''
- + '' + element['snippet'] + ''
+ + '' + element['snippet'] + ''
+ '';
}
});
@@ -105,29 +302,28 @@ $(document).ready(function() {
resultItems.forEach(function (item, index) {
resultList += '- ' + item + '
';
});
+
+ addImageHighlight(data);
} else {
resultList += '- ' + $('#tx-dlf-search-in-document-label-noresult').text() + '
';
}
resultList += '
';
- if (start > 0) {
- resultList += '';
- }
- if (data['numFound'] > (start + 20)) {
- resultList += '';
- }
+ resultList += getNavigationButtons(start, data['numFound']);
$('#tx-dlf-search-in-document-results').html(resultList);
},
"json"
- )
- .done(function( data ) {
+ ).done(function (data) {
$('#tx-dfgviewer-sru-results-loading').hide();
$('#tx-dfgviewer-sru-results-clearing').show();
});
});
- // clearing button
- $('#tx-dlf-search-in-document-clearing').click(function() {
+
+ // clearing button
+ $('#tx-dlf-search-in-document-clearing').click(function() {
$('#tx-dlf-search-in-document-results ul').remove();
$('.results-active-indicator').remove();
$('#tx-dlf-search-in-document-query').val('');
});
+
+ triggerSearchAfterHitLoad();
});
diff --git a/Resources/Public/Javascript/PageView/Utility.js b/Resources/Public/Javascript/PageView/Utility.js
index d731be7cf..48cc51719 100644
--- a/Resources/Public/Javascript/PageView/Utility.js
+++ b/Resources/Public/Javascript/PageView/Utility.js
@@ -865,16 +865,18 @@ dlfUtils.scaleToImageSize = function (features, imageObj, width, height, opt_off
};
/**
- * Search a feature collcetion for a feature with the given text
+ * Search a feature collection for a feature with the given coordinates
* @param {Array.} featureCollection
- * @param {string} text
+ * @param {string} coordinates
* @return {Array.|undefined}
*/
-dlfUtils.searchFeatureCollectionForText = function (featureCollection, text) {
+dlfUtils.searchFeatureCollectionForCoordinates = function (featureCollection, coordinates) {
var features = [];
featureCollection.forEach(function (ft) {
if (ft.get('fulltext') !== undefined) {
- if (ft.get('fulltext').toLowerCase().indexOf(text.toLowerCase()) > -1) features.push(ft);
+ if ((ft.get('width') + '_' + ft.get('height') + '_' + ft.get('hpos') + '_' + ft.get('vpos')) === coordinates) {
+ features.push(ft);
+ }
}
});
return features.length > 0 ? features : undefined;