From bcd5fa5076998569ca4de0d5534123d1c6d17dcd Mon Sep 17 00:00:00 2001 From: Mark Bennett Date: Mon, 11 Aug 2014 06:42:26 -0700 Subject: [PATCH] Experiments to work around problems with Tika and GIF files --- .../com/lucidworks/dq/util/SolrUtils.java | 20 +++++++++++++++++-- .../com/lucidworks/dq/util/StringUtils.java | 13 ++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/lucidworks/dq/util/SolrUtils.java b/src/main/java/com/lucidworks/dq/util/SolrUtils.java index ebfb145..3683f97 100644 --- a/src/main/java/com/lucidworks/dq/util/SolrUtils.java +++ b/src/main/java/com/lucidworks/dq/util/SolrUtils.java @@ -356,13 +356,24 @@ public static Map getAllStoredValuesAndCountsForField_ViaGroupedQue return out; } + public static String escapeFieldName( String inFieldName ) { + // Tika fields from GIF files? + String outFieldName = inFieldName; + // "attr_Chroma BlackIsZero_" + outFieldName = StringUtils.escapeSpaces( outFieldName ); + // attr_cp:subject_ + outFieldName = StringUtils.escapeColons( outFieldName ); + return outFieldName; + } + public static long getTotalDocCount( HttpSolrServer server ) throws SolrServerException { return getDocCountForQuery( server, "*:*" ); } public static long getDocCountForField( HttpSolrServer server, String fieldName ) throws SolrServerException { // NullPointerException for location // com.spatial4j.core.io.ParseUtils.parsePoint(ParseUtils.java:42) - String queryStr = fieldName + ":[* TO *]"; + // String queryStr = fieldName + ":[* TO *]"; + String queryStr = escapeFieldName(fieldName) + ":[* TO *]"; try { return getDocCountForQuery( server, queryStr ); } @@ -568,7 +579,12 @@ public static Map< String, Map> > getStoredValuesForFi if ( null!=fieldNames && ! 
fieldNames.isEmpty() ) { boolean haveSeenId = false; for ( String fieldName : fieldNames ) { - q.addField( fieldName ); + // q.addField( fieldName ); + // Tika GIF meta fields, ex: "attr_meta:save-date_" + // escapeFieldName does NOT escape the asterisk, which we wouldn't want + // q.addField( escapeFieldName(fieldName) ); + // try double escaping + q.addField( escapeFieldName(escapeFieldName(fieldName)) ); if ( fieldName.equals("*") ) { sawWildcard = true; haveSeenId = true; diff --git a/src/main/java/com/lucidworks/dq/util/StringUtils.java b/src/main/java/com/lucidworks/dq/util/StringUtils.java index afb096f..d34394a 100644 --- a/src/main/java/com/lucidworks/dq/util/StringUtils.java +++ b/src/main/java/com/lucidworks/dq/util/StringUtils.java @@ -50,6 +50,19 @@ public static Set splitCsv( String inStr ) { return out; } + public static String escapeSpaces( String inStr ) { + if ( null==inStr ) { + return null; + } + return inStr.replaceAll( "[ ]", "\\\\ " ); + } + public static String escapeColons( String inStr ) { + if ( null==inStr ) { + return null; + } + return inStr.replaceAll( "[:]", "\\\\:" ); + } + /** * Based on code from: * http://stackoverflow.com/questions/1247772 and