Skip to content

Commit

Permalink
Experiments to work around problems with Tika and GIF files
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Bennett committed Aug 11, 2014
1 parent 75e4529 commit bcd5fa5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/main/java/com/lucidworks/dq/util/SolrUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,24 @@ public static Map<Object,Long> getAllStoredValuesAndCountsForField_ViaGroupedQue
return out;
}

/**
 * Escapes a field name so it can be embedded in a query string.
 *
 * <p>Spaces are escaped first, then colons, by delegating to
 * {@code StringUtils.escapeSpaces} and {@code StringUtils.escapeColons}.
 * No other characters (for example the asterisk) are touched.
 *
 * <p>Motivating examples are Tika-generated fields from GIF files, such as
 * {@code "attr_Chroma BlackIsZero_"} (contains a space) and
 * {@code "attr_cp:subject_"} (contains a colon).
 *
 * @param inFieldName the raw field name
 * @return the field name with spaces and colons backslash-escaped
 */
public static String escapeFieldName( String inFieldName ) {
    // Inner call handles spaces, outer call handles colons.
    return StringUtils.escapeColons( StringUtils.escapeSpaces( inFieldName ) );
}

/**
 * Returns the number of documents matched by the query {@code *:*}
 * (Solr's match-all-documents query) on the given server.
 *
 * @param server the Solr server to query
 * @return the count reported by {@code getDocCountForQuery} for {@code *:*}
 * @throws SolrServerException if the underlying query fails
 */
public static long getTotalDocCount( HttpSolrServer server ) throws SolrServerException {
    final String matchAllQuery = "*:*";
    final long totalDocs = getDocCountForQuery( server, matchAllQuery );
    return totalDocs;
}
public static long getDocCountForField( HttpSolrServer server, String fieldName ) throws SolrServerException {
// NullPointerException for location
// com.spatial4j.core.io.ParseUtils.parsePoint(ParseUtils.java:42)
String queryStr = fieldName + ":[* TO *]";
// String queryStr = fieldName + ":[* TO *]";
String queryStr = escapeFieldName(fieldName) + ":[* TO *]";
try {
return getDocCountForQuery( server, queryStr );
}
Expand Down Expand Up @@ -568,7 +579,12 @@ public static Map< String, Map<String,Collection<Object>> > getStoredValuesForFi
if ( null!=fieldNames && ! fieldNames.isEmpty() ) {
boolean haveSeenId = false;
for ( String fieldName : fieldNames ) {
q.addField( fieldName );
// q.addField( fieldName );
// Tika GIF meta fields, ex: "attr_meta:save-date_"
// escapeFieldName does NOT escape the asterisk, which we wouldn't want
// q.addField( escapeFieldName(fieldName) );
// try double escaping
q.addField( escapeFieldName(escapeFieldName(fieldName)) );
if ( fieldName.equals("*") ) {
sawWildcard = true;
haveSeenId = true;
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/com/lucidworks/dq/util/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,19 @@ public static Set<String> splitCsv( String inStr ) {
return out;
}

/**
 * Prefixes every space character in the input with a backslash.
 *
 * <p>Example: {@code "attr_Chroma BlackIsZero_"} becomes
 * {@code "attr_Chroma\ BlackIsZero_"}. Existing backslashes are left
 * untouched, so applying the method twice escapes each space twice.
 *
 * @param inStr the text to escape; may be {@code null}
 * @return {@code null} if {@code inStr} is {@code null}, otherwise the
 *         escaped text
 */
public static String escapeSpaces( String inStr ) {
    // Literal (non-regex) replacement: " " -> "\ "
    return ( inStr == null ) ? null : inStr.replace( " ", "\\ " );
}
/**
 * Prefixes every colon character in the input with a backslash.
 *
 * <p>Example: {@code "attr_cp:subject_"} becomes
 * {@code "attr_cp\:subject_"}. Existing backslashes are left untouched,
 * so applying the method twice escapes each colon twice.
 *
 * @param inStr the text to escape; may be {@code null}
 * @return {@code null} if {@code inStr} is {@code null}, otherwise the
 *         escaped text
 */
public static String escapeColons( String inStr ) {
    String escaped = null;
    if ( inStr != null ) {
        // Literal (non-regex) replacement: ":" -> "\:"
        escaped = inStr.replace( ":", "\\:" );
    }
    return escaped;
}

/**
* Based on code from:
* http://stackoverflow.com/questions/1247772 and
Expand Down

0 comments on commit bcd5fa5

Please sign in to comment.