Skip to content

Commit

Permalink
Merge pull request #72 from unchartedsoftware/add-croc-threshold
Browse files Browse the repository at this point in the history
Added threshold to croc label parsing.
  • Loading branch information
kbirk authored Sep 7, 2018
2 parents 3d429a1 + d7fd5cc commit 386ed8b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 7 deletions.
8 changes: 7 additions & 1 deletion cmd/distil-featurize/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ func main() {
Name: "has-header",
Usage: "Whether or not the CSV file has a header row",
},
cli.Float64Flag{
Name: "threshold",
Value: 0.2,
Usage: "Confidence threshold to use for labels",
},
}
app.Action = func(c *cli.Context) error {
if c.String("rest-endpoint") == "" {
Expand All @@ -107,6 +112,7 @@ func main() {
schemaPath := c.String("schema")
outputFilePath := c.String("output")
hasHeader := c.Bool("has-header")
threshold := c.Float64("threshold")

// initialize REST client
log.Infof("Using REST interface at `%s` ", restBaseEndpoint)
Expand Down Expand Up @@ -136,7 +142,7 @@ func main() {
}

// featurize data
err = feature.FeaturizeDataset(meta, featurizer, datasetPath, mediaPath, outputFilePath, outputData, outputSchema, hasHeader)
err = feature.FeaturizeDataset(meta, featurizer, datasetPath, mediaPath, outputFilePath, outputData, outputSchema, hasHeader, threshold)
if err != nil {
log.Errorf("%v", err)
return cli.NewExitError(errors.Cause(err), 2)
Expand Down
24 changes: 18 additions & 6 deletions feature/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func getDataResource(meta *metadata.Metadata, resID string) *metadata.DataResour
// FeaturizeDataset adds features based on referenced data resources
// in the metadata. The features are added as a reference resource in
// the metadata and written to the output path.
func FeaturizeDataset(meta *metadata.Metadata, imageFeaturizer *rest.Featurizer, sourcePath string, mediaPath string, outputFolder string, outputPathData string, outputPathSchema string, hasHeader bool) error {
func FeaturizeDataset(meta *metadata.Metadata, imageFeaturizer *rest.Featurizer, sourcePath string, mediaPath string, outputFolder string, outputPathData string, outputPathSchema string, hasHeader bool, threshold float64) error {
// find the main data resource
mainDR := meta.GetMainDataResource()

Expand Down Expand Up @@ -86,7 +86,7 @@ func FeaturizeDataset(meta *metadata.Metadata, imageFeaturizer *rest.Featurizer,
for index, colDR := range colsToFeaturize {
imagePath := fmt.Sprintf("%s/%s", mediaPath, path.Join(colDR.originalResPath, line[index]))
log.Infof("Featurizing %s", imagePath)
feature, err := featurizeImage(imagePath, imageFeaturizer)
feature, err := featurizeImage(imagePath, imageFeaturizer, threshold)
if err != nil {
return errors.Wrap(err, "error getting image feature output")
}
Expand Down Expand Up @@ -158,7 +158,7 @@ func addFeaturesToSchema(meta *metadata.Metadata, mainDR *metadata.DataResource,
return colsToFeaturize
}

func featurizeImage(filepath string, featurizer *rest.Featurizer) (string, error) {
func featurizeImage(filepath string, featurizer *rest.Featurizer, threshold float64) (string, error) {
feature, err := featurizer.FeaturizeImage(filepath)
if err != nil {
return "", errors.Wrap(err, "failed to featurize image")
Expand All @@ -174,10 +174,22 @@ func featurizeImage(filepath string, featurizer *rest.Featurizer) (string, error
return "", errors.Wrap(err, "image feature labels in unexpected format")
}

confidences, ok := objs["confidence"].(map[string]interface{})
if !ok {
return "", errors.Wrap(err, "image feature confidences in unexpected format")
}

labelText := make([]string, 0)
for _, l := range labels {
cleanedLabel := strings.Replace(l.(string), "_", " ", -1)
labelText = append(labelText, cleanedLabel)
for i, l := range labels {
if confidences[i].(float64) >= threshold {
cleanedLabel := strings.Replace(l.(string), "_", " ", -1)
labelText = append(labelText, cleanedLabel)
}
}

// use default value if no labels had high enough confidence
if len(labelText) == 0 {
labelText = append(labelText, "other")
}

return strings.Join(labelText, ","), nil
Expand Down

0 comments on commit 386ed8b

Please sign in to comment.