From dd6da5f4676293b15e95b4a72c5c2e6a061a4573 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 5 Oct 2018 11:07:05 -0400 Subject: [PATCH 01/23] Initial addition of primitive based ingest. --- cmd/distil-classify/main.go | 55 +- cmd/distil-cluster/main.go | 64 +- cmd/distil-featurize/main.go | 66 +- cmd/distil-rank/main.go | 112 +- cmd/distil-summary/main.go | 53 +- pipeline/core.pb.go | 3079 +++++++++++++++++ pipeline/core.proto | 473 +++ pipeline/execute.pb.go | 213 ++ pipeline/execute.proto | 18 + pipeline/pipeline.json | 0 pipeline/pipeline.pb.go | 1585 +++++++++ pipeline/pipeline.proto | 229 ++ pipeline/primitive.pb.go | 115 + pipeline/primitive.proto | 15 + pipeline/problem.pb.go | 599 ++++ pipeline/problem.proto | 117 + pipeline/value.pb.go | 847 +++++ pipeline/value.proto | 109 + primitive/classify.go | 70 + primitive/cluster.go | 92 + primitive/compute/client.go | 393 +++ primitive/compute/description/builder.go | 148 + .../description/inference_step_data.go | 92 + .../compute/description/preprocessing.go | 127 + .../compute/description/primitive_steps.go | 244 ++ primitive/compute/description/step_data.go | 267 ++ primitive/compute/execute_pipeline_request.go | 260 ++ primitive/compute/persist.go | 161 + primitive/compute/pull.go | 47 + primitive/compute/result/complex_field.peg | 46 + primitive/compute/result/complex_field.peg.go | 1256 +++++++ .../compute/result/complex_field_test.go | 117 + primitive/compute/result/result_csv_parser.go | 90 + .../compute/result/result_csv_parser_test.go | 25 + primitive/compute/result/testdata/test.csv | 11 + primitive/compute/solution_request.go | 386 +++ primitive/compute/stop_solution_request.go | 26 + primitive/compute/ta3ta2.go | 245 ++ primitive/feature.go | 91 + primitive/pipeline.go | 256 ++ primitive/rank.go | 68 + primitive/summarize.go | 62 + util/file.go | 58 + 43 files changed, 12136 insertions(+), 251 deletions(-) create mode 100644 pipeline/core.pb.go create mode 100644 pipeline/core.proto create 
mode 100644 pipeline/execute.pb.go create mode 100644 pipeline/execute.proto create mode 100644 pipeline/pipeline.json create mode 100644 pipeline/pipeline.pb.go create mode 100644 pipeline/pipeline.proto create mode 100644 pipeline/primitive.pb.go create mode 100644 pipeline/primitive.proto create mode 100644 pipeline/problem.pb.go create mode 100644 pipeline/problem.proto create mode 100644 pipeline/value.pb.go create mode 100644 pipeline/value.proto create mode 100644 primitive/classify.go create mode 100644 primitive/cluster.go create mode 100644 primitive/compute/client.go create mode 100644 primitive/compute/description/builder.go create mode 100644 primitive/compute/description/inference_step_data.go create mode 100644 primitive/compute/description/preprocessing.go create mode 100644 primitive/compute/description/primitive_steps.go create mode 100644 primitive/compute/description/step_data.go create mode 100644 primitive/compute/execute_pipeline_request.go create mode 100644 primitive/compute/persist.go create mode 100644 primitive/compute/pull.go create mode 100644 primitive/compute/result/complex_field.peg create mode 100644 primitive/compute/result/complex_field.peg.go create mode 100644 primitive/compute/result/complex_field_test.go create mode 100644 primitive/compute/result/result_csv_parser.go create mode 100644 primitive/compute/result/result_csv_parser_test.go create mode 100644 primitive/compute/result/testdata/test.csv create mode 100644 primitive/compute/solution_request.go create mode 100644 primitive/compute/stop_solution_request.go create mode 100644 primitive/compute/ta3ta2.go create mode 100644 primitive/feature.go create mode 100644 primitive/pipeline.go create mode 100644 primitive/rank.go create mode 100644 primitive/summarize.go create mode 100644 util/file.go diff --git a/cmd/distil-classify/main.go b/cmd/distil-classify/main.go index d5c16a2..48278be 100644 --- a/cmd/distil-classify/main.go +++ b/cmd/distil-classify/main.go @@ -1,8 
+1,6 @@ package main import ( - "encoding/json" - "io/ioutil" "os" "runtime" "strings" @@ -11,7 +9,8 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" ) func splitAndTrim(arg string) []string { @@ -34,17 +33,12 @@ func main() { app.Name = "distil-classify" app.Version = "0.1.0" app.Usage = "Classify D3M merged datasets" - app.UsageText = "distil-classify --rest-endpoint= --classification-function= --dataset= --output=" + app.UsageText = "distil-classify --endpoint= --dataset= --output=" app.Flags = []cli.Flag{ cli.StringFlag{ - Name: "rest-endpoint", + Name: "endpoint", Value: "", - Usage: "The REST endpoint url", - }, - cli.StringFlag{ - Name: "classification-function", - Value: "", - Usage: "The classification function to use", + Usage: "The pipeline runner endpoint", }, cli.StringFlag{ Name: "dataset", @@ -63,48 +57,33 @@ func main() { }, } app.Action = func(c *cli.Context) error { - if c.String("rest-endpoint") == "" { - return cli.NewExitError("missing commandline flag `--rest-endpoint`", 1) - } - if c.String("classification-function") == "" { - return cli.NewExitError("missing commandline flag `--classification-function`", 1) + if c.String("endpoint") == "" { + return cli.NewExitError("missing commandline flag `--endpoint`", 1) } if c.String("dataset") == "" { return cli.NewExitError("missing commandline flag `--dataset`", 1) } - classificationFunction := c.String("classification-function") - restBaseEndpoint := c.String("rest-endpoint") + endpoint := c.String("endpoint") path := c.String("dataset") outputFilePath := c.String("output") - // initialize REST client - log.Infof("Using REST interface at `%s` ", restBaseEndpoint) - client := rest.NewClient(restBaseEndpoint) - - // create classifier - classifier := rest.NewClassifier(classificationFunction, client) - - // 
classify the file - classification, err := classifier.ClassifyFile(path) + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Classification for `%s` successful", path) - // marshall result - bytes, err := json.MarshalIndent(classification, "", " ") - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - // write to file - log.Infof("Writing classification to file `%s`", outputFilePath) - err = ioutil.WriteFile(outputFilePath, bytes, 0644) + step := primitive.NewIngestStep(client) + + // classify the file + err = step.ClassifyPrimitive(path, outputFilePath) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } + log.Infof("Classification for `%s` successful", path) return nil } diff --git a/cmd/distil-cluster/main.go b/cmd/distil-cluster/main.go index 9534858..98f2290 100644 --- a/cmd/distil-cluster/main.go +++ b/cmd/distil-cluster/main.go @@ -10,9 +10,9 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/feature" - "github.com/unchartedsoftware/distil-ingest/metadata" - "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" + "github.com/unchartedsoftware/distil-ingest/util" ) func splitAndTrim(arg string) []string { @@ -35,17 +35,12 @@ func main() { app.Name = "distil-cluster" app.Version = "0.1.0" app.Usage = "Cluster D3M datasets" - app.UsageText = "distil-cluster --rest-endpoint= --cluster-function= --dataset= --output=" + app.UsageText = "distil-cluster --endpoint= --dataset= --output=" app.Flags = []cli.Flag{ cli.StringFlag{ - Name: "rest-endpoint", + Name: 
"endpoint", Value: "", - Usage: "The REST endpoint url", - }, - cli.StringFlag{ - Name: "cluster-function", - Value: "", - Usage: "The clustering function to use", + Usage: "The pipeline runner endpoint", }, cli.StringFlag{ Name: "dataset", @@ -88,33 +83,34 @@ func main() { }, } app.Action = func(c *cli.Context) error { - if c.String("rest-endpoint") == "" { - return cli.NewExitError("missing commandline flag `--rest-endpoint`", 1) - } - if c.String("cluster-function") == "" { - return cli.NewExitError("missing commandline flag `--cluster-function`", 1) + if c.String("endpoint") == "" { + return cli.NewExitError("missing commandline flag `--endpoint`", 1) } if c.String("dataset") == "" { return cli.NewExitError("missing commandline flag `--dataset`", 1) } - clusterFunction := c.String("cluster-function") - restBaseEndpoint := c.String("rest-endpoint") + endpoint := c.String("endpoint") datasetPath := c.String("dataset") - mediaPath := c.String("media-path") + //mediaPath := c.String("media-path") outputSchema := c.String("output-schema") - outputData := c.String("output-data") + //outputData := c.String("output-data") schemaPath := c.String("schema") outputFilePath := c.String("output") hasHeader := c.Bool("has-header") - // initialize REST client - log.Infof("Using REST interface at `%s` ", restBaseEndpoint) - client := rest.NewClient(restBaseEndpoint) + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) + if err != nil { + log.Errorf("%v", err) + return cli.NewExitError(errors.Cause(err), 2) + } + step := primitive.NewIngestStep(client) // create feature folder clusterPath := path.Join(outputFilePath, "clusters") - if dirExists(clusterPath) { + if util.DirExists(clusterPath) { // delete existing data to overwrite with latest os.RemoveAll(clusterPath) log.Infof("Deleted data at %s", clusterPath) @@ -126,22 +122,11 @@ func main() { 
os.Remove(path.Join(outputFilePath, "clusterDatasetDoc.json")) // create featurizer - featurizer := rest.NewFeaturizer(clusterFunction, client) - - // load metadata from original schema - meta, err := metadata.LoadMetadataFromOriginalSchema(schemaPath) + err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputFilePath, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - - // featurize data - err = feature.ClusterDataset(meta, featurizer, datasetPath, mediaPath, outputFilePath, outputData, outputSchema, hasHeader) - if err != nil { - log.Errorf("%v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - log.Infof("Clustered data written to %s", outputFilePath) return nil @@ -149,10 +134,3 @@ func main() { // run app app.Run(os.Args) } - -func dirExists(path string) bool { - if _, err := os.Stat(path); os.IsNotExist(err) { - return false - } - return true -} diff --git a/cmd/distil-featurize/main.go b/cmd/distil-featurize/main.go index 891a750..94dadb6 100644 --- a/cmd/distil-featurize/main.go +++ b/cmd/distil-featurize/main.go @@ -10,9 +10,9 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/feature" - "github.com/unchartedsoftware/distil-ingest/metadata" - "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" + "github.com/unchartedsoftware/distil-ingest/util" ) func splitAndTrim(arg string) []string { @@ -35,17 +35,12 @@ func main() { app.Name = "distil-featurize" app.Version = "0.1.0" app.Usage = "Featurize D3M datasets" - app.UsageText = "distil-featurize --rest-endpoint= --featurize-function= --dataset= --output=" + app.UsageText = "distil-featurize --endpoint= --dataset= --output=" app.Flags = []cli.Flag{ cli.StringFlag{ - Name: "rest-endpoint", + Name: "endpoint", Value: "", - Usage: "The REST 
endpoint url", - }, - cli.StringFlag{ - Name: "featurize-function", - Value: "", - Usage: "The featurize function to use", + Usage: "The pipeline runner endpoint", }, cli.StringFlag{ Name: "dataset", @@ -93,34 +88,35 @@ func main() { }, } app.Action = func(c *cli.Context) error { - if c.String("rest-endpoint") == "" { - return cli.NewExitError("missing commandline flag `--rest-endpoint`", 1) - } - if c.String("featurize-function") == "" { - return cli.NewExitError("missing commandline flag `--featurize-function`", 1) + if c.String("endpoint") == "" { + return cli.NewExitError("missing commandline flag `--endpoint`", 1) } if c.String("dataset") == "" { return cli.NewExitError("missing commandline flag `--dataset`", 1) } - featurizeFunction := c.String("featurize-function") - restBaseEndpoint := c.String("rest-endpoint") + endpoint := c.String("endpoint") datasetPath := c.String("dataset") - mediaPath := c.String("media-path") + //mediaPath := c.String("media-path") outputSchema := c.String("output-schema") - outputData := c.String("output-data") + //outputData := c.String("output-data") schemaPath := c.String("schema") outputFilePath := c.String("output") hasHeader := c.Bool("has-header") - threshold := c.Float64("threshold") + //threshold := c.Float64("threshold") - // initialize REST client - log.Infof("Using REST interface at `%s` ", restBaseEndpoint) - client := rest.NewClient(restBaseEndpoint) + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) + if err != nil { + log.Errorf("%v", err) + return cli.NewExitError(errors.Cause(err), 2) + } + step := primitive.NewIngestStep(client) // create feature folder featurePath := path.Join(outputFilePath, "features") - if dirExists(featurePath) { + if util.DirExists(featurePath) { // delete existing data to overwrite with latest os.RemoveAll(featurePath) log.Infof("Deleted data at %s", featurePath) @@ 
-132,22 +128,11 @@ func main() { os.Remove(path.Join(outputFilePath, "featureDatasetDoc.json")) // create featurizer - featurizer := rest.NewFeaturizer(featurizeFunction, client) - - // load metadata from original schema - meta, err := metadata.LoadMetadataFromOriginalSchema(schemaPath) + err = step.FeaturizePrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputFilePath, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - - // featurize data - err = feature.FeaturizeDataset(meta, featurizer, datasetPath, mediaPath, outputFilePath, outputData, outputSchema, hasHeader, threshold) - if err != nil { - log.Errorf("%v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - log.Infof("Featurized data written to %s", outputFilePath) return nil @@ -155,10 +140,3 @@ func main() { // run app app.Run(os.Args) } - -func dirExists(path string) bool { - if _, err := os.Stat(path); os.IsNotExist(err) { - return false - } - return true -} diff --git a/cmd/distil-rank/main.go b/cmd/distil-rank/main.go index a618123..7b1fcac 100644 --- a/cmd/distil-rank/main.go +++ b/cmd/distil-rank/main.go @@ -3,9 +3,7 @@ package main import ( "bytes" "encoding/csv" - "encoding/json" "io" - "io/ioutil" "os" "path/filepath" "runtime" @@ -15,8 +13,8 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/metadata" - "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" ) func splitAndTrim(arg string) []string { @@ -39,7 +37,7 @@ func main() { app.Name = "distil-rank" app.Version = "0.1.0" app.Usage = "Rank D3M merged datasets" - app.UsageText = "distil-rank --kafka-endpoints= --dataset= --output=" + app.UsageText = "distil-rank --endpoint= --dataset= --output=" app.Flags = []cli.Flag{ cli.StringFlag{ Name: "schema", @@ -66,14 +64,9 @@ func main() { Usage: 
"Whether or not the CSV file has a header row", }, cli.StringFlag{ - Name: "rest-endpoint", + Name: "endpoint", Value: "", - Usage: "The REST endpoint url", - }, - cli.StringFlag{ - Name: "ranking-function", - Value: "", - Usage: "The ranking function to use", + Usage: "The pipeline runner endpoint", }, cli.StringFlag{ Name: "output", @@ -96,98 +89,39 @@ func main() { if c.String("dataset") == "" { return cli.NewExitError("missing commandline flag `--dataset`", 1) } - if c.String("rest-endpoint") == "" { - return cli.NewExitError("missing commandline flag `--rest-endpoint`", 1) - } - if c.String("ranking-function") == "" { - return cli.NewExitError("missing commandline flag `--ranking-function`", 1) + if c.String("endpoint") == "" { + return cli.NewExitError("missing commandline flag `--endpoint`", 1) } if c.String("ranking-output") == "" { return cli.NewExitError("missing commandline flag `--ranking-output`", 1) } - classificationPath := filepath.Clean(c.String("classification")) - typeSource := c.String("type-source") - schemaPath := filepath.Clean(c.String("schema")) - rankingFunction := c.String("ranking-function") - restBaseEndpoint := c.String("rest-endpoint") + //classificationPath := filepath.Clean(c.String("classification")) + //typeSource := c.String("type-source") + //schemaPath := filepath.Clean(c.String("schema")) + endpoint := c.String("endpoint") datasetPath := filepath.Clean(c.String("dataset")) - rankingOutputFile := c.String("ranking-output") - rowLimit := c.Int("row-limit") - hasHeader := c.Bool("has-header") - + //rankingOutputFile := c.String("ranking-output") + //rowLimit := c.Int("row-limit") + //hasHeader := c.Bool("has-header") outputFilePath := c.String("output") - var err error - - // load the metadata - var meta *metadata.Metadata - if schemaPath == "" || schemaPath == "." 
{ - log.Infof("Loading metadata from raw file") - meta, err = metadata.LoadMetadataFromRawFile(datasetPath, classificationPath) - } else if typeSource == "classification" { - log.Infof("Loading metadata from classification file") - meta, err = metadata.LoadMetadataFromClassification( - schemaPath, - classificationPath) - } else { - log.Infof("Loading metadata from schema file") - meta, err = metadata.LoadMetadataFromMergedSchema( - schemaPath) - } + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } + step := primitive.NewIngestStep(client) - // get header for the merged data - headers, err := meta.GenerateHeaders() - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - - // merged data only has 1 header - header := headers[0] - - // add the header to the raw data - data, err := getMergedData(header, datasetPath, hasHeader, rowLimit) - - // write to file to submit the file - err = ioutil.WriteFile(rankingOutputFile, data, 0644) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - - // create the REST client - log.Infof("Using REST interface at `%s/%s` ", restBaseEndpoint, rankingFunction) - client := rest.NewClient(restBaseEndpoint) - - // create ranker - ranker := rest.NewRanker(rankingFunction, client) - - // get the importance from the REST interface - log.Infof("Getting importance ranking of file `%s`", rankingOutputFile) - importance, err := ranker.RankFile(rankingOutputFile) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - - // marshall result - bytes, err := json.MarshalIndent(importance, "", " ") - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - // 
write to file - log.Infof("Writing importance ranking to file `%s`", outputFilePath) - err = ioutil.WriteFile(outputFilePath, bytes, 0644) + // rank the dataset variable importance + err = step.RankPrimitive(datasetPath, outputFilePath) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } + log.Infof("Ranked data written to %s", outputFilePath) return nil } diff --git a/cmd/distil-summary/main.go b/cmd/distil-summary/main.go index f7d0f2c..2edf4e2 100644 --- a/cmd/distil-summary/main.go +++ b/cmd/distil-summary/main.go @@ -1,8 +1,6 @@ package main import ( - "encoding/json" - "io/ioutil" "os" "runtime" @@ -10,7 +8,8 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" ) func main() { @@ -21,17 +20,12 @@ func main() { app.Name = "distil-summary" app.Version = "0.1.0" app.Usage = "Summarize D3M datasets" - app.UsageText = "distil-summary --rest-endpoint= --summary-function= --dataset= --output=" + app.UsageText = "distil-summary --endpoint= --dataset= --output=" app.Flags = []cli.Flag{ cli.StringFlag{ - Name: "rest-endpoint", + Name: "endpoint", Value: "", - Usage: "The REST endpoint url", - }, - cli.StringFlag{ - Name: "summary-function", - Value: "", - Usage: "The summary function to use", + Usage: "The pipeline runner endpoint", }, cli.StringFlag{ Name: "dataset", @@ -45,12 +39,9 @@ func main() { }, } app.Action = func(c *cli.Context) error { - if c.String("rest-endpoint") == "" { + if c.String("endpoint") == "" { return cli.NewExitError("missing commandline flag `--rest-endpoint`", 1) } - if c.String("summary-function") == "" { - return cli.NewExitError("missing commandline flag `--summary-function`", 1) - } if c.String("dataset") == "" { return cli.NewExitError("missing commandline flag `--dataset`", 1) } @@ 
-58,38 +49,26 @@ func main() { return cli.NewExitError("missing commandline flag `--output`", 1) } - summaryFunction := c.String("summary-function") - restBaseEndpoint := c.String("rest-endpoint") + endpoint := c.String("endpoint") path := c.String("dataset") outputFilePath := c.String("output") - // initialize REST client - log.Infof("Using REST interface at `%s` ", restBaseEndpoint) - client := rest.NewClient(restBaseEndpoint) - - // create classifier - summarizer := rest.NewSummarizer(summaryFunction, client) - - // classify the file - summary, err := summarizer.SummarizeFile(path) + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Summary for `%s` successful", path) - // marshall result - bytes, err := json.MarshalIndent(summary, "", " ") - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - // write to file - log.Infof("Writing summary to file `%s`", outputFilePath) - err = ioutil.WriteFile(outputFilePath, bytes, 0644) + step := primitive.NewIngestStep(client) + + // classify the dataset + err = step.SummarizePrimitive(path, outputFilePath) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } + log.Infof("Summarized data written to %s", outputFilePath) return nil } diff --git a/pipeline/core.pb.go b/pipeline/core.pb.go new file mode 100644 index 0000000..c2d380f --- /dev/null +++ b/pipeline/core.pb.go @@ -0,0 +1,3079 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// source: core.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" +import descriptor "github.com/golang/protobuf/protoc-gen-go/descriptor" +import timestamp "github.com/golang/protobuf/ptypes/timestamp" + +import ( + context "golang.org/x/net/context" + grpc "google.golang.org/grpc" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type EvaluationMethod int32 + +const ( + // Default value. Not to be used. + EvaluationMethod_EVALUATION_METHOD_UNDEFINED EvaluationMethod = 0 + // The following are the only evaluation methods required + // to be supported for the "ScoreSolution" call. + EvaluationMethod_HOLDOUT EvaluationMethod = 1 + EvaluationMethod_K_FOLD EvaluationMethod = 2 + // The rest are defined to allow expressing internal evaluation + // methods used by TA2 during solution search. If any method being used + // is missing, feel free to request it to be added. + EvaluationMethod_LEAVE_ONE_OUT EvaluationMethod = 100 + // Instead of really scoring, a TA2 might predict the score only. + EvaluationMethod_PREDICTION EvaluationMethod = 101 + // Training data is reused to test as well. 
+ EvaluationMethod_TRAINING_DATA EvaluationMethod = 102 +) + +var EvaluationMethod_name = map[int32]string{ + 0: "EVALUATION_METHOD_UNDEFINED", + 1: "HOLDOUT", + 2: "K_FOLD", + 100: "LEAVE_ONE_OUT", + 101: "PREDICTION", + 102: "TRAINING_DATA", +} +var EvaluationMethod_value = map[string]int32{ + "EVALUATION_METHOD_UNDEFINED": 0, + "HOLDOUT": 1, + "K_FOLD": 2, + "LEAVE_ONE_OUT": 100, + "PREDICTION": 101, + "TRAINING_DATA": 102, +} + +func (x EvaluationMethod) String() string { + return proto.EnumName(EvaluationMethod_name, int32(x)) +} +func (EvaluationMethod) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{0} +} + +type ProgressState int32 + +const ( + // Default value. Not to be used. + ProgressState_PROGRESS_UNKNOWN ProgressState = 0 + // The process has been scheduled but is pending execution. + ProgressState_PENDING ProgressState = 1 + // The process is currently running. There can be multiple messages with this state + // (while the process is running). + ProgressState_RUNNING ProgressState = 2 + // The process completed and final results are available. + ProgressState_COMPLETED ProgressState = 3 + // The process failed. + ProgressState_ERRORED ProgressState = 4 +) + +var ProgressState_name = map[int32]string{ + 0: "PROGRESS_UNKNOWN", + 1: "PENDING", + 2: "RUNNING", + 3: "COMPLETED", + 4: "ERRORED", +} +var ProgressState_value = map[string]int32{ + "PROGRESS_UNKNOWN": 0, + "PENDING": 1, + "RUNNING": 2, + "COMPLETED": 3, + "ERRORED": 4, +} + +func (x ProgressState) String() string { + return proto.EnumName(ProgressState_name, int32(x)) +} +func (ProgressState) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{1} +} + +type ScoringConfiguration struct { + // The evaluation method to use. + Method EvaluationMethod `protobuf:"varint,1,opt,name=method,proto3,enum=EvaluationMethod" json:"method,omitempty"` + // Number of folds made, if applicable. 
+ Folds int32 `protobuf:"varint,2,opt,name=folds,proto3" json:"folds,omitempty"` + // Ratio of train set vs. test set, if applicable. + TrainTestRatio float64 `protobuf:"fixed64,3,opt,name=train_test_ratio,json=trainTestRatio,proto3" json:"train_test_ratio,omitempty"` + // Shuffle data? Set to true if employed. + Shuffle bool `protobuf:"varint,4,opt,name=shuffle,proto3" json:"shuffle,omitempty"` + // Value for random seed to use for shuffling. Optional. + RandomSeed int32 `protobuf:"varint,5,opt,name=random_seed,json=randomSeed,proto3" json:"random_seed,omitempty"` + // Do stratified k-fold? Set to true if employed. + Stratified bool `protobuf:"varint,6,opt,name=stratified,proto3" json:"stratified,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ScoringConfiguration) Reset() { *m = ScoringConfiguration{} } +func (m *ScoringConfiguration) String() string { return proto.CompactTextString(m) } +func (*ScoringConfiguration) ProtoMessage() {} +func (*ScoringConfiguration) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{0} +} +func (m *ScoringConfiguration) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ScoringConfiguration.Unmarshal(m, b) +} +func (m *ScoringConfiguration) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ScoringConfiguration.Marshal(b, m, deterministic) +} +func (dst *ScoringConfiguration) XXX_Merge(src proto.Message) { + xxx_messageInfo_ScoringConfiguration.Merge(dst, src) +} +func (m *ScoringConfiguration) XXX_Size() int { + return xxx_messageInfo_ScoringConfiguration.Size(m) +} +func (m *ScoringConfiguration) XXX_DiscardUnknown() { + xxx_messageInfo_ScoringConfiguration.DiscardUnknown(m) +} + +var xxx_messageInfo_ScoringConfiguration proto.InternalMessageInfo + +func (m *ScoringConfiguration) GetMethod() EvaluationMethod { + if m != nil { + return m.Method + } + return 
EvaluationMethod_EVALUATION_METHOD_UNDEFINED +} + +func (m *ScoringConfiguration) GetFolds() int32 { + if m != nil { + return m.Folds + } + return 0 +} + +func (m *ScoringConfiguration) GetTrainTestRatio() float64 { + if m != nil { + return m.TrainTestRatio + } + return 0 +} + +func (m *ScoringConfiguration) GetShuffle() bool { + if m != nil { + return m.Shuffle + } + return false +} + +func (m *ScoringConfiguration) GetRandomSeed() int32 { + if m != nil { + return m.RandomSeed + } + return 0 +} + +func (m *ScoringConfiguration) GetStratified() bool { + if m != nil { + return m.Stratified + } + return false +} + +type Score struct { + Metric *ProblemPerformanceMetric `protobuf:"bytes,1,opt,name=metric,proto3" json:"metric,omitempty"` + // When doing multiple folds, which fold is this score associated with, 0-based. + // We do not aggregate scores across folds on the TA2 side, but expose everything to the TA3. + // If scoring was not done as part of the cross-validation, then it can be returned + // as the first and only fold, in which case the value of this field should be 0. + Fold int32 `protobuf:"varint,2,opt,name=fold,proto3" json:"fold,omitempty"` + // To which target or targets does this score apply? 
+ Targets []*ProblemTarget `protobuf:"bytes,3,rep,name=targets,proto3" json:"targets,omitempty"` + Value *Value `protobuf:"bytes,4,opt,name=value,proto3" json:"value,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Score) Reset() { *m = Score{} } +func (m *Score) String() string { return proto.CompactTextString(m) } +func (*Score) ProtoMessage() {} +func (*Score) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{1} +} +func (m *Score) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Score.Unmarshal(m, b) +} +func (m *Score) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Score.Marshal(b, m, deterministic) +} +func (dst *Score) XXX_Merge(src proto.Message) { + xxx_messageInfo_Score.Merge(dst, src) +} +func (m *Score) XXX_Size() int { + return xxx_messageInfo_Score.Size(m) +} +func (m *Score) XXX_DiscardUnknown() { + xxx_messageInfo_Score.DiscardUnknown(m) +} + +var xxx_messageInfo_Score proto.InternalMessageInfo + +func (m *Score) GetMetric() *ProblemPerformanceMetric { + if m != nil { + return m.Metric + } + return nil +} + +func (m *Score) GetFold() int32 { + if m != nil { + return m.Fold + } + return 0 +} + +func (m *Score) GetTargets() []*ProblemTarget { + if m != nil { + return m.Targets + } + return nil +} + +func (m *Score) GetValue() *Value { + if m != nil { + return m.Value + } + return nil +} + +// After "state" becomes "COMPLETED" or "ERRORED" stream closes. +// The granularity of progress updates is not specified by the API at this time. Some systems +// might be updating frequently and provide many updates of the progress of a whole process +// as well as individual pipeline steps. Some systems might just report these high-level +// progress states once, not doing any progress updates in the meantime. 
The "status" field +// should contain information to supplement the progress state, such as specific failure details +// in the case of an "ERRORED" state being returned. +type Progress struct { + State ProgressState `protobuf:"varint,1,opt,name=state,proto3,enum=ProgressState" json:"state,omitempty"` + Status string `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` + // Set only after state becomes "RUNNING". If it never really properly runs, but errors + // when attempted to run, then it should be the timestamp of the error. + Start *timestamp.Timestamp `protobuf:"bytes,3,opt,name=start,proto3" json:"start,omitempty"` + // Set only when state is "COMPLETED" or "ERRORED". + End *timestamp.Timestamp `protobuf:"bytes,4,opt,name=end,proto3" json:"end,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Progress) Reset() { *m = Progress{} } +func (m *Progress) String() string { return proto.CompactTextString(m) } +func (*Progress) ProtoMessage() {} +func (*Progress) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{2} +} +func (m *Progress) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Progress.Unmarshal(m, b) +} +func (m *Progress) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Progress.Marshal(b, m, deterministic) +} +func (dst *Progress) XXX_Merge(src proto.Message) { + xxx_messageInfo_Progress.Merge(dst, src) +} +func (m *Progress) XXX_Size() int { + return xxx_messageInfo_Progress.Size(m) +} +func (m *Progress) XXX_DiscardUnknown() { + xxx_messageInfo_Progress.DiscardUnknown(m) +} + +var xxx_messageInfo_Progress proto.InternalMessageInfo + +func (m *Progress) GetState() ProgressState { + if m != nil { + return m.State + } + return ProgressState_PROGRESS_UNKNOWN +} + +func (m *Progress) GetStatus() string { + if m != nil { + return m.Status + } + return "" +} + +func (m 
*Progress) GetStart() *timestamp.Timestamp { + if m != nil { + return m.Start + } + return nil +} + +func (m *Progress) GetEnd() *timestamp.Timestamp { + if m != nil { + return m.End + } + return nil +} + +// Updates problem with new description. This also updates the problem description for all +// ongoing solution searches associated with this problem. Internal behavior of TA2 +// is unspecified: it can simply start a new search using new problem description, or +// it can start modifying solutions it has already found to new problem description, or +// it can use it to further help narrow down ongoing solution searches. In any case, after +// this call returns, all reported solutions for searches associated with this problem +// should be for the updated problem description. +type UpdateProblemRequest struct { + SearchId string `protobuf:"bytes,1,opt,name=search_id,json=searchId,proto3" json:"search_id,omitempty"` + // New problem description. It has to be provided in full and it replaces existing + // problem description. 
+ Problem *ProblemDescription `protobuf:"bytes,2,opt,name=problem,proto3" json:"problem,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *UpdateProblemRequest) Reset() { *m = UpdateProblemRequest{} } +func (m *UpdateProblemRequest) String() string { return proto.CompactTextString(m) } +func (*UpdateProblemRequest) ProtoMessage() {} +func (*UpdateProblemRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{3} +} +func (m *UpdateProblemRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_UpdateProblemRequest.Unmarshal(m, b) +} +func (m *UpdateProblemRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_UpdateProblemRequest.Marshal(b, m, deterministic) +} +func (dst *UpdateProblemRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_UpdateProblemRequest.Merge(dst, src) +} +func (m *UpdateProblemRequest) XXX_Size() int { + return xxx_messageInfo_UpdateProblemRequest.Size(m) +} +func (m *UpdateProblemRequest) XXX_DiscardUnknown() { + xxx_messageInfo_UpdateProblemRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_UpdateProblemRequest proto.InternalMessageInfo + +func (m *UpdateProblemRequest) GetSearchId() string { + if m != nil { + return m.SearchId + } + return "" +} + +func (m *UpdateProblemRequest) GetProblem() *ProblemDescription { + if m != nil { + return m.Problem + } + return nil +} + +type UpdateProblemResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *UpdateProblemResponse) Reset() { *m = UpdateProblemResponse{} } +func (m *UpdateProblemResponse) String() string { return proto.CompactTextString(m) } +func (*UpdateProblemResponse) ProtoMessage() {} +func (*UpdateProblemResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{4} +} +func (m 
*UpdateProblemResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_UpdateProblemResponse.Unmarshal(m, b) +} +func (m *UpdateProblemResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_UpdateProblemResponse.Marshal(b, m, deterministic) +} +func (dst *UpdateProblemResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_UpdateProblemResponse.Merge(dst, src) +} +func (m *UpdateProblemResponse) XXX_Size() int { + return xxx_messageInfo_UpdateProblemResponse.Size(m) +} +func (m *UpdateProblemResponse) XXX_DiscardUnknown() { + xxx_messageInfo_UpdateProblemResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_UpdateProblemResponse proto.InternalMessageInfo + +// Starts a new solution search. Found solutions have not necessary been fitted on the provided +// inputs. Problem description and inputs are used only to help guide the search process. +// Consider any found solutions to be just a static description of solutions at this stage. +// Multiple parallel solution searches can happen at the same time. +type SearchSolutionsRequest struct { + // Some string identifying the name and version of the TA3 system. + UserAgent string `protobuf:"bytes,1,opt,name=user_agent,json=userAgent,proto3" json:"user_agent,omitempty"` + // Shall be set to "protocol_version" above. + Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` + // Desired upper limit of time for solution search, expressed in minutes. + // Is suggestion, and TA2's should attempt to obey, but TA3's should realize may be + // violated. Default value of 0 (and any negative number) signifies no time bound. + TimeBound float64 `protobuf:"fixed64,3,opt,name=time_bound,json=timeBound,proto3" json:"time_bound,omitempty"` + // Value stating the priority of the search. If multiple searches are queued then highest + // priority (largest number) should be started next by TA2. 
Primarily used to sort any + // queue, but no further guarantee that TA2 can give more resources to high priority + // searches. If unspecified, by default search will have priority 0. Negative numbers have + // still lower priority. + Priority float64 `protobuf:"fixed64,4,opt,name=priority,proto3" json:"priority,omitempty"` + // Which value types can a TA2 system use to communicate values to a TA3 system? + // The order is important as a TA2 system will try value types in order until one works out, + // or an error will be returned instead of the value. + AllowedValueTypes []ValueType `protobuf:"varint,5,rep,packed,name=allowed_value_types,json=allowedValueTypes,proto3,enum=ValueType" json:"allowed_value_types,omitempty"` + // Problem description to use for the solution search. + Problem *ProblemDescription `protobuf:"bytes,6,opt,name=problem,proto3" json:"problem,omitempty"` + // A pipeline template to use for search or to execute. If template is omitted, then a + // regular solution search is done. If template consists only of one placeholder step, + // then a regular solution search is done to replace that step. If there is no placeholder + // step, but template describes a full pipeline with free hyper-parameters, then this + // call becomes a hyper-paramater tuning call over free hyper-paramaters and found solutions + // share the same pipeline, but different hyper-parameter configurations. If there is no + // placeholder step and all hyper-parameters are fixed as part of the pipeline, then this + // call only checks the given template and returns the solution with same pipeline back, to + // be executed. This allows fixed computations to be done on data, for example, pipeline can + // consist of only one primitive with fixed hyper-parameters to execute that one primitive. + // Moreover, such fully specified pipelines with fixed hyper-parametres can have any + // inputs and any outputs. 
Otherwise pipelines have to be from a Dataset container value + // to predictions Pandas dataframe. While there are all these options possible, only a + // subset has to be supported by all systems. See README for more details. + Template *PipelineDescription `protobuf:"bytes,7,opt,name=template,proto3" json:"template,omitempty"` + // Pipeline inputs used during solution search. They have to point to Dataset container + // values. Order matters as each input is mapped to a template's input in order. Optional + // for templates without a placeholder and with all hyper-parameters fixed. + Inputs []*Value `protobuf:"bytes,8,rep,name=inputs,proto3" json:"inputs,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SearchSolutionsRequest) Reset() { *m = SearchSolutionsRequest{} } +func (m *SearchSolutionsRequest) String() string { return proto.CompactTextString(m) } +func (*SearchSolutionsRequest) ProtoMessage() {} +func (*SearchSolutionsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{5} +} +func (m *SearchSolutionsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SearchSolutionsRequest.Unmarshal(m, b) +} +func (m *SearchSolutionsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SearchSolutionsRequest.Marshal(b, m, deterministic) +} +func (dst *SearchSolutionsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_SearchSolutionsRequest.Merge(dst, src) +} +func (m *SearchSolutionsRequest) XXX_Size() int { + return xxx_messageInfo_SearchSolutionsRequest.Size(m) +} +func (m *SearchSolutionsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_SearchSolutionsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_SearchSolutionsRequest proto.InternalMessageInfo + +func (m *SearchSolutionsRequest) GetUserAgent() string { + if m != nil { + return m.UserAgent + } + return "" +} + +func (m 
*SearchSolutionsRequest) GetVersion() string { + if m != nil { + return m.Version + } + return "" +} + +func (m *SearchSolutionsRequest) GetTimeBound() float64 { + if m != nil { + return m.TimeBound + } + return 0 +} + +func (m *SearchSolutionsRequest) GetPriority() float64 { + if m != nil { + return m.Priority + } + return 0 +} + +func (m *SearchSolutionsRequest) GetAllowedValueTypes() []ValueType { + if m != nil { + return m.AllowedValueTypes + } + return nil +} + +func (m *SearchSolutionsRequest) GetProblem() *ProblemDescription { + if m != nil { + return m.Problem + } + return nil +} + +func (m *SearchSolutionsRequest) GetTemplate() *PipelineDescription { + if m != nil { + return m.Template + } + return nil +} + +func (m *SearchSolutionsRequest) GetInputs() []*Value { + if m != nil { + return m.Inputs + } + return nil +} + +// Call returns immediately with the ID. Use "GetFoundSolutions" call to get results. +type SearchSolutionsResponse struct { + // An ID identifying this solution search. This string should be at least 22 characters + // long to ensure enough entropy to not be guessable. 
+ SearchId string `protobuf:"bytes,1,opt,name=search_id,json=searchId,proto3" json:"search_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SearchSolutionsResponse) Reset() { *m = SearchSolutionsResponse{} } +func (m *SearchSolutionsResponse) String() string { return proto.CompactTextString(m) } +func (*SearchSolutionsResponse) ProtoMessage() {} +func (*SearchSolutionsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{6} +} +func (m *SearchSolutionsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SearchSolutionsResponse.Unmarshal(m, b) +} +func (m *SearchSolutionsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SearchSolutionsResponse.Marshal(b, m, deterministic) +} +func (dst *SearchSolutionsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_SearchSolutionsResponse.Merge(dst, src) +} +func (m *SearchSolutionsResponse) XXX_Size() int { + return xxx_messageInfo_SearchSolutionsResponse.Size(m) +} +func (m *SearchSolutionsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_SearchSolutionsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_SearchSolutionsResponse proto.InternalMessageInfo + +func (m *SearchSolutionsResponse) GetSearchId() string { + if m != nil { + return m.SearchId + } + return "" +} + +// Ends the search and releases all resources associated with the solution search. +// If the call is made in parallel with a running search and results are being streamed, +// the search is stopped and the "GetSearchSolutionsResults" stream is closed by TA2 +// (as happens when the search is concluded on its own, or when a search is stopped +// by "StopSearchSolutions"). Found solution IDs during the search are no longer valid +// after this call. 
+type EndSearchSolutionsRequest struct { + SearchId string `protobuf:"bytes,1,opt,name=search_id,json=searchId,proto3" json:"search_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EndSearchSolutionsRequest) Reset() { *m = EndSearchSolutionsRequest{} } +func (m *EndSearchSolutionsRequest) String() string { return proto.CompactTextString(m) } +func (*EndSearchSolutionsRequest) ProtoMessage() {} +func (*EndSearchSolutionsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{7} +} +func (m *EndSearchSolutionsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EndSearchSolutionsRequest.Unmarshal(m, b) +} +func (m *EndSearchSolutionsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EndSearchSolutionsRequest.Marshal(b, m, deterministic) +} +func (dst *EndSearchSolutionsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_EndSearchSolutionsRequest.Merge(dst, src) +} +func (m *EndSearchSolutionsRequest) XXX_Size() int { + return xxx_messageInfo_EndSearchSolutionsRequest.Size(m) +} +func (m *EndSearchSolutionsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_EndSearchSolutionsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_EndSearchSolutionsRequest proto.InternalMessageInfo + +func (m *EndSearchSolutionsRequest) GetSearchId() string { + if m != nil { + return m.SearchId + } + return "" +} + +type EndSearchSolutionsResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *EndSearchSolutionsResponse) Reset() { *m = EndSearchSolutionsResponse{} } +func (m *EndSearchSolutionsResponse) String() string { return proto.CompactTextString(m) } +func (*EndSearchSolutionsResponse) ProtoMessage() {} +func (*EndSearchSolutionsResponse) Descriptor() ([]byte, []int) { + return 
fileDescriptor_core_c0cb5d706bddf259, []int{8} +} +func (m *EndSearchSolutionsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_EndSearchSolutionsResponse.Unmarshal(m, b) +} +func (m *EndSearchSolutionsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_EndSearchSolutionsResponse.Marshal(b, m, deterministic) +} +func (dst *EndSearchSolutionsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_EndSearchSolutionsResponse.Merge(dst, src) +} +func (m *EndSearchSolutionsResponse) XXX_Size() int { + return xxx_messageInfo_EndSearchSolutionsResponse.Size(m) +} +func (m *EndSearchSolutionsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_EndSearchSolutionsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_EndSearchSolutionsResponse proto.InternalMessageInfo + +// Stops the search but leaves all currently found solutions available. +// If the call is made in parallel with a running search and results are being streamed, +// the "GetSearchSolutionsResults" stream is closed by the TA2 (as happens when the search +// is concluded on its own). Search cannot be re-started after it has been stopped. 
+type StopSearchSolutionsRequest struct { + SearchId string `protobuf:"bytes,1,opt,name=search_id,json=searchId,proto3" json:"search_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StopSearchSolutionsRequest) Reset() { *m = StopSearchSolutionsRequest{} } +func (m *StopSearchSolutionsRequest) String() string { return proto.CompactTextString(m) } +func (*StopSearchSolutionsRequest) ProtoMessage() {} +func (*StopSearchSolutionsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{9} +} +func (m *StopSearchSolutionsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StopSearchSolutionsRequest.Unmarshal(m, b) +} +func (m *StopSearchSolutionsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StopSearchSolutionsRequest.Marshal(b, m, deterministic) +} +func (dst *StopSearchSolutionsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_StopSearchSolutionsRequest.Merge(dst, src) +} +func (m *StopSearchSolutionsRequest) XXX_Size() int { + return xxx_messageInfo_StopSearchSolutionsRequest.Size(m) +} +func (m *StopSearchSolutionsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_StopSearchSolutionsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_StopSearchSolutionsRequest proto.InternalMessageInfo + +func (m *StopSearchSolutionsRequest) GetSearchId() string { + if m != nil { + return m.SearchId + } + return "" +} + +type StopSearchSolutionsResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StopSearchSolutionsResponse) Reset() { *m = StopSearchSolutionsResponse{} } +func (m *StopSearchSolutionsResponse) String() string { return proto.CompactTextString(m) } +func (*StopSearchSolutionsResponse) ProtoMessage() {} +func (*StopSearchSolutionsResponse) Descriptor() ([]byte, []int) { + return 
fileDescriptor_core_c0cb5d706bddf259, []int{10} +} +func (m *StopSearchSolutionsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StopSearchSolutionsResponse.Unmarshal(m, b) +} +func (m *StopSearchSolutionsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StopSearchSolutionsResponse.Marshal(b, m, deterministic) +} +func (dst *StopSearchSolutionsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_StopSearchSolutionsResponse.Merge(dst, src) +} +func (m *StopSearchSolutionsResponse) XXX_Size() int { + return xxx_messageInfo_StopSearchSolutionsResponse.Size(m) +} +func (m *StopSearchSolutionsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_StopSearchSolutionsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_StopSearchSolutionsResponse proto.InternalMessageInfo + +// Description of a TA2 score done during solution search. Because there is a wide range of +// potential approaches a TA2 can use to score candidate solutions this might not capture what +// your TA2 is doing. Feel free to request additions to be able to describe your approach. 
+type SolutionSearchScore struct { + ScoringConfiguration *ScoringConfiguration `protobuf:"bytes,1,opt,name=scoring_configuration,json=scoringConfiguration,proto3" json:"scoring_configuration,omitempty"` + Scores []*Score `protobuf:"bytes,2,rep,name=scores,proto3" json:"scores,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SolutionSearchScore) Reset() { *m = SolutionSearchScore{} } +func (m *SolutionSearchScore) String() string { return proto.CompactTextString(m) } +func (*SolutionSearchScore) ProtoMessage() {} +func (*SolutionSearchScore) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{11} +} +func (m *SolutionSearchScore) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SolutionSearchScore.Unmarshal(m, b) +} +func (m *SolutionSearchScore) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SolutionSearchScore.Marshal(b, m, deterministic) +} +func (dst *SolutionSearchScore) XXX_Merge(src proto.Message) { + xxx_messageInfo_SolutionSearchScore.Merge(dst, src) +} +func (m *SolutionSearchScore) XXX_Size() int { + return xxx_messageInfo_SolutionSearchScore.Size(m) +} +func (m *SolutionSearchScore) XXX_DiscardUnknown() { + xxx_messageInfo_SolutionSearchScore.DiscardUnknown(m) +} + +var xxx_messageInfo_SolutionSearchScore proto.InternalMessageInfo + +func (m *SolutionSearchScore) GetScoringConfiguration() *ScoringConfiguration { + if m != nil { + return m.ScoringConfiguration + } + return nil +} + +func (m *SolutionSearchScore) GetScores() []*Score { + if m != nil { + return m.Scores + } + return nil +} + +// Get all solutions presently identified by the search and start receiving any +// further solutions also found as well. 
+type GetSearchSolutionsResultsRequest struct { + SearchId string `protobuf:"bytes,1,opt,name=search_id,json=searchId,proto3" json:"search_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSearchSolutionsResultsRequest) Reset() { *m = GetSearchSolutionsResultsRequest{} } +func (m *GetSearchSolutionsResultsRequest) String() string { return proto.CompactTextString(m) } +func (*GetSearchSolutionsResultsRequest) ProtoMessage() {} +func (*GetSearchSolutionsResultsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{12} +} +func (m *GetSearchSolutionsResultsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSearchSolutionsResultsRequest.Unmarshal(m, b) +} +func (m *GetSearchSolutionsResultsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSearchSolutionsResultsRequest.Marshal(b, m, deterministic) +} +func (dst *GetSearchSolutionsResultsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSearchSolutionsResultsRequest.Merge(dst, src) +} +func (m *GetSearchSolutionsResultsRequest) XXX_Size() int { + return xxx_messageInfo_GetSearchSolutionsResultsRequest.Size(m) +} +func (m *GetSearchSolutionsResultsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetSearchSolutionsResultsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSearchSolutionsResultsRequest proto.InternalMessageInfo + +func (m *GetSearchSolutionsResultsRequest) GetSearchId() string { + if m != nil { + return m.SearchId + } + return "" +} + +type GetSearchSolutionsResultsResponse struct { + // Overall process progress, not progress per solution. While solutions are being found and + // returned, or scores computed and updated, progress state should be kept at "RUNNING". + Progress *Progress `protobuf:"bytes,1,opt,name=progress,proto3" json:"progress,omitempty"` + // A measure of progress during search. 
It can be any number of internal steps or + // actions a TA2 is doing during search. It can be even number of how many candidate + // solutions were already examined. It does not even have to be an integer. + // How regularly a change to this number is reported to TA3 is left to TA2's discretion, + // but a rule of thumb is at least once a minute if the number changes. + DoneTicks float64 `protobuf:"fixed64,2,opt,name=done_ticks,json=doneTicks,proto3" json:"done_ticks,omitempty"` + // If TA2 knows how many internal steps or actions are there, it can set this field. + // This can also be updated through time if more (or even less) internal steps or + // actions are determined to be necessary. If this value is non-zero, then it should + // always hold that "done_ticks" <= "all_ticks". + AllTicks float64 `protobuf:"fixed64,3,opt,name=all_ticks,json=allTicks,proto3" json:"all_ticks,omitempty"` + SolutionId string `protobuf:"bytes,4,opt,name=solution_id,json=solutionId,proto3" json:"solution_id,omitempty"` + // Internal score for this solution between 0.0 and 1.0 where 1.0 is the highest score. + // There is no other meaning to this score and it does not necessary depend on scores + // listed in the problem description. Optional. + // Because this field is optional, if omitted the default value will be 0. But 0 is a + // valid value for this field. Because of that you should never omit the field. + // If you do not have internal score to provide, use NaN for the value of this field + // to signal that. + InternalScore float64 `protobuf:"fixed64,5,opt,name=internal_score,json=internalScore,proto3" json:"internal_score,omitempty"` + // TA2 might be able to provide more meaningful scores as well, depending on its + // approach to solution search. Moreover, even the same TA2 might not use the same scoring + // approach for all of its solutions. Optional. 
+ Scores []*SolutionSearchScore `protobuf:"bytes,6,rep,name=scores,proto3" json:"scores,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetSearchSolutionsResultsResponse) Reset() { *m = GetSearchSolutionsResultsResponse{} } +func (m *GetSearchSolutionsResultsResponse) String() string { return proto.CompactTextString(m) } +func (*GetSearchSolutionsResultsResponse) ProtoMessage() {} +func (*GetSearchSolutionsResultsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{13} +} +func (m *GetSearchSolutionsResultsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetSearchSolutionsResultsResponse.Unmarshal(m, b) +} +func (m *GetSearchSolutionsResultsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetSearchSolutionsResultsResponse.Marshal(b, m, deterministic) +} +func (dst *GetSearchSolutionsResultsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetSearchSolutionsResultsResponse.Merge(dst, src) +} +func (m *GetSearchSolutionsResultsResponse) XXX_Size() int { + return xxx_messageInfo_GetSearchSolutionsResultsResponse.Size(m) +} +func (m *GetSearchSolutionsResultsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetSearchSolutionsResultsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetSearchSolutionsResultsResponse proto.InternalMessageInfo + +func (m *GetSearchSolutionsResultsResponse) GetProgress() *Progress { + if m != nil { + return m.Progress + } + return nil +} + +func (m *GetSearchSolutionsResultsResponse) GetDoneTicks() float64 { + if m != nil { + return m.DoneTicks + } + return 0 +} + +func (m *GetSearchSolutionsResultsResponse) GetAllTicks() float64 { + if m != nil { + return m.AllTicks + } + return 0 +} + +func (m *GetSearchSolutionsResultsResponse) GetSolutionId() string { + if m != nil { + return m.SolutionId + } + return "" +} + +func (m 
*GetSearchSolutionsResultsResponse) GetInternalScore() float64 { + if m != nil { + return m.InternalScore + } + return 0 +} + +func (m *GetSearchSolutionsResultsResponse) GetScores() []*SolutionSearchScore { + if m != nil { + return m.Scores + } + return nil +} + +// Request a detailed description of the found solution. +type DescribeSolutionRequest struct { + SolutionId string `protobuf:"bytes,1,opt,name=solution_id,json=solutionId,proto3" json:"solution_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DescribeSolutionRequest) Reset() { *m = DescribeSolutionRequest{} } +func (m *DescribeSolutionRequest) String() string { return proto.CompactTextString(m) } +func (*DescribeSolutionRequest) ProtoMessage() {} +func (*DescribeSolutionRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{14} +} +func (m *DescribeSolutionRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DescribeSolutionRequest.Unmarshal(m, b) +} +func (m *DescribeSolutionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DescribeSolutionRequest.Marshal(b, m, deterministic) +} +func (dst *DescribeSolutionRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_DescribeSolutionRequest.Merge(dst, src) +} +func (m *DescribeSolutionRequest) XXX_Size() int { + return xxx_messageInfo_DescribeSolutionRequest.Size(m) +} +func (m *DescribeSolutionRequest) XXX_DiscardUnknown() { + xxx_messageInfo_DescribeSolutionRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_DescribeSolutionRequest proto.InternalMessageInfo + +func (m *DescribeSolutionRequest) GetSolutionId() string { + if m != nil { + return m.SolutionId + } + return "" +} + +type PrimitiveStepDescription struct { + // Selected value for free pipeline hyper-parameters. 
+ Hyperparams map[string]*Value `protobuf:"bytes,1,rep,name=hyperparams,proto3" json:"hyperparams,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitiveStepDescription) Reset() { *m = PrimitiveStepDescription{} } +func (m *PrimitiveStepDescription) String() string { return proto.CompactTextString(m) } +func (*PrimitiveStepDescription) ProtoMessage() {} +func (*PrimitiveStepDescription) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{15} +} +func (m *PrimitiveStepDescription) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitiveStepDescription.Unmarshal(m, b) +} +func (m *PrimitiveStepDescription) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitiveStepDescription.Marshal(b, m, deterministic) +} +func (dst *PrimitiveStepDescription) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitiveStepDescription.Merge(dst, src) +} +func (m *PrimitiveStepDescription) XXX_Size() int { + return xxx_messageInfo_PrimitiveStepDescription.Size(m) +} +func (m *PrimitiveStepDescription) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitiveStepDescription.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitiveStepDescription proto.InternalMessageInfo + +func (m *PrimitiveStepDescription) GetHyperparams() map[string]*Value { + if m != nil { + return m.Hyperparams + } + return nil +} + +type SubpipelineStepDescription struct { + // Each step in a sub-pipeline has a description. These are reported in the order of steps + // in the sub-pipeline. 
+ Steps []*StepDescription `protobuf:"bytes,1,rep,name=steps,proto3" json:"steps,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SubpipelineStepDescription) Reset() { *m = SubpipelineStepDescription{} } +func (m *SubpipelineStepDescription) String() string { return proto.CompactTextString(m) } +func (*SubpipelineStepDescription) ProtoMessage() {} +func (*SubpipelineStepDescription) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{16} +} +func (m *SubpipelineStepDescription) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SubpipelineStepDescription.Unmarshal(m, b) +} +func (m *SubpipelineStepDescription) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SubpipelineStepDescription.Marshal(b, m, deterministic) +} +func (dst *SubpipelineStepDescription) XXX_Merge(src proto.Message) { + xxx_messageInfo_SubpipelineStepDescription.Merge(dst, src) +} +func (m *SubpipelineStepDescription) XXX_Size() int { + return xxx_messageInfo_SubpipelineStepDescription.Size(m) +} +func (m *SubpipelineStepDescription) XXX_DiscardUnknown() { + xxx_messageInfo_SubpipelineStepDescription.DiscardUnknown(m) +} + +var xxx_messageInfo_SubpipelineStepDescription proto.InternalMessageInfo + +func (m *SubpipelineStepDescription) GetSteps() []*StepDescription { + if m != nil { + return m.Steps + } + return nil +} + +type StepDescription struct { + // Types that are valid to be assigned to Step: + // *StepDescription_Primitive + // *StepDescription_Pipeline + Step isStepDescription_Step `protobuf_oneof:"step"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StepDescription) Reset() { *m = StepDescription{} } +func (m *StepDescription) String() string { return proto.CompactTextString(m) } +func (*StepDescription) ProtoMessage() {} +func 
(*StepDescription) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{17} +} +func (m *StepDescription) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StepDescription.Unmarshal(m, b) +} +func (m *StepDescription) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StepDescription.Marshal(b, m, deterministic) +} +func (dst *StepDescription) XXX_Merge(src proto.Message) { + xxx_messageInfo_StepDescription.Merge(dst, src) +} +func (m *StepDescription) XXX_Size() int { + return xxx_messageInfo_StepDescription.Size(m) +} +func (m *StepDescription) XXX_DiscardUnknown() { + xxx_messageInfo_StepDescription.DiscardUnknown(m) +} + +var xxx_messageInfo_StepDescription proto.InternalMessageInfo + +type isStepDescription_Step interface { + isStepDescription_Step() +} + +type StepDescription_Primitive struct { + Primitive *PrimitiveStepDescription `protobuf:"bytes,1,opt,name=primitive,proto3,oneof"` +} +type StepDescription_Pipeline struct { + Pipeline *SubpipelineStepDescription `protobuf:"bytes,2,opt,name=pipeline,proto3,oneof"` +} + +func (*StepDescription_Primitive) isStepDescription_Step() {} +func (*StepDescription_Pipeline) isStepDescription_Step() {} + +func (m *StepDescription) GetStep() isStepDescription_Step { + if m != nil { + return m.Step + } + return nil +} + +func (m *StepDescription) GetPrimitive() *PrimitiveStepDescription { + if x, ok := m.GetStep().(*StepDescription_Primitive); ok { + return x.Primitive + } + return nil +} + +func (m *StepDescription) GetPipeline() *SubpipelineStepDescription { + if x, ok := m.GetStep().(*StepDescription_Pipeline); ok { + return x.Pipeline + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. 
+func (*StepDescription) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _StepDescription_OneofMarshaler, _StepDescription_OneofUnmarshaler, _StepDescription_OneofSizer, []interface{}{ + (*StepDescription_Primitive)(nil), + (*StepDescription_Pipeline)(nil), + } +} + +func _StepDescription_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*StepDescription) + // step + switch x := m.Step.(type) { + case *StepDescription_Primitive: + b.EncodeVarint(1<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Primitive); err != nil { + return err + } + case *StepDescription_Pipeline: + b.EncodeVarint(2<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Pipeline); err != nil { + return err + } + case nil: + default: + return fmt.Errorf("StepDescription.Step has unexpected type %T", x) + } + return nil +} + +func _StepDescription_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*StepDescription) + switch tag { + case 1: // step.primitive + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(PrimitiveStepDescription) + err := b.DecodeMessage(msg) + m.Step = &StepDescription_Primitive{msg} + return true, err + case 2: // step.pipeline + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(SubpipelineStepDescription) + err := b.DecodeMessage(msg) + m.Step = &StepDescription_Pipeline{msg} + return true, err + default: + return false, nil + } +} + +func _StepDescription_OneofSizer(msg proto.Message) (n int) { + m := msg.(*StepDescription) + // step + switch x := m.Step.(type) { + case *StepDescription_Primitive: + s := proto.Size(x.Primitive) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *StepDescription_Pipeline: + s := proto.Size(x.Pipeline) + n += 1 // tag and 
wire + n += proto.SizeVarint(uint64(s)) + n += s + case nil: + default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +type DescribeSolutionResponse struct { + // A pipeline description. Nested pipelines should be fully described as well. + Pipeline *PipelineDescription `protobuf:"bytes,1,opt,name=pipeline,proto3" json:"pipeline,omitempty"` + // Each step in a pipeline has description. These are reported in the order of steps in + // the pipeline. + Steps []*StepDescription `protobuf:"bytes,2,rep,name=steps,proto3" json:"steps,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DescribeSolutionResponse) Reset() { *m = DescribeSolutionResponse{} } +func (m *DescribeSolutionResponse) String() string { return proto.CompactTextString(m) } +func (*DescribeSolutionResponse) ProtoMessage() {} +func (*DescribeSolutionResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{18} +} +func (m *DescribeSolutionResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DescribeSolutionResponse.Unmarshal(m, b) +} +func (m *DescribeSolutionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DescribeSolutionResponse.Marshal(b, m, deterministic) +} +func (dst *DescribeSolutionResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_DescribeSolutionResponse.Merge(dst, src) +} +func (m *DescribeSolutionResponse) XXX_Size() int { + return xxx_messageInfo_DescribeSolutionResponse.Size(m) +} +func (m *DescribeSolutionResponse) XXX_DiscardUnknown() { + xxx_messageInfo_DescribeSolutionResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_DescribeSolutionResponse proto.InternalMessageInfo + +func (m *DescribeSolutionResponse) GetPipeline() *PipelineDescription { + if m != nil { + return m.Pipeline + } + return nil +} + +func (m *DescribeSolutionResponse) GetSteps() 
[]*StepDescription { + if m != nil { + return m.Steps + } + return nil +} + +type StepProgress struct { + Progress *Progress `protobuf:"bytes,1,opt,name=progress,proto3" json:"progress,omitempty"` + // If step is a sub-pipeline, then this list contains progress for each step in the + // sub-pipeline, in order. + // List can be incomplete while the process is in progress. Systems can provide + // steps only at the end (when "progress" equals COMPLETED) and not during running. + Steps []*StepProgress `protobuf:"bytes,2,rep,name=steps,proto3" json:"steps,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StepProgress) Reset() { *m = StepProgress{} } +func (m *StepProgress) String() string { return proto.CompactTextString(m) } +func (*StepProgress) ProtoMessage() {} +func (*StepProgress) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{19} +} +func (m *StepProgress) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StepProgress.Unmarshal(m, b) +} +func (m *StepProgress) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StepProgress.Marshal(b, m, deterministic) +} +func (dst *StepProgress) XXX_Merge(src proto.Message) { + xxx_messageInfo_StepProgress.Merge(dst, src) +} +func (m *StepProgress) XXX_Size() int { + return xxx_messageInfo_StepProgress.Size(m) +} +func (m *StepProgress) XXX_DiscardUnknown() { + xxx_messageInfo_StepProgress.DiscardUnknown(m) +} + +var xxx_messageInfo_StepProgress proto.InternalMessageInfo + +func (m *StepProgress) GetProgress() *Progress { + if m != nil { + return m.Progress + } + return nil +} + +func (m *StepProgress) GetSteps() []*StepProgress { + if m != nil { + return m.Steps + } + return nil +} + +// User associated with the run of the solution. +type SolutionRunUser struct { + // A UUID of the user. 
It does not have to map to any real ID, just that it is possible + // to connect multiple solution actions by the same user together, if necessary. + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + // Was this run because solution was choosen by this user. + Choosen bool `protobuf:"varint,2,opt,name=choosen,proto3" json:"choosen,omitempty"` + // Textual reason provided by the user why the run was choosen by this user. + Reason string `protobuf:"bytes,3,opt,name=reason,proto3" json:"reason,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SolutionRunUser) Reset() { *m = SolutionRunUser{} } +func (m *SolutionRunUser) String() string { return proto.CompactTextString(m) } +func (*SolutionRunUser) ProtoMessage() {} +func (*SolutionRunUser) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{20} +} +func (m *SolutionRunUser) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SolutionRunUser.Unmarshal(m, b) +} +func (m *SolutionRunUser) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SolutionRunUser.Marshal(b, m, deterministic) +} +func (dst *SolutionRunUser) XXX_Merge(src proto.Message) { + xxx_messageInfo_SolutionRunUser.Merge(dst, src) +} +func (m *SolutionRunUser) XXX_Size() int { + return xxx_messageInfo_SolutionRunUser.Size(m) +} +func (m *SolutionRunUser) XXX_DiscardUnknown() { + xxx_messageInfo_SolutionRunUser.DiscardUnknown(m) +} + +var xxx_messageInfo_SolutionRunUser proto.InternalMessageInfo + +func (m *SolutionRunUser) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *SolutionRunUser) GetChoosen() bool { + if m != nil { + return m.Choosen + } + return false +} + +func (m *SolutionRunUser) GetReason() string { + if m != nil { + return m.Reason + } + return "" +} + +// Request solution to be scored given inputs. 
Inputs have to be Dataset container values +// and pipeline outputs have to be predictions. It can internally run multiple fit + produce +// runs of the pipeline on permutations of inputs data (e.g., for cross-validation). This is +// also why we cannot expose outputs here. +type ScoreSolutionRequest struct { + SolutionId string `protobuf:"bytes,1,opt,name=solution_id,json=solutionId,proto3" json:"solution_id,omitempty"` + Inputs []*Value `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + PerformanceMetrics []*ProblemPerformanceMetric `protobuf:"bytes,3,rep,name=performance_metrics,json=performanceMetrics,proto3" json:"performance_metrics,omitempty"` + // Any users associated with this call itself. Optional. + Users []*SolutionRunUser `protobuf:"bytes,4,rep,name=users,proto3" json:"users,omitempty"` + Configuration *ScoringConfiguration `protobuf:"bytes,5,opt,name=configuration,proto3" json:"configuration,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ScoreSolutionRequest) Reset() { *m = ScoreSolutionRequest{} } +func (m *ScoreSolutionRequest) String() string { return proto.CompactTextString(m) } +func (*ScoreSolutionRequest) ProtoMessage() {} +func (*ScoreSolutionRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{21} +} +func (m *ScoreSolutionRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ScoreSolutionRequest.Unmarshal(m, b) +} +func (m *ScoreSolutionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ScoreSolutionRequest.Marshal(b, m, deterministic) +} +func (dst *ScoreSolutionRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ScoreSolutionRequest.Merge(dst, src) +} +func (m *ScoreSolutionRequest) XXX_Size() int { + return xxx_messageInfo_ScoreSolutionRequest.Size(m) +} +func (m *ScoreSolutionRequest) XXX_DiscardUnknown() { + 
xxx_messageInfo_ScoreSolutionRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ScoreSolutionRequest proto.InternalMessageInfo + +func (m *ScoreSolutionRequest) GetSolutionId() string { + if m != nil { + return m.SolutionId + } + return "" +} + +func (m *ScoreSolutionRequest) GetInputs() []*Value { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *ScoreSolutionRequest) GetPerformanceMetrics() []*ProblemPerformanceMetric { + if m != nil { + return m.PerformanceMetrics + } + return nil +} + +func (m *ScoreSolutionRequest) GetUsers() []*SolutionRunUser { + if m != nil { + return m.Users + } + return nil +} + +func (m *ScoreSolutionRequest) GetConfiguration() *ScoringConfiguration { + if m != nil { + return m.Configuration + } + return nil +} + +type ScoreSolutionResponse struct { + RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ScoreSolutionResponse) Reset() { *m = ScoreSolutionResponse{} } +func (m *ScoreSolutionResponse) String() string { return proto.CompactTextString(m) } +func (*ScoreSolutionResponse) ProtoMessage() {} +func (*ScoreSolutionResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{22} +} +func (m *ScoreSolutionResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ScoreSolutionResponse.Unmarshal(m, b) +} +func (m *ScoreSolutionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ScoreSolutionResponse.Marshal(b, m, deterministic) +} +func (dst *ScoreSolutionResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ScoreSolutionResponse.Merge(dst, src) +} +func (m *ScoreSolutionResponse) XXX_Size() int { + return xxx_messageInfo_ScoreSolutionResponse.Size(m) +} +func (m *ScoreSolutionResponse) XXX_DiscardUnknown() { + 
xxx_messageInfo_ScoreSolutionResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ScoreSolutionResponse proto.InternalMessageInfo + +func (m *ScoreSolutionResponse) GetRequestId() string { + if m != nil { + return m.RequestId + } + return "" +} + +// Get all score results computed until now and start receiving any +// new score results computed as well. +type GetScoreSolutionResultsRequest struct { + RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetScoreSolutionResultsRequest) Reset() { *m = GetScoreSolutionResultsRequest{} } +func (m *GetScoreSolutionResultsRequest) String() string { return proto.CompactTextString(m) } +func (*GetScoreSolutionResultsRequest) ProtoMessage() {} +func (*GetScoreSolutionResultsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{23} +} +func (m *GetScoreSolutionResultsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetScoreSolutionResultsRequest.Unmarshal(m, b) +} +func (m *GetScoreSolutionResultsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetScoreSolutionResultsRequest.Marshal(b, m, deterministic) +} +func (dst *GetScoreSolutionResultsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetScoreSolutionResultsRequest.Merge(dst, src) +} +func (m *GetScoreSolutionResultsRequest) XXX_Size() int { + return xxx_messageInfo_GetScoreSolutionResultsRequest.Size(m) +} +func (m *GetScoreSolutionResultsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetScoreSolutionResultsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetScoreSolutionResultsRequest proto.InternalMessageInfo + +func (m *GetScoreSolutionResultsRequest) GetRequestId() string { + if m != nil { + return m.RequestId + } + return "" +} + +type GetScoreSolutionResultsResponse 
struct { + // Overall process progress. + Progress *Progress `protobuf:"bytes,1,opt,name=progress,proto3" json:"progress,omitempty"` + // List of score results. List can be incomplete while the process is in progress. + Scores []*Score `protobuf:"bytes,2,rep,name=scores,proto3" json:"scores,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetScoreSolutionResultsResponse) Reset() { *m = GetScoreSolutionResultsResponse{} } +func (m *GetScoreSolutionResultsResponse) String() string { return proto.CompactTextString(m) } +func (*GetScoreSolutionResultsResponse) ProtoMessage() {} +func (*GetScoreSolutionResultsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{24} +} +func (m *GetScoreSolutionResultsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetScoreSolutionResultsResponse.Unmarshal(m, b) +} +func (m *GetScoreSolutionResultsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetScoreSolutionResultsResponse.Marshal(b, m, deterministic) +} +func (dst *GetScoreSolutionResultsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetScoreSolutionResultsResponse.Merge(dst, src) +} +func (m *GetScoreSolutionResultsResponse) XXX_Size() int { + return xxx_messageInfo_GetScoreSolutionResultsResponse.Size(m) +} +func (m *GetScoreSolutionResultsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetScoreSolutionResultsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetScoreSolutionResultsResponse proto.InternalMessageInfo + +func (m *GetScoreSolutionResultsResponse) GetProgress() *Progress { + if m != nil { + return m.Progress + } + return nil +} + +func (m *GetScoreSolutionResultsResponse) GetScores() []*Score { + if m != nil { + return m.Scores + } + return nil +} + +// Fit the solution on given inputs. 
If a solution is already fitted on inputs this is a NOOP +// (if no additional outputs should be exposed). This can happen when a TA2 simultaneously +// fits the solution as part of the solution search phase. +type FitSolutionRequest struct { + SolutionId string `protobuf:"bytes,1,opt,name=solution_id,json=solutionId,proto3" json:"solution_id,omitempty"` + Inputs []*Value `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + // List of data references of step outputs which should be exposed to the TA3 system. + // If you want to expose outputs of the whole pipeline (e.g., predictions themselves), + // list them here as well. These can be recursive data references like + // "steps.1.steps.4.produce" to point to an output inside a sub-pipeline. + // Systems only have to support exposing final outputs and can return "ValueError" for + // intermediate values. + ExposeOutputs []string `protobuf:"bytes,3,rep,name=expose_outputs,json=exposeOutputs,proto3" json:"expose_outputs,omitempty"` + // Which value types should be used for exposing outputs. If not provided, the allowed + // value types list from hello call is used instead. + // The order is important as TA2 system will try value types in order until one works out, + // or an error will be returned instead of the value. An error exposing a value does not + // stop the overall process. + ExposeValueTypes []ValueType `protobuf:"varint,4,rep,packed,name=expose_value_types,json=exposeValueTypes,proto3,enum=ValueType" json:"expose_value_types,omitempty"` + // Any users associated with this call itself. Optional. 
+ Users []*SolutionRunUser `protobuf:"bytes,5,rep,name=users,proto3" json:"users,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FitSolutionRequest) Reset() { *m = FitSolutionRequest{} } +func (m *FitSolutionRequest) String() string { return proto.CompactTextString(m) } +func (*FitSolutionRequest) ProtoMessage() {} +func (*FitSolutionRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{25} +} +func (m *FitSolutionRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FitSolutionRequest.Unmarshal(m, b) +} +func (m *FitSolutionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FitSolutionRequest.Marshal(b, m, deterministic) +} +func (dst *FitSolutionRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_FitSolutionRequest.Merge(dst, src) +} +func (m *FitSolutionRequest) XXX_Size() int { + return xxx_messageInfo_FitSolutionRequest.Size(m) +} +func (m *FitSolutionRequest) XXX_DiscardUnknown() { + xxx_messageInfo_FitSolutionRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_FitSolutionRequest proto.InternalMessageInfo + +func (m *FitSolutionRequest) GetSolutionId() string { + if m != nil { + return m.SolutionId + } + return "" +} + +func (m *FitSolutionRequest) GetInputs() []*Value { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *FitSolutionRequest) GetExposeOutputs() []string { + if m != nil { + return m.ExposeOutputs + } + return nil +} + +func (m *FitSolutionRequest) GetExposeValueTypes() []ValueType { + if m != nil { + return m.ExposeValueTypes + } + return nil +} + +func (m *FitSolutionRequest) GetUsers() []*SolutionRunUser { + if m != nil { + return m.Users + } + return nil +} + +type FitSolutionResponse struct { + RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} 
`json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FitSolutionResponse) Reset() { *m = FitSolutionResponse{} } +func (m *FitSolutionResponse) String() string { return proto.CompactTextString(m) } +func (*FitSolutionResponse) ProtoMessage() {} +func (*FitSolutionResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{26} +} +func (m *FitSolutionResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FitSolutionResponse.Unmarshal(m, b) +} +func (m *FitSolutionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FitSolutionResponse.Marshal(b, m, deterministic) +} +func (dst *FitSolutionResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_FitSolutionResponse.Merge(dst, src) +} +func (m *FitSolutionResponse) XXX_Size() int { + return xxx_messageInfo_FitSolutionResponse.Size(m) +} +func (m *FitSolutionResponse) XXX_DiscardUnknown() { + xxx_messageInfo_FitSolutionResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_FitSolutionResponse proto.InternalMessageInfo + +func (m *FitSolutionResponse) GetRequestId() string { + if m != nil { + return m.RequestId + } + return "" +} + +// Get all fitted results currently available and start receiving any further +// new fitted results as well. 
+type GetFitSolutionResultsRequest struct { + RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetFitSolutionResultsRequest) Reset() { *m = GetFitSolutionResultsRequest{} } +func (m *GetFitSolutionResultsRequest) String() string { return proto.CompactTextString(m) } +func (*GetFitSolutionResultsRequest) ProtoMessage() {} +func (*GetFitSolutionResultsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{27} +} +func (m *GetFitSolutionResultsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetFitSolutionResultsRequest.Unmarshal(m, b) +} +func (m *GetFitSolutionResultsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetFitSolutionResultsRequest.Marshal(b, m, deterministic) +} +func (dst *GetFitSolutionResultsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetFitSolutionResultsRequest.Merge(dst, src) +} +func (m *GetFitSolutionResultsRequest) XXX_Size() int { + return xxx_messageInfo_GetFitSolutionResultsRequest.Size(m) +} +func (m *GetFitSolutionResultsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetFitSolutionResultsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetFitSolutionResultsRequest proto.InternalMessageInfo + +func (m *GetFitSolutionResultsRequest) GetRequestId() string { + if m != nil { + return m.RequestId + } + return "" +} + +type GetFitSolutionResultsResponse struct { + // Overall process progress. + Progress *Progress `protobuf:"bytes,1,opt,name=progress,proto3" json:"progress,omitempty"` + // The list contains progress for each step in the pipeline, in order. + // List can be incomplete while the process is in progress. Systems can provide + // steps only at the end (when "progress" equals COMPLETED) and not during running. 
+ Steps []*StepProgress `protobuf:"bytes,2,rep,name=steps,proto3" json:"steps,omitempty"` + // A mapping between data references of step outputs and values. + ExposedOutputs map[string]*Value `protobuf:"bytes,3,rep,name=exposed_outputs,json=exposedOutputs,proto3" json:"exposed_outputs,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // The fitted solution ID, once progress = COMPLETED. + FittedSolutionId string `protobuf:"bytes,4,opt,name=fitted_solution_id,json=fittedSolutionId,proto3" json:"fitted_solution_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *GetFitSolutionResultsResponse) Reset() { *m = GetFitSolutionResultsResponse{} } +func (m *GetFitSolutionResultsResponse) String() string { return proto.CompactTextString(m) } +func (*GetFitSolutionResultsResponse) ProtoMessage() {} +func (*GetFitSolutionResultsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{28} +} +func (m *GetFitSolutionResultsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_GetFitSolutionResultsResponse.Unmarshal(m, b) +} +func (m *GetFitSolutionResultsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_GetFitSolutionResultsResponse.Marshal(b, m, deterministic) +} +func (dst *GetFitSolutionResultsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetFitSolutionResultsResponse.Merge(dst, src) +} +func (m *GetFitSolutionResultsResponse) XXX_Size() int { + return xxx_messageInfo_GetFitSolutionResultsResponse.Size(m) +} +func (m *GetFitSolutionResultsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_GetFitSolutionResultsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_GetFitSolutionResultsResponse proto.InternalMessageInfo + +func (m *GetFitSolutionResultsResponse) GetProgress() *Progress { + if m != nil { + return m.Progress + } + 
return nil +} + +func (m *GetFitSolutionResultsResponse) GetSteps() []*StepProgress { + if m != nil { + return m.Steps + } + return nil +} + +func (m *GetFitSolutionResultsResponse) GetExposedOutputs() map[string]*Value { + if m != nil { + return m.ExposedOutputs + } + return nil +} + +func (m *GetFitSolutionResultsResponse) GetFittedSolutionId() string { + if m != nil { + return m.FittedSolutionId + } + return "" +} + +// Produce (execute) the solution on given inputs. A solution has to have been fitted for this +// to be possible (even if in cases where this is just created by transformations). +type ProduceSolutionRequest struct { + FittedSolutionId string `protobuf:"bytes,1,opt,name=fitted_solution_id,json=fittedSolutionId,proto3" json:"fitted_solution_id,omitempty"` + Inputs []*Value `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + // List of data references of step outputs which should be exposed to the TA3 system. + // If you want to expose outputs of the whole pipeline (e.g., predictions themselves), + // list them here as well. These can be recursive data references like + // "steps.1.steps.4.produce" to point to an output inside a sub-pipeline. + // Systems only have to support exposing final outputs and can return "ValueError" for + // intermediate values. + ExposeOutputs []string `protobuf:"bytes,3,rep,name=expose_outputs,json=exposeOutputs,proto3" json:"expose_outputs,omitempty"` + // Which value types should be used for exposing outputs. If not provided, the allowed + // value types list from a hello call is used instead. + // The order is important as the TA2 system will try value types in order until one works + // out, or an error will be returned instead of the value. An error exposing a value does + // not stop the overall process. 
+ ExposeValueTypes []ValueType `protobuf:"varint,4,rep,packed,name=expose_value_types,json=exposeValueTypes,proto3,enum=ValueType" json:"expose_value_types,omitempty"` + // Any users associated with this call itself. Optional. + Users []*SolutionRunUser `protobuf:"bytes,5,rep,name=users,proto3" json:"users,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProduceSolutionRequest) Reset() { *m = ProduceSolutionRequest{} } +func (m *ProduceSolutionRequest) String() string { return proto.CompactTextString(m) } +func (*ProduceSolutionRequest) ProtoMessage() {} +func (*ProduceSolutionRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{29} +} +func (m *ProduceSolutionRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProduceSolutionRequest.Unmarshal(m, b) +} +func (m *ProduceSolutionRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProduceSolutionRequest.Marshal(b, m, deterministic) +} +func (dst *ProduceSolutionRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProduceSolutionRequest.Merge(dst, src) +} +func (m *ProduceSolutionRequest) XXX_Size() int { + return xxx_messageInfo_ProduceSolutionRequest.Size(m) +} +func (m *ProduceSolutionRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ProduceSolutionRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ProduceSolutionRequest proto.InternalMessageInfo + +func (m *ProduceSolutionRequest) GetFittedSolutionId() string { + if m != nil { + return m.FittedSolutionId + } + return "" +} + +func (m *ProduceSolutionRequest) GetInputs() []*Value { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *ProduceSolutionRequest) GetExposeOutputs() []string { + if m != nil { + return m.ExposeOutputs + } + return nil +} + +func (m *ProduceSolutionRequest) GetExposeValueTypes() []ValueType { + if m != nil { + return m.ExposeValueTypes + } 
+ return nil +} + +func (m *ProduceSolutionRequest) GetUsers() []*SolutionRunUser { + if m != nil { + return m.Users + } + return nil +} + +type ProduceSolutionResponse struct { + RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProduceSolutionResponse) Reset() { *m = ProduceSolutionResponse{} } +func (m *ProduceSolutionResponse) String() string { return proto.CompactTextString(m) } +func (*ProduceSolutionResponse) ProtoMessage() {} +func (*ProduceSolutionResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_core_c0cb5d706bddf259, []int{30} +} +func (m *ProduceSolutionResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProduceSolutionResponse.Unmarshal(m, b) +} +func (m *ProduceSolutionResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProduceSolutionResponse.Marshal(b, m, deterministic) +} +func (dst *ProduceSolutionResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProduceSolutionResponse.Merge(dst, src) +} +func (m *ProduceSolutionResponse) XXX_Size() int { + return xxx_messageInfo_ProduceSolutionResponse.Size(m) +} +func (m *ProduceSolutionResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ProduceSolutionResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ProduceSolutionResponse proto.InternalMessageInfo + +func (m *ProduceSolutionResponse) GetRequestId() string { + if m != nil { + return m.RequestId + } + return "" +} + +// Get all producing results computed until now and start receiving any +// new producing results computed as well. 
+type GetProduceSolutionResultsRequest struct {
+	// RequestId is serialized as proto field 1 ("request_id").
+	RequestId            string   `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued GetProduceSolutionResultsRequest.
+func (m *GetProduceSolutionResultsRequest) Reset() { *m = GetProduceSolutionResultsRequest{} }
+
+// String renders m through proto.CompactTextString.
+func (m *GetProduceSolutionResultsRequest) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*GetProduceSolutionResultsRequest) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value together with the
+// index path []int{31} (presumably this message's position in core.proto; the
+// indices in this file grow by one per message, TODO confirm).
+func (*GetProduceSolutionResultsRequest) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{31}
+}
+
+// XXX_Unmarshal and the other XXX_ methods below each forward to the same-named
+// operation on xxx_messageInfo_GetProduceSolutionResultsRequest.
+func (m *GetProduceSolutionResultsRequest) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_GetProduceSolutionResultsRequest.Unmarshal(m, b)
+}
+func (m *GetProduceSolutionResultsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_GetProduceSolutionResultsRequest.Marshal(b, m, deterministic)
+}
+func (dst *GetProduceSolutionResultsRequest) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_GetProduceSolutionResultsRequest.Merge(dst, src)
+}
+func (m *GetProduceSolutionResultsRequest) XXX_Size() int {
+	return xxx_messageInfo_GetProduceSolutionResultsRequest.Size(m)
+}
+func (m *GetProduceSolutionResultsRequest) XXX_DiscardUnknown() {
+	xxx_messageInfo_GetProduceSolutionResultsRequest.DiscardUnknown(m)
+}
+
+// xxx_messageInfo_GetProduceSolutionResultsRequest is the value the XXX_
+// methods above delegate to.
+var xxx_messageInfo_GetProduceSolutionResultsRequest proto.InternalMessageInfo
+
+// GetRequestId returns m.RequestId, or the empty string when m is nil.
+func (m *GetProduceSolutionResultsRequest) GetRequestId() string {
+	if m != nil {
+		return m.RequestId
+	}
+	return ""
+}
+
+// GetProduceSolutionResultsResponse carries the overall progress, the per-step
+// progress and the exposed output values described on its fields.
+type GetProduceSolutionResultsResponse struct {
+	// Overall process progress.
+	Progress *Progress `protobuf:"bytes,1,opt,name=progress,proto3" json:"progress,omitempty"`
+	// The list contains progress for each step in the pipeline, in order.
+	// List can be incomplete while the process is in progress. Systems can provide
+	// steps only at the end (when "progress" equals COMPLETED) and not during running.
+	Steps []*StepProgress `protobuf:"bytes,2,rep,name=steps,proto3" json:"steps,omitempty"`
+	// A mapping between data references of step outputs and values.
+	ExposedOutputs       map[string]*Value `protobuf:"bytes,3,rep,name=exposed_outputs,json=exposedOutputs,proto3" json:"exposed_outputs,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
+	XXX_NoUnkeyedLiteral struct{}          `json:"-"`
+	XXX_unrecognized     []byte            `json:"-"`
+	XXX_sizecache        int32             `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued GetProduceSolutionResultsResponse.
+func (m *GetProduceSolutionResultsResponse) Reset() { *m = GetProduceSolutionResultsResponse{} }
+
+// String renders m through proto.CompactTextString.
+func (m *GetProduceSolutionResultsResponse) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*GetProduceSolutionResultsResponse) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{32}.
+func (*GetProduceSolutionResultsResponse) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{32}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_GetProduceSolutionResultsResponse.
+func (m *GetProduceSolutionResultsResponse) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_GetProduceSolutionResultsResponse.Unmarshal(m, b)
+}
+func (m *GetProduceSolutionResultsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_GetProduceSolutionResultsResponse.Marshal(b, m, deterministic)
+}
+func (dst *GetProduceSolutionResultsResponse) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_GetProduceSolutionResultsResponse.Merge(dst, src)
+}
+func (m *GetProduceSolutionResultsResponse) XXX_Size() int {
+	return xxx_messageInfo_GetProduceSolutionResultsResponse.Size(m)
+}
+func (m *GetProduceSolutionResultsResponse) XXX_DiscardUnknown() {
+	xxx_messageInfo_GetProduceSolutionResultsResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_GetProduceSolutionResultsResponse proto.InternalMessageInfo
+
+// GetProgress returns m.Progress, or nil when m is nil.
+func (m *GetProduceSolutionResultsResponse) GetProgress() *Progress {
+	if m != nil {
+		return m.Progress
+	}
+	return nil
+}
+
+// GetSteps returns m.Steps, or nil when m is nil.
+func (m *GetProduceSolutionResultsResponse) GetSteps() []*StepProgress {
+	if m != nil {
+		return m.Steps
+	}
+	return nil
+}
+
+// GetExposedOutputs returns m.ExposedOutputs, or nil when m is nil.
+func (m *GetProduceSolutionResultsResponse) GetExposedOutputs() map[string]*Value {
+	if m != nil {
+		return m.ExposedOutputs
+	}
+	return nil
+}
+
+// Exports a solution for evaluation purposes based on NIST specifications.
+type SolutionExportRequest struct {
+	// Found solution to export.
+	FittedSolutionId string `protobuf:"bytes,1,opt,name=fitted_solution_id,json=fittedSolutionId,proto3" json:"fitted_solution_id,omitempty"`
+	// Solution rank to be used for the exported solution. Lower numbers represent
+	// better solutions. Presently NIST requirements are that ranks should be non-negative
+	// and that each exported pipeline have a different rank. TA3 should make sure not to repeat ranks.
+	// Filenames of exported files are left to be chosen by the TA2 system.
+	Rank                 float64  `protobuf:"fixed64,2,opt,name=rank,proto3" json:"rank,omitempty"`
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued SolutionExportRequest.
+func (m *SolutionExportRequest) Reset() { *m = SolutionExportRequest{} }
+
+// String renders m through proto.CompactTextString.
+func (m *SolutionExportRequest) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*SolutionExportRequest) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{33}.
+func (*SolutionExportRequest) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{33}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_SolutionExportRequest.
+func (m *SolutionExportRequest) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_SolutionExportRequest.Unmarshal(m, b)
+}
+func (m *SolutionExportRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_SolutionExportRequest.Marshal(b, m, deterministic)
+}
+func (dst *SolutionExportRequest) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_SolutionExportRequest.Merge(dst, src)
+}
+func (m *SolutionExportRequest) XXX_Size() int {
+	return xxx_messageInfo_SolutionExportRequest.Size(m)
+}
+func (m *SolutionExportRequest) XXX_DiscardUnknown() {
+	xxx_messageInfo_SolutionExportRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_SolutionExportRequest proto.InternalMessageInfo
+
+// GetFittedSolutionId returns m.FittedSolutionId, or the empty string when m is nil.
+func (m *SolutionExportRequest) GetFittedSolutionId() string {
+	if m != nil {
+		return m.FittedSolutionId
+	}
+	return ""
+}
+
+// GetRank returns m.Rank, or 0 when m is nil.
+func (m *SolutionExportRequest) GetRank() float64 {
+	if m != nil {
+		return m.Rank
+	}
+	return 0
+}
+
+// SolutionExportResponse declares no payload fields, only the XXX_ bookkeeping fields.
+type SolutionExportResponse struct {
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued SolutionExportResponse.
+func (m *SolutionExportResponse) Reset() { *m = SolutionExportResponse{} }
+
+// String renders m through proto.CompactTextString.
+func (m *SolutionExportResponse) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*SolutionExportResponse) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{34}.
+func (*SolutionExportResponse) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{34}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_SolutionExportResponse.
+func (m *SolutionExportResponse) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_SolutionExportResponse.Unmarshal(m, b)
+}
+func (m *SolutionExportResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_SolutionExportResponse.Marshal(b, m, deterministic)
+}
+func (dst *SolutionExportResponse) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_SolutionExportResponse.Merge(dst, src)
+}
+func (m *SolutionExportResponse) XXX_Size() int {
+	return xxx_messageInfo_SolutionExportResponse.Size(m)
+}
+func (m *SolutionExportResponse) XXX_DiscardUnknown() {
+	xxx_messageInfo_SolutionExportResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_SolutionExportResponse proto.InternalMessageInfo
+
+// List all primitives known to TA2, their IDs, versions, names, and digests. Using this
+// information a TA3 should know which primitives may be put into a pipeline template.
+// To narrow down potential primitives to use a TA3 can also ask a TA2 to do a solution
+// search and then observe which primitives the TA2 is using. If more metadata about primitives
+// is needed, then a TA3 can use the results of this call to map primitives to metadata
+// (from Python code or primitive annotations) on its own.
+type ListPrimitivesRequest struct {
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued ListPrimitivesRequest.
+func (m *ListPrimitivesRequest) Reset() { *m = ListPrimitivesRequest{} }
+
+// String renders m through proto.CompactTextString.
+func (m *ListPrimitivesRequest) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*ListPrimitivesRequest) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{35}.
+func (*ListPrimitivesRequest) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{35}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_ListPrimitivesRequest.
+func (m *ListPrimitivesRequest) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_ListPrimitivesRequest.Unmarshal(m, b)
+}
+func (m *ListPrimitivesRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_ListPrimitivesRequest.Marshal(b, m, deterministic)
+}
+func (dst *ListPrimitivesRequest) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_ListPrimitivesRequest.Merge(dst, src)
+}
+func (m *ListPrimitivesRequest) XXX_Size() int {
+	return xxx_messageInfo_ListPrimitivesRequest.Size(m)
+}
+func (m *ListPrimitivesRequest) XXX_DiscardUnknown() {
+	xxx_messageInfo_ListPrimitivesRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ListPrimitivesRequest proto.InternalMessageInfo
+
+// ListPrimitivesResponse carries the Primitives list (proto field 1).
+type ListPrimitivesResponse struct {
+	Primitives           []*Primitive `protobuf:"bytes,1,rep,name=primitives,proto3" json:"primitives,omitempty"`
+	XXX_NoUnkeyedLiteral struct{}     `json:"-"`
+	XXX_unrecognized     []byte       `json:"-"`
+	XXX_sizecache        int32        `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued ListPrimitivesResponse.
+func (m *ListPrimitivesResponse) Reset() { *m = ListPrimitivesResponse{} }
+
+// String renders m through proto.CompactTextString.
+func (m *ListPrimitivesResponse) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*ListPrimitivesResponse) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{36}.
+func (*ListPrimitivesResponse) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{36}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_ListPrimitivesResponse.
+func (m *ListPrimitivesResponse) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_ListPrimitivesResponse.Unmarshal(m, b)
+}
+func (m *ListPrimitivesResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_ListPrimitivesResponse.Marshal(b, m, deterministic)
+}
+func (dst *ListPrimitivesResponse) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_ListPrimitivesResponse.Merge(dst, src)
+}
+func (m *ListPrimitivesResponse) XXX_Size() int {
+	return xxx_messageInfo_ListPrimitivesResponse.Size(m)
+}
+func (m *ListPrimitivesResponse) XXX_DiscardUnknown() {
+	xxx_messageInfo_ListPrimitivesResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_ListPrimitivesResponse proto.InternalMessageInfo
+
+// GetPrimitives returns m.Primitives, or nil when m is nil.
+func (m *ListPrimitivesResponse) GetPrimitives() []*Primitive {
+	if m != nil {
+		return m.Primitives
+	}
+	return nil
+}
+
+// Identify a TA2 and get supported features.
+// This call is also suitable for a ping/pong call to check that the gRPC connection to the
+// TA2 is ready.
+type HelloRequest struct {
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued HelloRequest.
+func (m *HelloRequest) Reset() { *m = HelloRequest{} }
+
+// String renders m through proto.CompactTextString.
+func (m *HelloRequest) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*HelloRequest) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{37}.
+func (*HelloRequest) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{37}
+}
+
+// XXX_Unmarshal and the other XXX_ methods below each forward to the same-named
+// operation on xxx_messageInfo_HelloRequest.
+func (m *HelloRequest) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_HelloRequest.Unmarshal(m, b)
+}
+func (m *HelloRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_HelloRequest.Marshal(b, m, deterministic)
+}
+func (dst *HelloRequest) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_HelloRequest.Merge(dst, src)
+}
+func (m *HelloRequest) XXX_Size() int {
+	return xxx_messageInfo_HelloRequest.Size(m)
+}
+func (m *HelloRequest) XXX_DiscardUnknown() {
+	xxx_messageInfo_HelloRequest.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_HelloRequest proto.InternalMessageInfo
+
+// HelloResponse describes a TA2 system: its user agent, protocol version,
+// allowed value types and supported API extensions.
+type HelloResponse struct {
+	// Some string identifying the name and version of the TA2 system.
+	UserAgent string `protobuf:"bytes,1,opt,name=user_agent,json=userAgent,proto3" json:"user_agent,omitempty"`
+	// Shall be set to "protocol_version" above.
+	Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
+	// List of value types that a TA3 system can use to communicate values to a TA2 system.
+	// The order is important as a TA3 system should try value types in order until one works
+	// out, or an error will be returned instead of the value.
+	AllowedValueTypes []ValueType `protobuf:"varint,3,rep,packed,name=allowed_value_types,json=allowedValueTypes,proto3,enum=ValueType" json:"allowed_value_types,omitempty"`
+	// List of API extensions that a TA2 supports.
+	SupportedExtensions  []string `protobuf:"bytes,4,rep,name=supported_extensions,json=supportedExtensions,proto3" json:"supported_extensions,omitempty"`
+	XXX_NoUnkeyedLiteral struct{} `json:"-"`
+	XXX_unrecognized     []byte   `json:"-"`
+	XXX_sizecache        int32    `json:"-"`
+}
+
+// Reset overwrites *m with a zero-valued HelloResponse.
+func (m *HelloResponse) Reset() { *m = HelloResponse{} }
+
+// String renders m through proto.CompactTextString.
+func (m *HelloResponse) String() string { return proto.CompactTextString(m) }
+
+// ProtoMessage has an empty body.
+func (*HelloResponse) ProtoMessage() {}
+
+// Descriptor returns the shared core file-descriptor value and the index path []int{38}.
+func (*HelloResponse) Descriptor() ([]byte, []int) {
+	return fileDescriptor_core_c0cb5d706bddf259, []int{38}
+}
+
+// The XXX_ methods below forward to xxx_messageInfo_HelloResponse.
+func (m *HelloResponse) XXX_Unmarshal(b []byte) error {
+	return xxx_messageInfo_HelloResponse.Unmarshal(m, b)
+}
+func (m *HelloResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
+	return xxx_messageInfo_HelloResponse.Marshal(b, m, deterministic)
+}
+func (dst *HelloResponse) XXX_Merge(src proto.Message) {
+	xxx_messageInfo_HelloResponse.Merge(dst, src)
+}
+func (m *HelloResponse) XXX_Size() int {
+	return xxx_messageInfo_HelloResponse.Size(m)
+}
+func (m *HelloResponse) XXX_DiscardUnknown() {
+	xxx_messageInfo_HelloResponse.DiscardUnknown(m)
+}
+
+var xxx_messageInfo_HelloResponse proto.InternalMessageInfo
+
+// GetUserAgent returns m.UserAgent, or the empty string when m is nil.
+func (m *HelloResponse) GetUserAgent() string {
+	if m != nil {
+		return m.UserAgent
+	}
+	return ""
+}
+
+// GetVersion returns m.Version, or the empty string when m is nil.
+func (m *HelloResponse) GetVersion() string {
+	if m != nil {
+		return m.Version
+	}
+	return ""
+}
+
+// GetAllowedValueTypes returns m.AllowedValueTypes, or nil when m is nil.
+func (m *HelloResponse) GetAllowedValueTypes() []ValueType {
+	if m != nil {
+		return m.AllowedValueTypes
+	}
+	return nil
+}
+
+// GetSupportedExtensions returns m.SupportedExtensions, or nil when m is nil.
+func (m *HelloResponse) GetSupportedExtensions() []string {
+	if m != nil {
+		return m.SupportedExtensions
+	}
+	return nil
+}
+
+// E_ProtocolVersion describes the "protocol_version" extension: a string-valued
+// field numbered 54100 that extends descriptor.FileOptions and is attributed
+// to core.proto.
+var E_ProtocolVersion = &proto.ExtensionDesc{
+	ExtendedType:  (*descriptor.FileOptions)(nil),
+	ExtensionType: (*string)(nil),
+	Field:         54100,
+	Name:          "protocol_version",
+	Tag:           "bytes,54100,opt,name=protocol_version,json=protocolVersion",
+	Filename:      "core.proto",
+}
+
+// init registers the message types, map-entry types, enums and the
+// protocol_version extension listed below with the proto package, each under
+// the string name given as the second argument.
+func init() {
+	proto.RegisterType((*ScoringConfiguration)(nil), "ScoringConfiguration")
+	proto.RegisterType((*Score)(nil), "Score")
+	proto.RegisterType((*Progress)(nil), "Progress")
+	proto.RegisterType((*UpdateProblemRequest)(nil), "UpdateProblemRequest")
+	proto.RegisterType((*UpdateProblemResponse)(nil), "UpdateProblemResponse")
+	proto.RegisterType((*SearchSolutionsRequest)(nil), "SearchSolutionsRequest")
+	proto.RegisterType((*SearchSolutionsResponse)(nil), "SearchSolutionsResponse")
+	proto.RegisterType((*EndSearchSolutionsRequest)(nil), "EndSearchSolutionsRequest")
+	proto.RegisterType((*EndSearchSolutionsResponse)(nil), "EndSearchSolutionsResponse")
+	proto.RegisterType((*StopSearchSolutionsRequest)(nil), "StopSearchSolutionsRequest")
+	proto.RegisterType((*StopSearchSolutionsResponse)(nil), "StopSearchSolutionsResponse")
+	proto.RegisterType((*SolutionSearchScore)(nil), "SolutionSearchScore")
+	proto.RegisterType((*GetSearchSolutionsResultsRequest)(nil), "GetSearchSolutionsResultsRequest")
+	proto.RegisterType((*GetSearchSolutionsResultsResponse)(nil), "GetSearchSolutionsResultsResponse")
+	proto.RegisterType((*DescribeSolutionRequest)(nil), "DescribeSolutionRequest")
+	proto.RegisterType((*PrimitiveStepDescription)(nil), "PrimitiveStepDescription")
+	// Map-typed fields are registered separately under "<Message>.<Field>Entry".
+	proto.RegisterMapType((map[string]*Value)(nil), "PrimitiveStepDescription.HyperparamsEntry")
+	proto.RegisterType((*SubpipelineStepDescription)(nil), "SubpipelineStepDescription")
+	proto.RegisterType((*StepDescription)(nil), "StepDescription")
+	proto.RegisterType((*DescribeSolutionResponse)(nil), "DescribeSolutionResponse")
+	proto.RegisterType((*StepProgress)(nil), "StepProgress")
+	proto.RegisterType((*SolutionRunUser)(nil), "SolutionRunUser")
+	proto.RegisterType((*ScoreSolutionRequest)(nil), "ScoreSolutionRequest")
+	proto.RegisterType((*ScoreSolutionResponse)(nil), "ScoreSolutionResponse")
+	proto.RegisterType((*GetScoreSolutionResultsRequest)(nil), "GetScoreSolutionResultsRequest")
+	proto.RegisterType((*GetScoreSolutionResultsResponse)(nil), "GetScoreSolutionResultsResponse")
+	proto.RegisterType((*FitSolutionRequest)(nil), "FitSolutionRequest")
+	proto.RegisterType((*FitSolutionResponse)(nil), "FitSolutionResponse")
+	proto.RegisterType((*GetFitSolutionResultsRequest)(nil), "GetFitSolutionResultsRequest")
+	proto.RegisterType((*GetFitSolutionResultsResponse)(nil), "GetFitSolutionResultsResponse")
+	proto.RegisterMapType((map[string]*Value)(nil), "GetFitSolutionResultsResponse.ExposedOutputsEntry")
+	proto.RegisterType((*ProduceSolutionRequest)(nil), "ProduceSolutionRequest")
+	proto.RegisterType((*ProduceSolutionResponse)(nil), "ProduceSolutionResponse")
+	proto.RegisterType((*GetProduceSolutionResultsRequest)(nil), "GetProduceSolutionResultsRequest")
+	proto.RegisterType((*GetProduceSolutionResultsResponse)(nil), "GetProduceSolutionResultsResponse")
+	proto.RegisterMapType((map[string]*Value)(nil), "GetProduceSolutionResultsResponse.ExposedOutputsEntry")
+	proto.RegisterType((*SolutionExportRequest)(nil), "SolutionExportRequest")
+	proto.RegisterType((*SolutionExportResponse)(nil), "SolutionExportResponse")
+	proto.RegisterType((*ListPrimitivesRequest)(nil), "ListPrimitivesRequest")
+	proto.RegisterType((*ListPrimitivesResponse)(nil), "ListPrimitivesResponse")
+	proto.RegisterType((*HelloRequest)(nil), "HelloRequest")
+	proto.RegisterType((*HelloResponse)(nil), "HelloResponse")
+	proto.RegisterEnum("EvaluationMethod", EvaluationMethod_name, EvaluationMethod_value)
+	proto.RegisterEnum("ProgressState", ProgressState_name, ProgressState_value)
+	proto.RegisterExtension(E_ProtocolVersion)
+}
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ context.Context
+var _ grpc.ClientConn
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+const _ = grpc.SupportPackageIsVersion4
+
+// CoreClient is the client API for Core service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
+type CoreClient interface {
+	// Methods whose first result is a Core_*Client value hand back a stream to
+	// Recv from; the others return a single response message.
+	SearchSolutions(ctx context.Context, in *SearchSolutionsRequest, opts ...grpc.CallOption) (*SearchSolutionsResponse, error)
+	GetSearchSolutionsResults(ctx context.Context, in *GetSearchSolutionsResultsRequest, opts ...grpc.CallOption) (Core_GetSearchSolutionsResultsClient, error)
+	EndSearchSolutions(ctx context.Context, in *EndSearchSolutionsRequest, opts ...grpc.CallOption) (*EndSearchSolutionsResponse, error)
+	StopSearchSolutions(ctx context.Context, in *StopSearchSolutionsRequest, opts ...grpc.CallOption) (*StopSearchSolutionsResponse, error)
+	DescribeSolution(ctx context.Context, in *DescribeSolutionRequest, opts ...grpc.CallOption) (*DescribeSolutionResponse, error)
+	ScoreSolution(ctx context.Context, in *ScoreSolutionRequest, opts ...grpc.CallOption) (*ScoreSolutionResponse, error)
+	GetScoreSolutionResults(ctx context.Context, in *GetScoreSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetScoreSolutionResultsClient, error)
+	FitSolution(ctx context.Context, in *FitSolutionRequest, opts ...grpc.CallOption) (*FitSolutionResponse, error)
+	GetFitSolutionResults(ctx context.Context, in *GetFitSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetFitSolutionResultsClient, error)
+	ProduceSolution(ctx context.Context, in *ProduceSolutionRequest, opts ...grpc.CallOption) (*ProduceSolutionResponse, error)
+	GetProduceSolutionResults(ctx context.Context, in *GetProduceSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetProduceSolutionResultsClient, error)
+	SolutionExport(ctx context.Context, in *SolutionExportRequest, opts ...grpc.CallOption) (*SolutionExportResponse, error)
+	UpdateProblem(ctx context.Context, in *UpdateProblemRequest, opts ...grpc.CallOption) (*UpdateProblemResponse, error)
+	ListPrimitives(ctx context.Context, in *ListPrimitivesRequest, opts ...grpc.CallOption) (*ListPrimitivesResponse, error)
+	Hello(ctx context.Context, in *HelloRequest, opts ...grpc.CallOption) (*HelloResponse, error)
+}
+
+// coreClient implements CoreClient on top of a *grpc.ClientConn.
+type coreClient struct {
+	cc *grpc.ClientConn
+}
+
+// NewCoreClient returns a CoreClient that issues all of its calls over cc.
+func NewCoreClient(cc *grpc.ClientConn) CoreClient {
+	return &coreClient{cc}
+}
+
+// SearchSolutions calls c.cc.Invoke with the method name "/Core/SearchSolutions".
+// It returns the decoded response, or (nil, err) when Invoke fails. Every
+// single-response method of coreClient below follows this same shape.
+func (c *coreClient) SearchSolutions(ctx context.Context, in *SearchSolutionsRequest, opts ...grpc.CallOption) (*SearchSolutionsResponse, error) {
+	out := new(SearchSolutionsResponse)
+	err := c.cc.Invoke(ctx, "/Core/SearchSolutions", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// GetSearchSolutionsResults opens a stream for "/Core/GetSearchSolutionsResults"
+// using stream descriptor 0 of _Core_serviceDesc, sends in as the only request
+// message, calls CloseSend, and returns a wrapper whose Recv yields responses.
+// Any error from NewStream, SendMsg or CloseSend is returned with a nil stream.
+func (c *coreClient) GetSearchSolutionsResults(ctx context.Context, in *GetSearchSolutionsResultsRequest, opts ...grpc.CallOption) (Core_GetSearchSolutionsResultsClient, error) {
+	stream, err := c.cc.NewStream(ctx, &_Core_serviceDesc.Streams[0], "/Core/GetSearchSolutionsResults", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &coreGetSearchSolutionsResultsClient{stream}
+	if err := x.ClientStream.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.ClientStream.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// Core_GetSearchSolutionsResultsClient is a grpc.ClientStream with a typed Recv.
+type Core_GetSearchSolutionsResultsClient interface {
+	Recv() (*GetSearchSolutionsResultsResponse, error)
+	grpc.ClientStream
+}
+
+// coreGetSearchSolutionsResultsClient wraps the raw stream to provide Recv.
+type coreGetSearchSolutionsResultsClient struct {
+	grpc.ClientStream
+}
+
+// Recv reads one message from the stream into a new
+// GetSearchSolutionsResultsResponse; on a RecvMsg error it returns (nil, err).
+func (x *coreGetSearchSolutionsResultsClient) Recv() (*GetSearchSolutionsResultsResponse, error) {
+	m := new(GetSearchSolutionsResultsResponse)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// EndSearchSolutions invokes "/Core/EndSearchSolutions" and returns its response.
+func (c *coreClient) EndSearchSolutions(ctx context.Context, in *EndSearchSolutionsRequest, opts ...grpc.CallOption) (*EndSearchSolutionsResponse, error) {
+	out := new(EndSearchSolutionsResponse)
+	err := c.cc.Invoke(ctx, "/Core/EndSearchSolutions", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// StopSearchSolutions invokes "/Core/StopSearchSolutions" and returns its response.
+func (c *coreClient) StopSearchSolutions(ctx context.Context, in *StopSearchSolutionsRequest, opts ...grpc.CallOption) (*StopSearchSolutionsResponse, error) {
+	out := new(StopSearchSolutionsResponse)
+	err := c.cc.Invoke(ctx, "/Core/StopSearchSolutions", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// DescribeSolution invokes "/Core/DescribeSolution" and returns its response.
+func (c *coreClient) DescribeSolution(ctx context.Context, in *DescribeSolutionRequest, opts ...grpc.CallOption) (*DescribeSolutionResponse, error) {
+	out := new(DescribeSolutionResponse)
+	err := c.cc.Invoke(ctx, "/Core/DescribeSolution", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// ScoreSolution invokes "/Core/ScoreSolution" and returns its response.
+func (c *coreClient) ScoreSolution(ctx context.Context, in *ScoreSolutionRequest, opts ...grpc.CallOption) (*ScoreSolutionResponse, error) {
+	out := new(ScoreSolutionResponse)
+	err := c.cc.Invoke(ctx, "/Core/ScoreSolution", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// GetScoreSolutionResults opens a stream for "/Core/GetScoreSolutionResults"
+// (stream descriptor 1), sends in, calls CloseSend, and returns the typed wrapper.
+func (c *coreClient) GetScoreSolutionResults(ctx context.Context, in *GetScoreSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetScoreSolutionResultsClient, error) {
+	stream, err := c.cc.NewStream(ctx, &_Core_serviceDesc.Streams[1], "/Core/GetScoreSolutionResults", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &coreGetScoreSolutionResultsClient{stream}
+	if err := x.ClientStream.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.ClientStream.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// Core_GetScoreSolutionResultsClient is a grpc.ClientStream with a typed Recv.
+type Core_GetScoreSolutionResultsClient interface {
+	Recv() (*GetScoreSolutionResultsResponse, error)
+	grpc.ClientStream
+}
+
+// coreGetScoreSolutionResultsClient wraps the raw stream to provide Recv.
+type coreGetScoreSolutionResultsClient struct {
+	grpc.ClientStream
+}
+
+// Recv reads one message from the stream into a new
+// GetScoreSolutionResultsResponse; on a RecvMsg error it returns (nil, err).
+func (x *coreGetScoreSolutionResultsClient) Recv() (*GetScoreSolutionResultsResponse, error) {
+	m := new(GetScoreSolutionResultsResponse)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// FitSolution invokes "/Core/FitSolution" and returns its response.
+func (c *coreClient) FitSolution(ctx context.Context, in *FitSolutionRequest, opts ...grpc.CallOption) (*FitSolutionResponse, error) {
+	out := new(FitSolutionResponse)
+	err := c.cc.Invoke(ctx, "/Core/FitSolution", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// GetFitSolutionResults opens a stream for "/Core/GetFitSolutionResults"
+// (stream descriptor 2), sends in, calls CloseSend, and returns the typed wrapper.
+func (c *coreClient) GetFitSolutionResults(ctx context.Context, in *GetFitSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetFitSolutionResultsClient, error) {
+	stream, err := c.cc.NewStream(ctx, &_Core_serviceDesc.Streams[2], "/Core/GetFitSolutionResults", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &coreGetFitSolutionResultsClient{stream}
+	if err := x.ClientStream.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.ClientStream.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// Core_GetFitSolutionResultsClient is a grpc.ClientStream with a typed Recv.
+type Core_GetFitSolutionResultsClient interface {
+	Recv() (*GetFitSolutionResultsResponse, error)
+	grpc.ClientStream
+}
+
+// coreGetFitSolutionResultsClient wraps the raw stream to provide Recv.
+type coreGetFitSolutionResultsClient struct {
+	grpc.ClientStream
+}
+
+// Recv reads one message from the stream into a new
+// GetFitSolutionResultsResponse; on a RecvMsg error it returns (nil, err).
+func (x *coreGetFitSolutionResultsClient) Recv() (*GetFitSolutionResultsResponse, error) {
+	m := new(GetFitSolutionResultsResponse)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// ProduceSolution invokes "/Core/ProduceSolution" and returns its response.
+func (c *coreClient) ProduceSolution(ctx context.Context, in *ProduceSolutionRequest, opts ...grpc.CallOption) (*ProduceSolutionResponse, error) {
+	out := new(ProduceSolutionResponse)
+	err := c.cc.Invoke(ctx, "/Core/ProduceSolution", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// GetProduceSolutionResults opens a stream for "/Core/GetProduceSolutionResults"
+// (stream descriptor 3), sends in, calls CloseSend, and returns the typed wrapper.
+func (c *coreClient) GetProduceSolutionResults(ctx context.Context, in *GetProduceSolutionResultsRequest, opts ...grpc.CallOption) (Core_GetProduceSolutionResultsClient, error) {
+	stream, err := c.cc.NewStream(ctx, &_Core_serviceDesc.Streams[3], "/Core/GetProduceSolutionResults", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &coreGetProduceSolutionResultsClient{stream}
+	if err := x.ClientStream.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.ClientStream.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// Core_GetProduceSolutionResultsClient is a grpc.ClientStream with a typed Recv.
+type Core_GetProduceSolutionResultsClient interface {
+	Recv() (*GetProduceSolutionResultsResponse, error)
+	grpc.ClientStream
+}
+
+// coreGetProduceSolutionResultsClient wraps the raw stream to provide Recv.
+type coreGetProduceSolutionResultsClient struct {
+	grpc.ClientStream
+}
+
+// Recv reads one message from the stream into a new
+// GetProduceSolutionResultsResponse; on a RecvMsg error it returns (nil, err).
+func (x *coreGetProduceSolutionResultsClient) Recv() (*GetProduceSolutionResultsResponse, error) {
+	m := new(GetProduceSolutionResultsResponse)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// SolutionExport invokes "/Core/SolutionExport" and returns its response.
+func (c *coreClient) SolutionExport(ctx context.Context, in *SolutionExportRequest, opts ...grpc.CallOption) (*SolutionExportResponse, error) {
+	out := new(SolutionExportResponse)
+	err := c.cc.Invoke(ctx, "/Core/SolutionExport", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// UpdateProblem invokes "/Core/UpdateProblem" and returns its response.
+func (c *coreClient) UpdateProblem(ctx context.Context, in *UpdateProblemRequest, opts ...grpc.CallOption) (*UpdateProblemResponse, error) {
+	out := new(UpdateProblemResponse)
+	err := c.cc.Invoke(ctx, "/Core/UpdateProblem", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// ListPrimitives invokes "/Core/ListPrimitives" and returns its response.
+func (c *coreClient) ListPrimitives(ctx context.Context, in *ListPrimitivesRequest, opts ...grpc.CallOption) (*ListPrimitivesResponse, error) {
+	out := new(ListPrimitivesResponse)
+	err := c.cc.Invoke(ctx, "/Core/ListPrimitives", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// Hello invokes "/Core/Hello" and returns its response.
+func (c *coreClient) Hello(ctx context.Context, in *HelloRequest, opts ...grpc.CallOption) (*HelloResponse, error) {
+	out := new(HelloResponse)
+	err := c.cc.Invoke(ctx, "/Core/Hello", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// CoreServer is the server API for Core service.
+type CoreServer interface { + SearchSolutions(context.Context, *SearchSolutionsRequest) (*SearchSolutionsResponse, error) + GetSearchSolutionsResults(*GetSearchSolutionsResultsRequest, Core_GetSearchSolutionsResultsServer) error + EndSearchSolutions(context.Context, *EndSearchSolutionsRequest) (*EndSearchSolutionsResponse, error) + StopSearchSolutions(context.Context, *StopSearchSolutionsRequest) (*StopSearchSolutionsResponse, error) + DescribeSolution(context.Context, *DescribeSolutionRequest) (*DescribeSolutionResponse, error) + ScoreSolution(context.Context, *ScoreSolutionRequest) (*ScoreSolutionResponse, error) + GetScoreSolutionResults(*GetScoreSolutionResultsRequest, Core_GetScoreSolutionResultsServer) error + FitSolution(context.Context, *FitSolutionRequest) (*FitSolutionResponse, error) + GetFitSolutionResults(*GetFitSolutionResultsRequest, Core_GetFitSolutionResultsServer) error + ProduceSolution(context.Context, *ProduceSolutionRequest) (*ProduceSolutionResponse, error) + GetProduceSolutionResults(*GetProduceSolutionResultsRequest, Core_GetProduceSolutionResultsServer) error + SolutionExport(context.Context, *SolutionExportRequest) (*SolutionExportResponse, error) + UpdateProblem(context.Context, *UpdateProblemRequest) (*UpdateProblemResponse, error) + ListPrimitives(context.Context, *ListPrimitivesRequest) (*ListPrimitivesResponse, error) + Hello(context.Context, *HelloRequest) (*HelloResponse, error) +} + +func RegisterCoreServer(s *grpc.Server, srv CoreServer) { + s.RegisterService(&_Core_serviceDesc, srv) +} + +func _Core_SearchSolutions_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(SearchSolutionsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).SearchSolutions(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/SearchSolutions", + } + handler := 
func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).SearchSolutions(ctx, req.(*SearchSolutionsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_GetSearchSolutionsResults_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetSearchSolutionsResultsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(CoreServer).GetSearchSolutionsResults(m, &coreGetSearchSolutionsResultsServer{stream}) +} + +type Core_GetSearchSolutionsResultsServer interface { + Send(*GetSearchSolutionsResultsResponse) error + grpc.ServerStream +} + +type coreGetSearchSolutionsResultsServer struct { + grpc.ServerStream +} + +func (x *coreGetSearchSolutionsResultsServer) Send(m *GetSearchSolutionsResultsResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _Core_EndSearchSolutions_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(EndSearchSolutionsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).EndSearchSolutions(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/EndSearchSolutions", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).EndSearchSolutions(ctx, req.(*EndSearchSolutionsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_StopSearchSolutions_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StopSearchSolutionsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).StopSearchSolutions(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/StopSearchSolutions", + } + handler := func(ctx context.Context, req 
interface{}) (interface{}, error) { + return srv.(CoreServer).StopSearchSolutions(ctx, req.(*StopSearchSolutionsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_DescribeSolution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DescribeSolutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).DescribeSolution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/DescribeSolution", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).DescribeSolution(ctx, req.(*DescribeSolutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_ScoreSolution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ScoreSolutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).ScoreSolution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/ScoreSolution", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).ScoreSolution(ctx, req.(*ScoreSolutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_GetScoreSolutionResults_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetScoreSolutionResultsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(CoreServer).GetScoreSolutionResults(m, &coreGetScoreSolutionResultsServer{stream}) +} + +type Core_GetScoreSolutionResultsServer interface { + Send(*GetScoreSolutionResultsResponse) error + grpc.ServerStream +} + +type coreGetScoreSolutionResultsServer struct { + grpc.ServerStream +} + +func (x *coreGetScoreSolutionResultsServer) 
Send(m *GetScoreSolutionResultsResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _Core_FitSolution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(FitSolutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).FitSolution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/FitSolution", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).FitSolution(ctx, req.(*FitSolutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_GetFitSolutionResults_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetFitSolutionResultsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(CoreServer).GetFitSolutionResults(m, &coreGetFitSolutionResultsServer{stream}) +} + +type Core_GetFitSolutionResultsServer interface { + Send(*GetFitSolutionResultsResponse) error + grpc.ServerStream +} + +type coreGetFitSolutionResultsServer struct { + grpc.ServerStream +} + +func (x *coreGetFitSolutionResultsServer) Send(m *GetFitSolutionResultsResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _Core_ProduceSolution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ProduceSolutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).ProduceSolution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/ProduceSolution", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).ProduceSolution(ctx, req.(*ProduceSolutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func 
_Core_GetProduceSolutionResults_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetProduceSolutionResultsRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(CoreServer).GetProduceSolutionResults(m, &coreGetProduceSolutionResultsServer{stream}) +} + +type Core_GetProduceSolutionResultsServer interface { + Send(*GetProduceSolutionResultsResponse) error + grpc.ServerStream +} + +type coreGetProduceSolutionResultsServer struct { + grpc.ServerStream +} + +func (x *coreGetProduceSolutionResultsServer) Send(m *GetProduceSolutionResultsResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _Core_SolutionExport_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(SolutionExportRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).SolutionExport(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/SolutionExport", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).SolutionExport(ctx, req.(*SolutionExportRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_UpdateProblem_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UpdateProblemRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).UpdateProblem(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/UpdateProblem", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).UpdateProblem(ctx, req.(*UpdateProblemRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_ListPrimitives_Handler(srv interface{}, ctx context.Context, dec 
func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ListPrimitivesRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).ListPrimitives(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/ListPrimitives", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).ListPrimitives(ctx, req.(*ListPrimitivesRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Core_Hello_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(HelloRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServer).Hello(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Core/Hello", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServer).Hello(ctx, req.(*HelloRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var _Core_serviceDesc = grpc.ServiceDesc{ + ServiceName: "Core", + HandlerType: (*CoreServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "SearchSolutions", + Handler: _Core_SearchSolutions_Handler, + }, + { + MethodName: "EndSearchSolutions", + Handler: _Core_EndSearchSolutions_Handler, + }, + { + MethodName: "StopSearchSolutions", + Handler: _Core_StopSearchSolutions_Handler, + }, + { + MethodName: "DescribeSolution", + Handler: _Core_DescribeSolution_Handler, + }, + { + MethodName: "ScoreSolution", + Handler: _Core_ScoreSolution_Handler, + }, + { + MethodName: "FitSolution", + Handler: _Core_FitSolution_Handler, + }, + { + MethodName: "ProduceSolution", + Handler: _Core_ProduceSolution_Handler, + }, + { + MethodName: "SolutionExport", + Handler: _Core_SolutionExport_Handler, + }, + { + MethodName: "UpdateProblem", + Handler: 
_Core_UpdateProblem_Handler, + }, + { + MethodName: "ListPrimitives", + Handler: _Core_ListPrimitives_Handler, + }, + { + MethodName: "Hello", + Handler: _Core_Hello_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "GetSearchSolutionsResults", + Handler: _Core_GetSearchSolutionsResults_Handler, + ServerStreams: true, + }, + { + StreamName: "GetScoreSolutionResults", + Handler: _Core_GetScoreSolutionResults_Handler, + ServerStreams: true, + }, + { + StreamName: "GetFitSolutionResults", + Handler: _Core_GetFitSolutionResults_Handler, + ServerStreams: true, + }, + { + StreamName: "GetProduceSolutionResults", + Handler: _Core_GetProduceSolutionResults_Handler, + ServerStreams: true, + }, + }, + Metadata: "core.proto", +} + +func init() { proto.RegisterFile("core.proto", fileDescriptor_core_c0cb5d706bddf259) } + +var fileDescriptor_core_c0cb5d706bddf259 = []byte{ + // 1994 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xd4, 0x58, 0xcd, 0x73, 0x1b, 0x49, + 0x15, 0xd7, 0xc8, 0x96, 0x22, 0x3d, 0xad, 0x64, 0x6d, 0xdb, 0x96, 0x94, 0x71, 0x12, 0x7b, 0x07, + 0x42, 0x99, 0x54, 0xe8, 0x4d, 0x04, 0x95, 0x4d, 0x42, 0x51, 0x8b, 0x63, 0xc9, 0xb6, 0x76, 0x1d, + 0x49, 0xd5, 0x92, 0x43, 0x91, 0x3d, 0x0c, 0xb2, 0xa6, 0x65, 0x4f, 0x65, 0x3c, 0x33, 0xcc, 0xb4, + 0xc2, 0xfa, 0x08, 0xa7, 0xbd, 0x71, 0xe3, 0xc2, 0x8d, 0x03, 0xc5, 0x91, 0x0b, 0xc5, 0x99, 0x23, + 0x77, 0x8e, 0x14, 0xff, 0x02, 0xff, 0x00, 0x07, 0xaa, 0x3f, 0x66, 0xac, 0x8f, 0x91, 0x6c, 0xa7, + 0x96, 0x03, 0xb7, 0xe9, 0xf7, 0x35, 0xaf, 0xdf, 0xfb, 0xcd, 0xfb, 0x18, 0x80, 0xa1, 0x17, 0x50, + 0xec, 0x07, 0x1e, 0xf3, 0xf4, 0x9d, 0x33, 0xcf, 0x3b, 0x73, 0xe8, 0xa7, 0xe2, 0x74, 0x3a, 0x1e, + 0x7d, 0x6a, 0xd1, 0x70, 0x18, 0xd8, 0x3e, 0xf3, 0x02, 0x25, 0xb1, 0x3d, 0x2b, 0xc1, 0xec, 0x0b, + 0x1a, 0xb2, 0xc1, 0x85, 0xaf, 0x04, 0x4a, 0xbe, 0xed, 0x53, 0xc7, 0x76, 0x23, 0x93, 0x6b, 0x7e, + 0x60, 0x5f, 0xd8, 0xcc, 0x7e, 0x1f, 0x11, 0x8a, 0x7e, 0xe0, 0x9d, 0x3a, 0xf4, 
0x42, 0x1d, 0x0b, + 0xef, 0x07, 0xce, 0x58, 0xf1, 0x8c, 0x7f, 0x6a, 0xb0, 0xd1, 0x1b, 0x7a, 0x81, 0xed, 0x9e, 0xed, + 0x7b, 0xee, 0xc8, 0x3e, 0x1b, 0x07, 0x03, 0x66, 0x7b, 0x2e, 0xfa, 0x3e, 0x64, 0x2f, 0x28, 0x3b, + 0xf7, 0xac, 0x9a, 0xb6, 0xa3, 0xed, 0x96, 0xea, 0x1f, 0xe3, 0x26, 0xd7, 0x13, 0xcc, 0xd7, 0x82, + 0x41, 0x94, 0x00, 0xda, 0x80, 0xcc, 0xc8, 0x73, 0xac, 0xb0, 0x96, 0xde, 0xd1, 0x76, 0x33, 0x44, + 0x1e, 0xd0, 0x2e, 0x94, 0x59, 0x30, 0xb0, 0x5d, 0x93, 0xd1, 0x90, 0x99, 0xc2, 0x6a, 0x6d, 0x65, + 0x47, 0xdb, 0xd5, 0x48, 0x49, 0xd0, 0xfb, 0x34, 0x64, 0x84, 0x53, 0x51, 0x0d, 0xee, 0x84, 0xe7, + 0xe3, 0xd1, 0xc8, 0xa1, 0xb5, 0xd5, 0x1d, 0x6d, 0x37, 0x47, 0xa2, 0x23, 0xda, 0x86, 0x42, 0x30, + 0x70, 0x2d, 0xef, 0xc2, 0x0c, 0x29, 0xb5, 0x6a, 0x19, 0x61, 0x1f, 0x24, 0xa9, 0x47, 0xa9, 0x85, + 0x1e, 0x00, 0x84, 0x8c, 0xdb, 0x1e, 0xd9, 0xd4, 0xaa, 0x65, 0x85, 0xf6, 0x04, 0xc5, 0xf8, 0x9d, + 0x06, 0x19, 0x7e, 0x3d, 0x8a, 0x9e, 0x8a, 0xfb, 0x04, 0xf6, 0x50, 0xdc, 0xa7, 0x50, 0xbf, 0x8b, + 0xbb, 0x32, 0x2a, 0x5d, 0x1a, 0x8c, 0xbc, 0xe0, 0x62, 0xe0, 0x0e, 0xe9, 0x6b, 0x21, 0x40, 0x94, + 0x20, 0x42, 0xb0, 0xca, 0xaf, 0xa2, 0xae, 0x25, 0x9e, 0xd1, 0x2e, 0xdc, 0x61, 0x83, 0xe0, 0x8c, + 0xb2, 0xb0, 0xb6, 0xb2, 0xb3, 0xb2, 0x5b, 0xa8, 0x97, 0x22, 0x3b, 0x7d, 0x41, 0x26, 0x11, 0x1b, + 0xdd, 0x83, 0x8c, 0x08, 0xb4, 0xb8, 0x53, 0xa1, 0x9e, 0xc5, 0x6f, 0xf8, 0x89, 0x48, 0xa2, 0xf1, + 0x27, 0x0d, 0x72, 0xdd, 0xc0, 0x3b, 0x0b, 0x68, 0x18, 0xa2, 0xef, 0x42, 0x26, 0x64, 0x03, 0x46, + 0x55, 0xa8, 0x85, 0x49, 0xc1, 0xe9, 0x71, 0x2a, 0x91, 0x4c, 0x54, 0x81, 0x2c, 0x7f, 0x18, 0xcb, + 0x38, 0xe7, 0x89, 0x3a, 0xa1, 0x27, 0x42, 0x3b, 0x60, 0x22, 0xba, 0x85, 0xba, 0x8e, 0x25, 0x60, + 0x70, 0x04, 0x18, 0xdc, 0x8f, 0x00, 0x43, 0xa4, 0x20, 0x7a, 0x0c, 0x2b, 0xd4, 0xb5, 0x94, 0x63, + 0xcb, 0xe4, 0xb9, 0x98, 0x71, 0x0a, 0x1b, 0x27, 0xbe, 0x35, 0x60, 0x54, 0x5d, 0x94, 0xd0, 0x5f, + 0x8e, 0x69, 0xc8, 0xd0, 0x16, 0xe4, 0x43, 0x3a, 0x08, 0x86, 0xe7, 0xa6, 0x2d, 0x41, 0x92, 0x27, + 0x39, 0x49, 0x68, 
0x59, 0xe8, 0x07, 0x70, 0x47, 0xa1, 0x4e, 0x78, 0x5b, 0xa8, 0xaf, 0x47, 0x71, + 0x6a, 0x28, 0x80, 0xdb, 0x9e, 0x4b, 0x22, 0x19, 0xa3, 0x0a, 0x9b, 0x33, 0xef, 0x08, 0x7d, 0xcf, + 0x0d, 0xa9, 0xf1, 0xf7, 0x34, 0x54, 0x7a, 0xc2, 0x68, 0xcf, 0x73, 0xc6, 0x5c, 0x29, 0x8c, 0xde, + 0x7f, 0x1f, 0x60, 0x1c, 0xd2, 0xc0, 0x1c, 0x9c, 0x51, 0x97, 0x29, 0x07, 0xf2, 0x9c, 0xb2, 0xc7, + 0x09, 0x1c, 0x55, 0xef, 0x69, 0x10, 0xda, 0x9e, 0xab, 0xe2, 0x15, 0x1d, 0xb9, 0x22, 0xff, 0x86, + 0xcc, 0x53, 0x6f, 0xec, 0x5a, 0x0a, 0x93, 0x79, 0x4e, 0x79, 0xc5, 0x09, 0x48, 0x87, 0x9c, 0x1f, + 0xd8, 0x5e, 0x60, 0xb3, 0x4b, 0x11, 0x22, 0x8d, 0xc4, 0x67, 0xf4, 0x12, 0xd6, 0x07, 0x8e, 0xe3, + 0xfd, 0x8a, 0x5a, 0xa6, 0xc8, 0xa3, 0xc9, 0x2e, 0x7d, 0x1a, 0xd6, 0x32, 0x3b, 0x2b, 0xbb, 0xa5, + 0x3a, 0xc8, 0x14, 0xf7, 0x2f, 0x7d, 0x4a, 0x3e, 0x56, 0x62, 0x31, 0x25, 0x9c, 0x0c, 0x49, 0xf6, + 0xfa, 0x90, 0xa0, 0x27, 0x90, 0x63, 0xf4, 0xc2, 0x77, 0x38, 0x2e, 0xee, 0x08, 0xf9, 0x0d, 0xdc, + 0x55, 0x5f, 0xfa, 0xa4, 0x42, 0x2c, 0x85, 0x1e, 0x40, 0xd6, 0x76, 0xfd, 0x31, 0x0b, 0x6b, 0x39, + 0x01, 0xcd, 0x08, 0x72, 0x8a, 0x6a, 0x3c, 0x83, 0xea, 0x5c, 0x28, 0x65, 0x98, 0x97, 0xe6, 0xd2, + 0x78, 0x0e, 0x77, 0x9b, 0xae, 0xb5, 0x20, 0x0b, 0x4b, 0x35, 0xef, 0x81, 0x9e, 0xa4, 0xa9, 0x72, + 0xfb, 0x02, 0xf4, 0x1e, 0xf3, 0xfc, 0x0f, 0x31, 0x7c, 0x1f, 0xb6, 0x12, 0x55, 0x95, 0xe5, 0x5f, + 0x6b, 0xb0, 0x1e, 0x51, 0x95, 0x8c, 0x28, 0x02, 0x5f, 0xc0, 0x66, 0x28, 0x8b, 0x9d, 0x39, 0x9c, + 0xac, 0x76, 0xaa, 0x26, 0x6c, 0xe2, 0xa4, 0x52, 0x48, 0x36, 0xc2, 0xa4, 0x02, 0xf9, 0x00, 0xb2, + 0x9c, 0x4e, 0xf9, 0xe7, 0x28, 0xa3, 0x2d, 0xde, 0x41, 0x14, 0xd5, 0xf8, 0x1c, 0x76, 0x0e, 0x29, + 0x9b, 0xf7, 0x70, 0xec, 0xb0, 0x9b, 0xdd, 0xf1, 0x3f, 0x1a, 0x7c, 0xb2, 0xc4, 0x82, 0xca, 0xdc, + 0x43, 0x8e, 0x56, 0x59, 0x2d, 0xd4, 0x2d, 0xf2, 0x71, 0xf9, 0x20, 0x31, 0x8b, 0x63, 0xde, 0xf2, + 0x5c, 0x6a, 0x32, 0x7b, 0xf8, 0x4e, 0x16, 0x10, 0x8d, 0xe4, 0x39, 0xa5, 0xcf, 0x09, 0xdc, 0x91, + 0x81, 0xe3, 0x28, 0xae, 0xfc, 0x22, 0x72, 0x03, 0xc7, 
0x91, 0xcc, 0x6d, 0x28, 0x84, 0xea, 0xf5, + 0xdc, 0xcf, 0x55, 0xe1, 0x27, 0x44, 0xa4, 0x96, 0x85, 0x1e, 0x42, 0xc9, 0x76, 0x19, 0x0d, 0xdc, + 0x81, 0x63, 0x8a, 0xdb, 0x8b, 0x4a, 0xad, 0x91, 0x62, 0x44, 0x95, 0xd1, 0x7f, 0x1c, 0x47, 0x2c, + 0x2b, 0x22, 0xb6, 0x81, 0x13, 0x72, 0x14, 0xc7, 0xef, 0x25, 0x54, 0x25, 0xcc, 0x4f, 0x69, 0x24, + 0x16, 0x85, 0x6d, 0xc6, 0x21, 0x6d, 0xd6, 0x21, 0xe3, 0xcf, 0x1a, 0xd4, 0xba, 0x51, 0x17, 0xec, + 0x31, 0xea, 0x4f, 0x7c, 0x30, 0xe8, 0x18, 0x0a, 0xe7, 0x97, 0x3e, 0x0d, 0xfc, 0x41, 0x30, 0xb8, + 0xe0, 0x41, 0xe3, 0xbe, 0x3c, 0xc2, 0x8b, 0xe4, 0xf1, 0xd1, 0x95, 0x70, 0xd3, 0x65, 0xc1, 0x25, + 0x99, 0x54, 0xd7, 0x0f, 0xa0, 0x3c, 0x2b, 0x80, 0xca, 0xb0, 0xf2, 0x8e, 0x5e, 0x2a, 0xbf, 0xf8, + 0xe3, 0x55, 0x33, 0x48, 0x27, 0x34, 0x83, 0x97, 0xe9, 0xe7, 0x9a, 0xd1, 0x00, 0xbd, 0x37, 0x3e, + 0x8d, 0x5a, 0xf9, 0xac, 0xcf, 0xdf, 0xe3, 0x35, 0x9e, 0xfa, 0x91, 0xb7, 0x65, 0x3c, 0x23, 0x40, + 0x24, 0xdb, 0xf8, 0xad, 0x06, 0x6b, 0xb3, 0xba, 0x2f, 0x20, 0x1f, 0x4f, 0x04, 0x13, 0xcd, 0x2f, + 0xf9, 0xb6, 0x47, 0x29, 0x72, 0x25, 0x8d, 0x5e, 0x40, 0x2e, 0xf2, 0x48, 0x79, 0xbe, 0x85, 0x17, + 0x7b, 0x79, 0x94, 0x22, 0xb1, 0xf8, 0xab, 0x2c, 0xac, 0x72, 0x97, 0x0c, 0x06, 0xb5, 0xf9, 0x34, + 0x2a, 0xec, 0x3e, 0x99, 0x30, 0xaf, 0x2d, 0x2b, 0x71, 0x91, 0xd4, 0x55, 0x1c, 0xd2, 0xcb, 0xe3, + 0xf0, 0x16, 0x3e, 0xe2, 0x9c, 0xb8, 0xc3, 0xde, 0xf0, 0x2b, 0xf9, 0xce, 0xb4, 0xf9, 0x22, 0x9e, + 0x34, 0x12, 0xd9, 0xee, 0xc1, 0x5a, 0x7c, 0x93, 0xb1, 0x7b, 0x12, 0xd2, 0x00, 0x95, 0x20, 0x1d, + 0xe3, 0x30, 0x6d, 0x5b, 0xbc, 0xf7, 0x0c, 0xcf, 0x3d, 0x2f, 0xa4, 0xb2, 0xf7, 0xe4, 0x48, 0x74, + 0xe4, 0x4d, 0x3c, 0xa0, 0x83, 0xd0, 0x73, 0xc5, 0x57, 0x96, 0x27, 0xea, 0x64, 0x7c, 0x93, 0x96, + 0x73, 0xd8, 0xad, 0xb1, 0x3e, 0x51, 0xf5, 0xd3, 0x49, 0x55, 0x1f, 0x7d, 0x01, 0xeb, 0xfe, 0xd5, + 0x88, 0x63, 0xca, 0xd9, 0x26, 0x9a, 0x5e, 0x96, 0x4c, 0x41, 0xc8, 0x9f, 0x25, 0x85, 0x3c, 0xfc, + 0xbc, 0xc1, 0x86, 0xb5, 0xd5, 0x28, 0xfc, 0xd3, 0x81, 0x20, 0x92, 0x8d, 0x7e, 0x0c, 0xc5, 
0xe9, + 0xfa, 0x9a, 0x59, 0x56, 0x5f, 0xa7, 0x65, 0x8d, 0x67, 0xb0, 0x39, 0x13, 0x09, 0x05, 0x97, 0xfb, + 0x00, 0x81, 0x8c, 0xca, 0x55, 0x24, 0xf2, 0x8a, 0xd2, 0xb2, 0x8c, 0xcf, 0xe1, 0x01, 0x2f, 0x97, + 0x33, 0xaa, 0x93, 0xe5, 0xf6, 0x1a, 0x03, 0xe7, 0xb0, 0xbd, 0xd0, 0xc0, 0xed, 0xaa, 0xed, 0x75, + 0xbd, 0xe1, 0x5f, 0x1a, 0xa0, 0x03, 0x9b, 0x7d, 0xeb, 0xb9, 0x7e, 0x08, 0x25, 0xfa, 0xb5, 0xef, + 0x85, 0xd4, 0xf4, 0xc6, 0x4c, 0xc8, 0xf1, 0x34, 0xe7, 0x49, 0x51, 0x52, 0x3b, 0x92, 0x88, 0x9e, + 0x03, 0x52, 0x62, 0x93, 0x43, 0xcc, 0xea, 0xdc, 0x10, 0x53, 0x96, 0x52, 0x13, 0x33, 0x4c, 0x0c, + 0x80, 0xcc, 0x52, 0x00, 0x18, 0x3f, 0x82, 0xf5, 0xa9, 0xfb, 0xdd, 0x2c, 0x83, 0x3f, 0x81, 0x7b, + 0x87, 0x94, 0x4d, 0x2b, 0xde, 0x22, 0x7f, 0x7f, 0x4b, 0xc3, 0xfd, 0x05, 0xfa, 0xb7, 0x4b, 0xdf, + 0x4d, 0xca, 0x00, 0xfa, 0x0a, 0xd6, 0x64, 0x78, 0xac, 0xa9, 0x60, 0x17, 0xea, 0x75, 0xbc, 0xd4, + 0x09, 0xdc, 0x94, 0x5a, 0x2a, 0x19, 0xb2, 0xa5, 0xa8, 0xb4, 0x45, 0x44, 0xf4, 0x18, 0xd0, 0xc8, + 0x66, 0x8c, 0x5a, 0xe6, 0x7c, 0xe7, 0x2d, 0x4b, 0x4e, 0x2f, 0x86, 0x85, 0xde, 0x82, 0xf5, 0x04, + 0xa3, 0x1f, 0xd4, 0x86, 0xfe, 0xad, 0x41, 0xa5, 0x1b, 0x78, 0xd6, 0x78, 0x38, 0x57, 0x89, 0x92, + 0x7d, 0xd2, 0x92, 0x7d, 0xfa, 0xff, 0x81, 0xea, 0x73, 0xa8, 0xce, 0x5d, 0xf8, 0x66, 0x70, 0xdd, + 0x13, 0x13, 0xde, 0xbc, 0xf2, 0x2d, 0x20, 0xfb, 0xfb, 0xb4, 0x98, 0xf1, 0x16, 0xd9, 0xf8, 0x1f, + 0xc0, 0xd6, 0x5c, 0x04, 0xdb, 0x67, 0xf8, 0x5a, 0x47, 0x6e, 0x02, 0xdd, 0x6f, 0x13, 0x8c, 0x3f, + 0x87, 0xcd, 0xc8, 0x13, 0x6e, 0x32, 0x60, 0x1f, 0x06, 0x45, 0x04, 0xab, 0xc1, 0xc0, 0x7d, 0xa7, + 0xa6, 0x5e, 0xf1, 0x6c, 0xd4, 0xa0, 0x32, 0x6b, 0x5a, 0xed, 0x0e, 0x55, 0xd8, 0x3c, 0xb6, 0x43, + 0x16, 0x0f, 0x48, 0x51, 0x2a, 0x8d, 0x06, 0x54, 0x66, 0x19, 0x2a, 0x3f, 0x8f, 0x00, 0xe2, 0x99, + 0x29, 0x1a, 0xd1, 0xe0, 0x6a, 0xc4, 0x22, 0x13, 0x5c, 0xa3, 0x04, 0x1f, 0x1d, 0x51, 0xc7, 0xf1, + 0x22, 0xab, 0x7f, 0xd5, 0xa0, 0xa8, 0x08, 0x57, 0xa8, 0xfb, 0xb0, 0xbd, 0x76, 0xc1, 0x72, 0xba, + 0x72, 0x93, 0xe5, 0xf4, 0x29, 
0x6c, 0x84, 0x63, 0x9f, 0x07, 0x82, 0x5a, 0x26, 0xfd, 0x9a, 0x51, + 0x97, 0x9b, 0x94, 0x5f, 0x5a, 0x9e, 0xac, 0xc7, 0xbc, 0x66, 0xcc, 0x7a, 0xf4, 0x1b, 0x0d, 0xca, + 0xb3, 0xff, 0x84, 0xd0, 0x36, 0x6c, 0x35, 0xdf, 0xec, 0x1d, 0x9f, 0xec, 0xf5, 0x5b, 0x9d, 0xb6, + 0xf9, 0xba, 0xd9, 0x3f, 0xea, 0x34, 0xcc, 0x93, 0x76, 0xa3, 0x79, 0xd0, 0x6a, 0x37, 0x1b, 0xe5, + 0x14, 0x2a, 0xc0, 0x9d, 0xa3, 0xce, 0x71, 0xa3, 0x73, 0xd2, 0x2f, 0x6b, 0x08, 0x20, 0xfb, 0xa5, + 0x79, 0xd0, 0x39, 0x6e, 0x94, 0xd3, 0xe8, 0x63, 0x28, 0x1e, 0x37, 0xf7, 0xde, 0x34, 0xcd, 0x4e, + 0xbb, 0x69, 0x72, 0xb6, 0x85, 0x4a, 0x00, 0x5d, 0xd2, 0x6c, 0xb4, 0xf6, 0xb9, 0xb1, 0x32, 0xe5, + 0x22, 0x7d, 0xb2, 0xd7, 0x6a, 0xb7, 0xda, 0x87, 0x66, 0x63, 0xaf, 0xbf, 0x57, 0x1e, 0x3d, 0xfa, + 0x0a, 0x8a, 0x53, 0x3f, 0x4b, 0xd0, 0x06, 0x94, 0xbb, 0xa4, 0x73, 0x48, 0x9a, 0xbd, 0x9e, 0x79, + 0xd2, 0xfe, 0xb2, 0xdd, 0xf9, 0x59, 0x5b, 0xbe, 0xb5, 0xdb, 0x6c, 0x37, 0x5a, 0xed, 0xc3, 0xb2, + 0xc6, 0x0f, 0xe4, 0xa4, 0xcd, 0xad, 0x94, 0xd3, 0xa8, 0x08, 0xf9, 0xfd, 0xce, 0xeb, 0xee, 0x71, + 0xb3, 0xdf, 0x6c, 0x94, 0x57, 0x38, 0xaf, 0x49, 0x48, 0x87, 0x34, 0x1b, 0xe5, 0xd5, 0xfa, 0x37, + 0x79, 0x58, 0xdd, 0xe7, 0x9b, 0xcb, 0x01, 0xac, 0xcd, 0xac, 0x61, 0xa8, 0x8a, 0x93, 0xf7, 0x56, + 0xbd, 0x86, 0x17, 0x6d, 0xa5, 0x29, 0x34, 0x82, 0xbb, 0x0b, 0x37, 0x3a, 0xf4, 0x09, 0xbe, 0x6e, + 0x5f, 0xd4, 0x0d, 0x7c, 0xed, 0x42, 0x68, 0xa4, 0x9e, 0x68, 0xa8, 0x03, 0x68, 0x7e, 0xef, 0x46, + 0x3a, 0x5e, 0xb8, 0xc6, 0xeb, 0x5b, 0x78, 0xc9, 0xa2, 0x9e, 0x42, 0x04, 0xd6, 0x13, 0xf6, 0x6d, + 0xb4, 0x85, 0x17, 0x2f, 0xf0, 0xfa, 0x3d, 0xbc, 0x6c, 0x45, 0x4f, 0xa1, 0x16, 0x94, 0x67, 0x37, + 0x03, 0x54, 0xc3, 0x0b, 0x76, 0x3e, 0xfd, 0x2e, 0x5e, 0xb4, 0x46, 0x18, 0x29, 0xf4, 0x53, 0x28, + 0x4e, 0x8d, 0x6d, 0x48, 0x4e, 0x9a, 0x73, 0x46, 0x2a, 0x38, 0x71, 0xb2, 0x34, 0x52, 0xe8, 0x17, + 0x50, 0x5d, 0x30, 0xfb, 0xa1, 0x6d, 0xbc, 0x7c, 0xac, 0xd4, 0x77, 0xf0, 0x35, 0x63, 0xa3, 0xc8, + 0xc9, 0x4b, 0x28, 0x4c, 0x0c, 0x05, 0x68, 0x1d, 0xcf, 0x0f, 0x80, 
0xfa, 0x06, 0x4e, 0x98, 0x9a, + 0x8c, 0x14, 0x7a, 0x0b, 0x9b, 0x89, 0x33, 0x05, 0xba, 0x8f, 0x97, 0x0d, 0x4c, 0xfa, 0x83, 0xe5, + 0xa3, 0x88, 0xf0, 0xeb, 0x00, 0xd6, 0x66, 0xaa, 0x3e, 0xaa, 0xe2, 0xe4, 0x11, 0x40, 0xaf, 0xe1, + 0x05, 0xad, 0x32, 0xc6, 0x76, 0x72, 0x03, 0x91, 0xd8, 0x5e, 0xda, 0x29, 0x25, 0xb6, 0x97, 0xf7, + 0x1f, 0xe1, 0xef, 0x3e, 0x94, 0xa6, 0x2b, 0x37, 0xaa, 0xe0, 0xc4, 0x2e, 0xa1, 0x57, 0xf1, 0x82, + 0x12, 0x2f, 0x00, 0x33, 0xf5, 0xbf, 0x11, 0x6d, 0xe2, 0xa4, 0x7f, 0x9c, 0x7a, 0x05, 0x27, 0xff, + 0x96, 0x4c, 0x71, 0x37, 0xa6, 0xbb, 0x01, 0xaa, 0xe0, 0xc4, 0xbe, 0xa1, 0x57, 0x71, 0x72, 0xdb, + 0x30, 0x52, 0x68, 0x17, 0x32, 0xa2, 0xf6, 0xa3, 0x22, 0x9e, 0x6c, 0x0a, 0x7a, 0x09, 0x4f, 0xb5, + 0x04, 0x23, 0xf5, 0xb2, 0x05, 0x65, 0xf1, 0x7f, 0x76, 0xe8, 0x39, 0x66, 0x54, 0xef, 0xef, 0xcd, + 0xfd, 0xb9, 0x3d, 0xb0, 0x1d, 0xda, 0x11, 0x8b, 0x70, 0x58, 0xfb, 0xc7, 0x1f, 0xe5, 0x86, 0xb9, + 0x16, 0xe9, 0xbd, 0x91, 0x6a, 0xaf, 0x2a, 0x7f, 0xf8, 0x8b, 0x9e, 0xab, 0x3f, 0x79, 0xfa, 0x1c, + 0x7f, 0x86, 0x3f, 0x7b, 0x1b, 0xef, 0xd6, 0xa7, 0x59, 0x21, 0xf8, 0xc3, 0xff, 0x06, 0x00, 0x00, + 0xff, 0xff, 0xb8, 0xef, 0x0c, 0x1c, 0x9f, 0x18, 0x00, 0x00, +} diff --git a/pipeline/core.proto b/pipeline/core.proto new file mode 100644 index 0000000..4d9200d --- /dev/null +++ b/pipeline/core.proto @@ -0,0 +1,473 @@ +syntax = "proto3"; +option go_package = "pipeline"; + +import "google/protobuf/descriptor.proto"; +import "google/protobuf/timestamp.proto"; + +import "pipeline.proto"; +import "primitive.proto"; +import "problem.proto"; +import "value.proto"; + +extend google.protobuf.FileOptions { + // 54100 is from the range reserved for internal use within individual organizations. + // If we make this protocol public, we should obtain globally unique field number from Google. + string protocol_version = 54100; +} + +// Date-based version string. Use this to populate version field in "SearchSolutionsRequest" +// and "SearchSolutionsResponse" messages. 
+option (protocol_version) = "2018.7.7"; + +enum EvaluationMethod { + // Default value. Not to be used. + EVALUATION_METHOD_UNDEFINED = 0; + + // The following are the only evaluation methods required + // to be supported for the "ScoreSolution" call. + HOLDOUT = 1; + K_FOLD = 2; + + // The rest are defined to allow expressing internal evaluation + // methods used by TA2 during solution search. If any method being used + // is missing, feel free to request it to be added. + LEAVE_ONE_OUT = 100; + // Instead of really scoring, a TA2 might predict the score only. + PREDICTION = 101; + // Training data is reused to test as well. + TRAINING_DATA = 102; +} + +message ScoringConfiguration { + // The evaluation method to use. + EvaluationMethod method = 1; + // Number of folds made, if applicable. + int32 folds = 2; + // Ratio of train set vs. test set, if applicable. + double train_test_ratio = 3; + // Shuffle data? Set to true if employed. + bool shuffle = 4; + // Value for random seed to use for shuffling. Optional. + int32 random_seed = 5; + // Do stratified k-fold? Set to true if employed. + bool stratified = 6; +} + +message Score { + ProblemPerformanceMetric metric = 1; + // When doing multiple folds, which fold is this score associated with, 0-based. + // We do not aggregate scores across folds on the TA2 side, but expose everything to the TA3. + // If scoring was not done as part of the cross-validation, then it can be returned + // as the first and only fold, in which case the value of this field should be 0. + int32 fold = 2; + // To which target or targets does this score apply? + repeated ProblemTarget targets = 3; + Value value = 4; +} + +enum ProgressState { + // Default value. Not to be used. + PROGRESS_UNKNOWN = 0; + + // The process has been scheduled but is pending execution. + PENDING = 1; + // The process is currently running. There can be multiple messages with this state + // (while the process is running). 
+ RUNNING = 2; + // The process completed and final results are available. + COMPLETED = 3; + // The process failed. + ERRORED = 4; +} + +// After "state" becomes "COMPLETED" or "ERRORED" stream closes. +// The granularity of progress updates is not specified by the API at this time. Some systems +// might be updating frequently and provide many updates of the progress of a whole process +// as well as individual pipeline steps. Some systems might just report these high-level +// progress states once, not doing any progress updates in the meantime. The "status" field +// should contain information to supplement the progress state, such as specific failure details +// in the case of an "ERRORED" state being returned. +message Progress { + ProgressState state = 1; + string status = 2; + // Set only after state becomes "RUNNING". If it never really properly runs, but errors + // when attempted to run, then it should be the timestamp of the error. + google.protobuf.Timestamp start = 3; + // Set only when state is "COMPLETED" or "ERRORED". + google.protobuf.Timestamp end = 4; +} + +// Updates problem with new description. This also updates the problem description for all +// ongoing solution searches associated with this problem. Internal behavior of TA2 +// is unspecified: it can simply start a new search using new problem description, or +// it can start modifying solutions it has already found to new problem description, or +// it can use it to further help narrow down ongoing solution searches. In any case, after +// this call returns, all reported solutions for searches associated with this problem +// should be for the updated problem description. +message UpdateProblemRequest { + string search_id = 1; + // New problem description. It has to be provided in full and it replaces existing + // problem description. + ProblemDescription problem = 2; +} + +message UpdateProblemResponse {} + +// Starts a new solution search. 
Found solutions have not necessarily been fitted on the provided +// inputs. Problem description and inputs are used only to help guide the search process. +// Consider any found solutions to be just a static description of solutions at this stage. +// Multiple parallel solution searches can happen at the same time. +message SearchSolutionsRequest { + // Some string identifying the name and version of the TA3 system. + string user_agent = 1; + // Shall be set to "protocol_version" above. + string version = 2; + // Desired upper limit of time for solution search, expressed in minutes. + // This is a suggestion: TA2s should attempt to obey it, but TA3s should realize it may be + // violated. Default value of 0 (and any negative number) signifies no time bound. + double time_bound = 3; + // Value stating the priority of the search. If multiple searches are queued then highest + // priority (largest number) should be started next by TA2. Primarily used to sort any + // queue, but no further guarantee that TA2 can give more resources to high priority + // searches. If unspecified, by default search will have priority 0. Negative numbers have + // still lower priority. + double priority = 4; + // Which value types can a TA2 system use to communicate values to a TA3 system? + // The order is important as a TA2 system will try value types in order until one works out, + // or an error will be returned instead of the value. + repeated ValueType allowed_value_types = 5; + // Problem description to use for the solution search. + ProblemDescription problem = 6; + // A pipeline template to use for search or to execute. If template is omitted, then a + // regular solution search is done. If template consists only of one placeholder step, + // then a regular solution search is done to replace that step. 
If there is no placeholder + // step, but template describes a full pipeline with free hyper-parameters, then this + // call becomes a hyper-parameter tuning call over free hyper-parameters and found solutions + // share the same pipeline, but different hyper-parameter configurations. If there is no + // placeholder step and all hyper-parameters are fixed as part of the pipeline, then this + // call only checks the given template and returns the solution with same pipeline back, to + // be executed. This allows fixed computations to be done on data, for example, pipeline can + // consist of only one primitive with fixed hyper-parameters to execute that one primitive. + // Moreover, such fully specified pipelines with fixed hyper-parameters can have any + // inputs and any outputs. Otherwise pipelines have to be from a Dataset container value + // to predictions Pandas dataframe. While there are all these options possible, only a + // subset has to be supported by all systems. See README for more details. + PipelineDescription template = 7; + // Pipeline inputs used during solution search. They have to point to Dataset container + // values. Order matters as each input is mapped to a template's input in order. Optional + // for templates without a placeholder and with all hyper-parameters fixed. + repeated Value inputs = 8; +} + +// Call returns immediately with the ID. Use "GetSearchSolutionsResults" call to get results. +message SearchSolutionsResponse { + // An ID identifying this solution search. This string should be at least 22 characters + // long to ensure enough entropy to not be guessable. + string search_id = 1; +} + +// Ends the search and releases all resources associated with the solution search. 
+// If the call is made in parallel with a running search and results are being streamed, +// the search is stopped and the "GetSearchSolutionsResults" stream is closed by TA2 +// (as happens when the search is concluded on its own, or when a search is stopped +// by "StopSearchSolutions"). Found solution IDs during the search are no longer valid +// after this call. +message EndSearchSolutionsRequest { + string search_id = 1; +} + +message EndSearchSolutionsResponse {} + +// Stops the search but leaves all currently found solutions available. +// If the call is made in parallel with a running search and results are being streamed, +// the "GetSearchSolutionsResults" stream is closed by the TA2 (as happens when the search +// is concluded on its own). Search cannot be re-started after it has been stopped. +message StopSearchSolutionsRequest { + string search_id = 1; +} + +message StopSearchSolutionsResponse {} + +// Description of a TA2 score done during solution search. Because there is a wide range of +// potential approaches a TA2 can use to score candidate solutions this might not capture what +// your TA2 is doing. Feel free to request additions to be able to describe your approach. +message SolutionSearchScore { + ScoringConfiguration scoring_configuration = 1; + repeated Score scores = 2; +} + +// Get all solutions presently identified by the search and start receiving any +// further solutions also found as well. +message GetSearchSolutionsResultsRequest { + string search_id = 1; +} + +message GetSearchSolutionsResultsResponse { + // Overall process progress, not progress per solution. While solutions are being found and + // returned, or scores computed and updated, progress state should be kept at "RUNNING". + Progress progress = 1; + // A measure of progress during search. It can be any number of internal steps or + // actions a TA2 is doing during search. It can be even number of how many candidate + // solutions were already examined. 
It does not even have to be an integer. + // How regularly a change to this number is reported to TA3 is left to TA2's discretion, + // but a rule of thumb is at least once a minute if the number changes. + double done_ticks = 2; + // If TA2 knows how many internal steps or actions there are, it can set this field. + // This can also be updated through time if more (or even less) internal steps or + // actions are determined to be necessary. If this value is non-zero, then it should + // always hold that "done_ticks" <= "all_ticks". + double all_ticks = 3; + string solution_id = 4; + // Internal score for this solution between 0.0 and 1.0 where 1.0 is the highest score. + // There is no other meaning to this score and it does not necessarily depend on scores + // listed in the problem description. Optional. + // Because this field is optional, if omitted the default value will be 0. But 0 is a + // valid value for this field. Because of that you should never omit the field. + // If you do not have an internal score to provide, use NaN for the value of this field + // to signal that. + double internal_score = 5; + // TA2 might be able to provide more meaningful scores as well, depending on its + // approach to solution search. Moreover, even the same TA2 might not use the same scoring + // approach for all of its solutions. Optional. + repeated SolutionSearchScore scores = 6; +} + +// Request a detailed description of the found solution. +message DescribeSolutionRequest { + string solution_id = 1; +} + +message PrimitiveStepDescription { + // Selected value for free pipeline hyper-parameters. + map hyperparams = 1; +} + +message SubpipelineStepDescription { + // Each step in a sub-pipeline has a description. These are reported in the order of steps + // in the sub-pipeline. 
+ repeated StepDescription steps = 1; +} + +message StepDescription { + oneof step { + PrimitiveStepDescription primitive = 1; + SubpipelineStepDescription pipeline = 2; + } +} + +message DescribeSolutionResponse { + // A pipeline description. Nested pipelines should be fully described as well. + PipelineDescription pipeline = 1; + // Each step in a pipeline has a description. These are reported in the order of steps in + // the pipeline. + repeated StepDescription steps = 2; +} + +message StepProgress { + Progress progress = 1; + // If step is a sub-pipeline, then this list contains progress for each step in the + // sub-pipeline, in order. + // List can be incomplete while the process is in progress. Systems can provide + // steps only at the end (when "progress" equals COMPLETED) and not during running. + repeated StepProgress steps = 2; +} + +// User associated with the run of the solution. +message SolutionRunUser { + // A UUID of the user. It does not have to map to any real ID, just that it is possible + // to connect multiple solution actions by the same user together, if necessary. + string id = 1; + // Was this run because solution was chosen by this user. + bool choosen = 2; + // Textual reason provided by the user why the run was chosen by this user. + string reason = 3; +} + +// Request solution to be scored given inputs. Inputs have to be Dataset container values +// and pipeline outputs have to be predictions. It can internally run multiple fit + produce +// runs of the pipeline on permutations of inputs data (e.g., for cross-validation). This is +// also why we cannot expose outputs here. +message ScoreSolutionRequest { + string solution_id = 1; + repeated Value inputs = 2; + repeated ProblemPerformanceMetric performance_metrics = 3; + // Any users associated with this call itself. Optional. 
+ repeated SolutionRunUser users = 4; + ScoringConfiguration configuration = 5; +} + +message ScoreSolutionResponse { + string request_id = 1; +} + +// Get all score results computed until now and start receiving any +// new score results computed as well. +message GetScoreSolutionResultsRequest { + string request_id = 1; +} + +message GetScoreSolutionResultsResponse { + // Overall process progress. + Progress progress = 1; + // List of score results. List can be incomplete while the process is in progress. + repeated Score scores = 2; +} + +// Fit the solution on given inputs. If a solution is already fitted on inputs this is a NOOP +// (if no additional outputs should be exposed). This can happen when a TA2 simultaneously +// fits the solution as part of the solution search phase. +message FitSolutionRequest { + string solution_id = 1; + repeated Value inputs = 2; + // List of data references of step outputs which should be exposed to the TA3 system. + // If you want to expose outputs of the whole pipeline (e.g., predictions themselves), + // list them here as well. These can be recursive data references like + // "steps.1.steps.4.produce" to point to an output inside a sub-pipeline. + // Systems only have to support exposing final outputs and can return "ValueError" for + // intermediate values. + repeated string expose_outputs = 3; + // Which value types should be used for exposing outputs. If not provided, the allowed + // value types list from hello call is used instead. + // The order is important as TA2 system will try value types in order until one works out, + // or an error will be returned instead of the value. An error exposing a value does not + // stop the overall process. + repeated ValueType expose_value_types = 4; + // Any users associated with this call itself. Optional. 
+ repeated SolutionRunUser users = 5; +} + +message FitSolutionResponse { + string request_id = 1; +} + +// Get all fitted results currently available and start receiving any further +// new fitted results as well. +message GetFitSolutionResultsRequest { + string request_id = 1; +} + +message GetFitSolutionResultsResponse { + // Overall process progress. + Progress progress = 1; + // The list contains progress for each step in the pipeline, in order. + // List can be incomplete while the process is in progress. Systems can provide + // steps only at the end (when "progress" equals COMPLETED) and not during running. + repeated StepProgress steps = 2; + // A mapping between data references of step outputs and values. + map exposed_outputs = 3; + // The fitted solution ID, once progress = COMPLETED. + string fitted_solution_id = 4; +} + +// Produce (execute) the solution on given inputs. A solution has to have been fitted for this +// to be possible (even in cases where this is just created by transformations). +message ProduceSolutionRequest { + string fitted_solution_id = 1; + repeated Value inputs = 2; + // List of data references of step outputs which should be exposed to the TA3 system. + // If you want to expose outputs of the whole pipeline (e.g., predictions themselves), + // list them here as well. These can be recursive data references like + // "steps.1.steps.4.produce" to point to an output inside a sub-pipeline. + // Systems only have to support exposing final outputs and can return "ValueError" for + // intermediate values. + repeated string expose_outputs = 3; + // Which value types should be used for exposing outputs. If not provided, the allowed + // value types list from a hello call is used instead. + // The order is important as the TA2 system will try value types in order until one works + // out, or an error will be returned instead of the value. An error exposing a value does + // not stop the overall process. 
+ repeated ValueType expose_value_types = 4; + // Any users associated with this call itself. Optional. + repeated SolutionRunUser users = 5; +} + +message ProduceSolutionResponse { + string request_id = 1; +} + +// Get all producing results computed until now and start receiving any +// new producing results computed as well. +message GetProduceSolutionResultsRequest { + string request_id = 1; +} + +message GetProduceSolutionResultsResponse { + // Overall process progress. + Progress progress = 1; + // The list contains progress for each step in the pipeline, in order. + // List can be incomplete while the process is in progress. Systems can provide + // steps only at the end (when "progress" equals COMPLETED) and not during running. + repeated StepProgress steps = 2; + // A mapping between data references of step outputs and values. + map exposed_outputs = 3; +} + +// Exports a solution for evaluation purposes based on NIST specifications. +message SolutionExportRequest { + // Found solution to export. + string fitted_solution_id = 1; + // Solution rank to be used for the exported solution. Lower numbers represent + // better solutions. Presently NIST requirements are that ranks should be non-negative + // and that each exported pipeline have a different rank. TA3 should make sure not to repeat ranks. + // Filenames of exported files are left to be chosen by the TA2 system. + double rank = 2; +} + +message SolutionExportResponse {} + +// List all primitives known to TA2, their IDs, versions, names, and digests. Using this +// information a TA3 should know which primitives may be put into a pipeline template. +// To narrow down potential primitives to use a TA3 can also ask a TA2 to do a solution +// search and then observe which primitives the TA2 is using. If more metadata about primitives +// is needed, then a TA3 can use the results of this call to map primitives to metadata +// (from Python code or primitive annotations) on its own. 
+message ListPrimitivesRequest {} + +message ListPrimitivesResponse { + repeated Primitive primitives = 1; +} + +// Identify a TA2 and get supported features. +// This call is also suitable for a ping/pong call to check that the gRPC connection to the +// TA2 is ready. +message HelloRequest {} + +message HelloResponse { + // Some string identifying the name and version of the TA2 system. + string user_agent = 1; + // Shall be set to "protocol_version" above. + string version = 2; + // List of value types that a TA3 system can use to communicate values to a TA2 system. + // The order is important as a TA3 system should try value types in order until one works + // out, or an error will be returned instead of the value. + repeated ValueType allowed_value_types = 3; + // List of API extensions that a TA2 supports. + repeated string supported_extensions = 4; +} + +// See each message's comments for information about each particular call. +service Core { + rpc SearchSolutions (SearchSolutionsRequest) returns (SearchSolutionsResponse) {} + rpc GetSearchSolutionsResults (GetSearchSolutionsResultsRequest) returns (stream GetSearchSolutionsResultsResponse) {} + rpc EndSearchSolutions (EndSearchSolutionsRequest) returns (EndSearchSolutionsResponse) {} + rpc StopSearchSolutions (StopSearchSolutionsRequest) returns (StopSearchSolutionsResponse) {} + + rpc DescribeSolution (DescribeSolutionRequest) returns (DescribeSolutionResponse) {} + + rpc ScoreSolution (ScoreSolutionRequest) returns (ScoreSolutionResponse) {} + rpc GetScoreSolutionResults (GetScoreSolutionResultsRequest) returns (stream GetScoreSolutionResultsResponse) {} + + rpc FitSolution (FitSolutionRequest) returns (FitSolutionResponse) {} + rpc GetFitSolutionResults (GetFitSolutionResultsRequest) returns (stream GetFitSolutionResultsResponse) {} + + rpc ProduceSolution (ProduceSolutionRequest) returns (ProduceSolutionResponse) {} + rpc GetProduceSolutionResults (GetProduceSolutionResultsRequest) returns (stream 
GetProduceSolutionResultsResponse) {} + + rpc SolutionExport (SolutionExportRequest) returns (SolutionExportResponse) {} + + rpc UpdateProblem (UpdateProblemRequest) returns (UpdateProblemResponse) {} + + rpc ListPrimitives (ListPrimitivesRequest) returns (ListPrimitivesResponse) {} + + rpc Hello (HelloRequest) returns (HelloResponse) {} +} diff --git a/pipeline/execute.pb.go b/pipeline/execute.pb.go new file mode 100644 index 0000000..bee08d9 --- /dev/null +++ b/pipeline/execute.pb.go @@ -0,0 +1,213 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: execute.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +import ( + context "golang.org/x/net/context" + grpc "google.golang.org/grpc" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type PipelineExecuteRequest struct { + PipelineDescription *PipelineDescription `protobuf:"bytes,1,opt,name=pipelineDescription,proto3" json:"pipelineDescription,omitempty"` + Inputs []*Value `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineExecuteRequest) Reset() { *m = PipelineExecuteRequest{} } +func (m *PipelineExecuteRequest) String() string { return proto.CompactTextString(m) } +func (*PipelineExecuteRequest) ProtoMessage() {} +func (*PipelineExecuteRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_execute_4748e52c7921ce40, []int{0} +} +func (m *PipelineExecuteRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineExecuteRequest.Unmarshal(m, b) +} +func (m *PipelineExecuteRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineExecuteRequest.Marshal(b, m, deterministic) +} +func (dst *PipelineExecuteRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineExecuteRequest.Merge(dst, src) +} +func (m *PipelineExecuteRequest) XXX_Size() int { + return xxx_messageInfo_PipelineExecuteRequest.Size(m) +} +func (m *PipelineExecuteRequest) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineExecuteRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineExecuteRequest proto.InternalMessageInfo + +func (m *PipelineExecuteRequest) GetPipelineDescription() *PipelineDescription { + if m != nil { + return m.PipelineDescription + } + return nil +} + +func (m *PipelineExecuteRequest) GetInputs() []*Value { + if m != nil { + return m.Inputs + } + return nil +} + +type PipelineExecuteResponse struct { + ResultURI string `protobuf:"bytes,1,opt,name=resultURI,proto3" json:"resultURI,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" 
json:"error,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineExecuteResponse) Reset() { *m = PipelineExecuteResponse{} } +func (m *PipelineExecuteResponse) String() string { return proto.CompactTextString(m) } +func (*PipelineExecuteResponse) ProtoMessage() {} +func (*PipelineExecuteResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_execute_4748e52c7921ce40, []int{1} +} +func (m *PipelineExecuteResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineExecuteResponse.Unmarshal(m, b) +} +func (m *PipelineExecuteResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineExecuteResponse.Marshal(b, m, deterministic) +} +func (dst *PipelineExecuteResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineExecuteResponse.Merge(dst, src) +} +func (m *PipelineExecuteResponse) XXX_Size() int { + return xxx_messageInfo_PipelineExecuteResponse.Size(m) +} +func (m *PipelineExecuteResponse) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineExecuteResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineExecuteResponse proto.InternalMessageInfo + +func (m *PipelineExecuteResponse) GetResultURI() string { + if m != nil { + return m.ResultURI + } + return "" +} + +func (m *PipelineExecuteResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func init() { + proto.RegisterType((*PipelineExecuteRequest)(nil), "PipelineExecuteRequest") + proto.RegisterType((*PipelineExecuteResponse)(nil), "PipelineExecuteResponse") +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. 
+const _ = grpc.SupportPackageIsVersion4 + +// ExecutorClient is the client API for Executor service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type ExecutorClient interface { + ExecutePipeline(ctx context.Context, in *PipelineExecuteRequest, opts ...grpc.CallOption) (*PipelineExecuteResponse, error) +} + +type executorClient struct { + cc *grpc.ClientConn +} + +func NewExecutorClient(cc *grpc.ClientConn) ExecutorClient { + return &executorClient{cc} +} + +func (c *executorClient) ExecutePipeline(ctx context.Context, in *PipelineExecuteRequest, opts ...grpc.CallOption) (*PipelineExecuteResponse, error) { + out := new(PipelineExecuteResponse) + err := c.cc.Invoke(ctx, "/Executor/ExecutePipeline", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// ExecutorServer is the server API for Executor service. +type ExecutorServer interface { + ExecutePipeline(context.Context, *PipelineExecuteRequest) (*PipelineExecuteResponse, error) +} + +func RegisterExecutorServer(s *grpc.Server, srv ExecutorServer) { + s.RegisterService(&_Executor_serviceDesc, srv) +} + +func _Executor_ExecutePipeline_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PipelineExecuteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExecutorServer).ExecutePipeline(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/Executor/ExecutePipeline", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExecutorServer).ExecutePipeline(ctx, req.(*PipelineExecuteRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var _Executor_serviceDesc = grpc.ServiceDesc{ + ServiceName: "Executor", + HandlerType: (*ExecutorServer)(nil), + Methods: 
[]grpc.MethodDesc{ + { + MethodName: "ExecutePipeline", + Handler: _Executor_ExecutePipeline_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "execute.proto", +} + +func init() { proto.RegisterFile("execute.proto", fileDescriptor_execute_4748e52c7921ce40) } + +var fileDescriptor_execute_4748e52c7921ce40 = []byte{ + // 213 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4d, 0xad, 0x48, 0x4d, + 0x2e, 0x2d, 0x49, 0xd5, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x97, 0xe2, 0x2b, 0xc8, 0x2c, 0x48, 0xcd, + 0xc9, 0xcc, 0x83, 0xf1, 0xb9, 0xcb, 0x12, 0x73, 0x4a, 0xa1, 0x1c, 0xa5, 0x06, 0x46, 0x2e, 0xb1, + 0x00, 0xa8, 0xbc, 0x2b, 0x44, 0x5b, 0x50, 0x6a, 0x61, 0x69, 0x6a, 0x71, 0x89, 0x90, 0x1b, 0x97, + 0x30, 0x4c, 0xa7, 0x4b, 0x6a, 0x71, 0x72, 0x51, 0x66, 0x41, 0x49, 0x66, 0x7e, 0x9e, 0x04, 0xa3, + 0x02, 0xa3, 0x06, 0xb7, 0x91, 0x88, 0x5e, 0x00, 0xa6, 0x5c, 0x10, 0x36, 0x0d, 0x42, 0x72, 0x5c, + 0x6c, 0x99, 0x79, 0x05, 0xa5, 0x25, 0xc5, 0x12, 0x4c, 0x0a, 0xcc, 0x1a, 0xdc, 0x46, 0x6c, 0x7a, + 0x61, 0x20, 0x07, 0x04, 0x41, 0x45, 0x95, 0x7c, 0xb9, 0xc4, 0x31, 0x5c, 0x50, 0x5c, 0x90, 0x9f, + 0x57, 0x9c, 0x2a, 0x24, 0xc3, 0xc5, 0x59, 0x94, 0x5a, 0x5c, 0x9a, 0x53, 0x12, 0x1a, 0xe4, 0x09, + 0xb6, 0x98, 0x33, 0x08, 0x21, 0x20, 0x24, 0xc2, 0xc5, 0x9a, 0x5a, 0x54, 0x94, 0x5f, 0x24, 0xc1, + 0x04, 0x96, 0x81, 0x70, 0x8c, 0x82, 0xb8, 0x38, 0x20, 0xc6, 0xe4, 0x17, 0x09, 0xb9, 0x71, 0xf1, + 0x43, 0x8d, 0x84, 0xd9, 0x20, 0x24, 0xae, 0x87, 0xdd, 0xbb, 0x52, 0x12, 0x7a, 0x38, 0x5c, 0xa1, + 0xc4, 0x90, 0xc4, 0x06, 0x0e, 0x2c, 0x63, 0x40, 0x00, 0x00, 0x00, 0xff, 0xff, 0x79, 0x7c, 0x13, + 0x0f, 0x5a, 0x01, 0x00, 0x00, +} diff --git a/pipeline/execute.proto b/pipeline/execute.proto new file mode 100644 index 0000000..945cdc6 --- /dev/null +++ b/pipeline/execute.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; + +import "pipeline.proto"; +import "value.proto"; + +message PipelineExecuteRequest { + PipelineDescription pipelineDescription 
= 1; + repeated Value inputs = 2; +} + +message PipelineExecuteResponse { + string resultURI = 1; + string error = 2; +} + +service Executor { + rpc ExecutePipeline (PipelineExecuteRequest) returns (PipelineExecuteResponse) {} +} diff --git a/pipeline/pipeline.json b/pipeline/pipeline.json new file mode 100644 index 0000000..e69de29 diff --git a/pipeline/pipeline.pb.go b/pipeline/pipeline.pb.go new file mode 100644 index 0000000..e629e6e --- /dev/null +++ b/pipeline/pipeline.pb.go @@ -0,0 +1,1585 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: pipeline.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/golang/protobuf/protoc-gen-go/descriptor" +import timestamp "github.com/golang/protobuf/ptypes/timestamp" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type PipelineContext int32 + +const ( + // Default value. Not to be used. + PipelineContext_PIPELINE_CONTEXT_UNKNOWN PipelineContext = 0 + // Pipeline was created during building/training of the system itself, e.g., during metalearning. + PipelineContext_PRETRAINING PipelineContext = 1 + // Pipeline was created during development or testing of the system itself, e.g., during debugging. + PipelineContext_TESTING PipelineContext = 2 + // Pipeline was created during evaluation of the system itself, e.g., NIST blind evaluation. + PipelineContext_EVALUATION PipelineContext = 3 + // Pipeline was created during regular (production) operation of the system. 
+ PipelineContext_PRODUCTION PipelineContext = 4 +) + +var PipelineContext_name = map[int32]string{ + 0: "PIPELINE_CONTEXT_UNKNOWN", + 1: "PRETRAINING", + 2: "TESTING", + 3: "EVALUATION", + 4: "PRODUCTION", +} +var PipelineContext_value = map[string]int32{ + "PIPELINE_CONTEXT_UNKNOWN": 0, + "PRETRAINING": 1, + "TESTING": 2, + "EVALUATION": 3, + "PRODUCTION": 4, +} + +func (x PipelineContext) String() string { + return proto.EnumName(PipelineContext_name, int32(x)) +} +func (PipelineContext) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{0} +} + +type ContainerArgument struct { + // Data reference. + Data string `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ContainerArgument) Reset() { *m = ContainerArgument{} } +func (m *ContainerArgument) String() string { return proto.CompactTextString(m) } +func (*ContainerArgument) ProtoMessage() {} +func (*ContainerArgument) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{0} +} +func (m *ContainerArgument) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ContainerArgument.Unmarshal(m, b) +} +func (m *ContainerArgument) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ContainerArgument.Marshal(b, m, deterministic) +} +func (dst *ContainerArgument) XXX_Merge(src proto.Message) { + xxx_messageInfo_ContainerArgument.Merge(dst, src) +} +func (m *ContainerArgument) XXX_Size() int { + return xxx_messageInfo_ContainerArgument.Size(m) +} +func (m *ContainerArgument) XXX_DiscardUnknown() { + xxx_messageInfo_ContainerArgument.DiscardUnknown(m) +} + +var xxx_messageInfo_ContainerArgument proto.InternalMessageInfo + +func (m *ContainerArgument) GetData() string { + if m != nil { + return m.Data + } + return "" +} + +type DataArgument struct { + // Data reference. 
+ Data string `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DataArgument) Reset() { *m = DataArgument{} } +func (m *DataArgument) String() string { return proto.CompactTextString(m) } +func (*DataArgument) ProtoMessage() {} +func (*DataArgument) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{1} +} +func (m *DataArgument) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DataArgument.Unmarshal(m, b) +} +func (m *DataArgument) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DataArgument.Marshal(b, m, deterministic) +} +func (dst *DataArgument) XXX_Merge(src proto.Message) { + xxx_messageInfo_DataArgument.Merge(dst, src) +} +func (m *DataArgument) XXX_Size() int { + return xxx_messageInfo_DataArgument.Size(m) +} +func (m *DataArgument) XXX_DiscardUnknown() { + xxx_messageInfo_DataArgument.DiscardUnknown(m) +} + +var xxx_messageInfo_DataArgument proto.InternalMessageInfo + +func (m *DataArgument) GetData() string { + if m != nil { + return m.Data + } + return "" +} + +type DataArguments struct { + Data []string `protobuf:"bytes,1,rep,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *DataArguments) Reset() { *m = DataArguments{} } +func (m *DataArguments) String() string { return proto.CompactTextString(m) } +func (*DataArguments) ProtoMessage() {} +func (*DataArguments) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{2} +} +func (m *DataArguments) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DataArguments.Unmarshal(m, b) +} +func (m *DataArguments) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DataArguments.Marshal(b, m, deterministic) 
+} +func (dst *DataArguments) XXX_Merge(src proto.Message) { + xxx_messageInfo_DataArguments.Merge(dst, src) +} +func (m *DataArguments) XXX_Size() int { + return xxx_messageInfo_DataArguments.Size(m) +} +func (m *DataArguments) XXX_DiscardUnknown() { + xxx_messageInfo_DataArguments.DiscardUnknown(m) +} + +var xxx_messageInfo_DataArguments proto.InternalMessageInfo + +func (m *DataArguments) GetData() []string { + if m != nil { + return m.Data + } + return nil +} + +type PrimitiveArgument struct { + // 0-based index identifying a step of which primitive is used as a value. + Data int32 `protobuf:"varint,1,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitiveArgument) Reset() { *m = PrimitiveArgument{} } +func (m *PrimitiveArgument) String() string { return proto.CompactTextString(m) } +func (*PrimitiveArgument) ProtoMessage() {} +func (*PrimitiveArgument) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{3} +} +func (m *PrimitiveArgument) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitiveArgument.Unmarshal(m, b) +} +func (m *PrimitiveArgument) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitiveArgument.Marshal(b, m, deterministic) +} +func (dst *PrimitiveArgument) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitiveArgument.Merge(dst, src) +} +func (m *PrimitiveArgument) XXX_Size() int { + return xxx_messageInfo_PrimitiveArgument.Size(m) +} +func (m *PrimitiveArgument) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitiveArgument.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitiveArgument proto.InternalMessageInfo + +func (m *PrimitiveArgument) GetData() int32 { + if m != nil { + return m.Data + } + return 0 +} + +type PrimitiveArguments struct { + // 0-based index identifying a step of which primitive is used as a value. 
+ Data []int32 `protobuf:"varint,1,rep,packed,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitiveArguments) Reset() { *m = PrimitiveArguments{} } +func (m *PrimitiveArguments) String() string { return proto.CompactTextString(m) } +func (*PrimitiveArguments) ProtoMessage() {} +func (*PrimitiveArguments) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{4} +} +func (m *PrimitiveArguments) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitiveArguments.Unmarshal(m, b) +} +func (m *PrimitiveArguments) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitiveArguments.Marshal(b, m, deterministic) +} +func (dst *PrimitiveArguments) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitiveArguments.Merge(dst, src) +} +func (m *PrimitiveArguments) XXX_Size() int { + return xxx_messageInfo_PrimitiveArguments.Size(m) +} +func (m *PrimitiveArguments) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitiveArguments.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitiveArguments proto.InternalMessageInfo + +func (m *PrimitiveArguments) GetData() []int32 { + if m != nil { + return m.Data + } + return nil +} + +type ValueArgument struct { + Data *Value `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ValueArgument) Reset() { *m = ValueArgument{} } +func (m *ValueArgument) String() string { return proto.CompactTextString(m) } +func (*ValueArgument) ProtoMessage() {} +func (*ValueArgument) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{5} +} +func (m *ValueArgument) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ValueArgument.Unmarshal(m, b) +} +func (m *ValueArgument) XXX_Marshal(b 
[]byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ValueArgument.Marshal(b, m, deterministic) +} +func (dst *ValueArgument) XXX_Merge(src proto.Message) { + xxx_messageInfo_ValueArgument.Merge(dst, src) +} +func (m *ValueArgument) XXX_Size() int { + return xxx_messageInfo_ValueArgument.Size(m) +} +func (m *ValueArgument) XXX_DiscardUnknown() { + xxx_messageInfo_ValueArgument.DiscardUnknown(m) +} + +var xxx_messageInfo_ValueArgument proto.InternalMessageInfo + +func (m *ValueArgument) GetData() *Value { + if m != nil { + return m.Data + } + return nil +} + +type PrimitiveStepArgument struct { + // Types that are valid to be assigned to Argument: + // *PrimitiveStepArgument_Container + // *PrimitiveStepArgument_Data + Argument isPrimitiveStepArgument_Argument `protobuf_oneof:"argument"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitiveStepArgument) Reset() { *m = PrimitiveStepArgument{} } +func (m *PrimitiveStepArgument) String() string { return proto.CompactTextString(m) } +func (*PrimitiveStepArgument) ProtoMessage() {} +func (*PrimitiveStepArgument) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{6} +} +func (m *PrimitiveStepArgument) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitiveStepArgument.Unmarshal(m, b) +} +func (m *PrimitiveStepArgument) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitiveStepArgument.Marshal(b, m, deterministic) +} +func (dst *PrimitiveStepArgument) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitiveStepArgument.Merge(dst, src) +} +func (m *PrimitiveStepArgument) XXX_Size() int { + return xxx_messageInfo_PrimitiveStepArgument.Size(m) +} +func (m *PrimitiveStepArgument) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitiveStepArgument.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitiveStepArgument proto.InternalMessageInfo + 
+type isPrimitiveStepArgument_Argument interface { + isPrimitiveStepArgument_Argument() +} + +type PrimitiveStepArgument_Container struct { + Container *ContainerArgument `protobuf:"bytes,1,opt,name=container,proto3,oneof"` +} +type PrimitiveStepArgument_Data struct { + Data *DataArgument `protobuf:"bytes,2,opt,name=data,proto3,oneof"` +} + +func (*PrimitiveStepArgument_Container) isPrimitiveStepArgument_Argument() {} +func (*PrimitiveStepArgument_Data) isPrimitiveStepArgument_Argument() {} + +func (m *PrimitiveStepArgument) GetArgument() isPrimitiveStepArgument_Argument { + if m != nil { + return m.Argument + } + return nil +} + +func (m *PrimitiveStepArgument) GetContainer() *ContainerArgument { + if x, ok := m.GetArgument().(*PrimitiveStepArgument_Container); ok { + return x.Container + } + return nil +} + +func (m *PrimitiveStepArgument) GetData() *DataArgument { + if x, ok := m.GetArgument().(*PrimitiveStepArgument_Data); ok { + return x.Data + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. 
+func (*PrimitiveStepArgument) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _PrimitiveStepArgument_OneofMarshaler, _PrimitiveStepArgument_OneofUnmarshaler, _PrimitiveStepArgument_OneofSizer, []interface{}{ + (*PrimitiveStepArgument_Container)(nil), + (*PrimitiveStepArgument_Data)(nil), + } +} + +func _PrimitiveStepArgument_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*PrimitiveStepArgument) + // argument + switch x := m.Argument.(type) { + case *PrimitiveStepArgument_Container: + b.EncodeVarint(1<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Container); err != nil { + return err + } + case *PrimitiveStepArgument_Data: + b.EncodeVarint(2<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Data); err != nil { + return err + } + case nil: + default: + return fmt.Errorf("PrimitiveStepArgument.Argument has unexpected type %T", x) + } + return nil +} + +func _PrimitiveStepArgument_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*PrimitiveStepArgument) + switch tag { + case 1: // argument.container + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ContainerArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepArgument_Container{msg} + return true, err + case 2: // argument.data + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(DataArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepArgument_Data{msg} + return true, err + default: + return false, nil + } +} + +func _PrimitiveStepArgument_OneofSizer(msg proto.Message) (n int) { + m := msg.(*PrimitiveStepArgument) + // argument + switch x := m.Argument.(type) { + case *PrimitiveStepArgument_Container: + s := proto.Size(x.Container) + n += 1 // tag and wire + n += 
proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepArgument_Data: + s := proto.Size(x.Data) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case nil: + default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +type PrimitiveStepHyperparameter struct { + // Types that are valid to be assigned to Argument: + // *PrimitiveStepHyperparameter_Container + // *PrimitiveStepHyperparameter_Data + // *PrimitiveStepHyperparameter_Primitive + // *PrimitiveStepHyperparameter_Value + // *PrimitiveStepHyperparameter_DataSet + // *PrimitiveStepHyperparameter_PrimitivesSet + Argument isPrimitiveStepHyperparameter_Argument `protobuf_oneof:"argument"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitiveStepHyperparameter) Reset() { *m = PrimitiveStepHyperparameter{} } +func (m *PrimitiveStepHyperparameter) String() string { return proto.CompactTextString(m) } +func (*PrimitiveStepHyperparameter) ProtoMessage() {} +func (*PrimitiveStepHyperparameter) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{7} +} +func (m *PrimitiveStepHyperparameter) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitiveStepHyperparameter.Unmarshal(m, b) +} +func (m *PrimitiveStepHyperparameter) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitiveStepHyperparameter.Marshal(b, m, deterministic) +} +func (dst *PrimitiveStepHyperparameter) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitiveStepHyperparameter.Merge(dst, src) +} +func (m *PrimitiveStepHyperparameter) XXX_Size() int { + return xxx_messageInfo_PrimitiveStepHyperparameter.Size(m) +} +func (m *PrimitiveStepHyperparameter) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitiveStepHyperparameter.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitiveStepHyperparameter proto.InternalMessageInfo + +type 
isPrimitiveStepHyperparameter_Argument interface { + isPrimitiveStepHyperparameter_Argument() +} + +type PrimitiveStepHyperparameter_Container struct { + Container *ContainerArgument `protobuf:"bytes,1,opt,name=container,proto3,oneof"` +} +type PrimitiveStepHyperparameter_Data struct { + Data *DataArgument `protobuf:"bytes,2,opt,name=data,proto3,oneof"` +} +type PrimitiveStepHyperparameter_Primitive struct { + Primitive *PrimitiveArgument `protobuf:"bytes,3,opt,name=primitive,proto3,oneof"` +} +type PrimitiveStepHyperparameter_Value struct { + Value *ValueArgument `protobuf:"bytes,4,opt,name=value,proto3,oneof"` +} +type PrimitiveStepHyperparameter_DataSet struct { + DataSet *DataArguments `protobuf:"bytes,5,opt,name=data_set,json=dataSet,proto3,oneof"` +} +type PrimitiveStepHyperparameter_PrimitivesSet struct { + PrimitivesSet *PrimitiveArguments `protobuf:"bytes,6,opt,name=primitives_set,json=primitivesSet,proto3,oneof"` +} + +func (*PrimitiveStepHyperparameter_Container) isPrimitiveStepHyperparameter_Argument() {} +func (*PrimitiveStepHyperparameter_Data) isPrimitiveStepHyperparameter_Argument() {} +func (*PrimitiveStepHyperparameter_Primitive) isPrimitiveStepHyperparameter_Argument() {} +func (*PrimitiveStepHyperparameter_Value) isPrimitiveStepHyperparameter_Argument() {} +func (*PrimitiveStepHyperparameter_DataSet) isPrimitiveStepHyperparameter_Argument() {} +func (*PrimitiveStepHyperparameter_PrimitivesSet) isPrimitiveStepHyperparameter_Argument() {} + +func (m *PrimitiveStepHyperparameter) GetArgument() isPrimitiveStepHyperparameter_Argument { + if m != nil { + return m.Argument + } + return nil +} + +func (m *PrimitiveStepHyperparameter) GetContainer() *ContainerArgument { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_Container); ok { + return x.Container + } + return nil +} + +func (m *PrimitiveStepHyperparameter) GetData() *DataArgument { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_Data); ok { + return x.Data + } + return nil 
+} + +func (m *PrimitiveStepHyperparameter) GetPrimitive() *PrimitiveArgument { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_Primitive); ok { + return x.Primitive + } + return nil +} + +func (m *PrimitiveStepHyperparameter) GetValue() *ValueArgument { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_Value); ok { + return x.Value + } + return nil +} + +func (m *PrimitiveStepHyperparameter) GetDataSet() *DataArguments { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_DataSet); ok { + return x.DataSet + } + return nil +} + +func (m *PrimitiveStepHyperparameter) GetPrimitivesSet() *PrimitiveArguments { + if x, ok := m.GetArgument().(*PrimitiveStepHyperparameter_PrimitivesSet); ok { + return x.PrimitivesSet + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. +func (*PrimitiveStepHyperparameter) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _PrimitiveStepHyperparameter_OneofMarshaler, _PrimitiveStepHyperparameter_OneofUnmarshaler, _PrimitiveStepHyperparameter_OneofSizer, []interface{}{ + (*PrimitiveStepHyperparameter_Container)(nil), + (*PrimitiveStepHyperparameter_Data)(nil), + (*PrimitiveStepHyperparameter_Primitive)(nil), + (*PrimitiveStepHyperparameter_Value)(nil), + (*PrimitiveStepHyperparameter_DataSet)(nil), + (*PrimitiveStepHyperparameter_PrimitivesSet)(nil), + } +} + +func _PrimitiveStepHyperparameter_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*PrimitiveStepHyperparameter) + // argument + switch x := m.Argument.(type) { + case *PrimitiveStepHyperparameter_Container: + b.EncodeVarint(1<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Container); err != nil { + return err + } + case *PrimitiveStepHyperparameter_Data: + b.EncodeVarint(2<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Data); err != nil { + return 
err + } + case *PrimitiveStepHyperparameter_Primitive: + b.EncodeVarint(3<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Primitive); err != nil { + return err + } + case *PrimitiveStepHyperparameter_Value: + b.EncodeVarint(4<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Value); err != nil { + return err + } + case *PrimitiveStepHyperparameter_DataSet: + b.EncodeVarint(5<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.DataSet); err != nil { + return err + } + case *PrimitiveStepHyperparameter_PrimitivesSet: + b.EncodeVarint(6<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.PrimitivesSet); err != nil { + return err + } + case nil: + default: + return fmt.Errorf("PrimitiveStepHyperparameter.Argument has unexpected type %T", x) + } + return nil +} + +func _PrimitiveStepHyperparameter_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*PrimitiveStepHyperparameter) + switch tag { + case 1: // argument.container + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ContainerArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_Container{msg} + return true, err + case 2: // argument.data + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(DataArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_Data{msg} + return true, err + case 3: // argument.primitive + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(PrimitiveArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_Primitive{msg} + return true, err + case 4: // argument.value + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ValueArgument) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_Value{msg} + return true, err + case 5: // argument.data_set + if wire != proto.WireBytes { + return 
true, proto.ErrInternalBadWireType + } + msg := new(DataArguments) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_DataSet{msg} + return true, err + case 6: // argument.primitives_set + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(PrimitiveArguments) + err := b.DecodeMessage(msg) + m.Argument = &PrimitiveStepHyperparameter_PrimitivesSet{msg} + return true, err + default: + return false, nil + } +} + +func _PrimitiveStepHyperparameter_OneofSizer(msg proto.Message) (n int) { + m := msg.(*PrimitiveStepHyperparameter) + // argument + switch x := m.Argument.(type) { + case *PrimitiveStepHyperparameter_Container: + s := proto.Size(x.Container) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepHyperparameter_Data: + s := proto.Size(x.Data) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepHyperparameter_Primitive: + s := proto.Size(x.Primitive) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepHyperparameter_Value: + s := proto.Size(x.Value) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepHyperparameter_DataSet: + s := proto.Size(x.DataSet) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PrimitiveStepHyperparameter_PrimitivesSet: + s := proto.Size(x.PrimitivesSet) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case nil: + default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +type StepInput struct { + // Data reference. 
+ Data string `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StepInput) Reset() { *m = StepInput{} } +func (m *StepInput) String() string { return proto.CompactTextString(m) } +func (*StepInput) ProtoMessage() {} +func (*StepInput) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{8} +} +func (m *StepInput) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StepInput.Unmarshal(m, b) +} +func (m *StepInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StepInput.Marshal(b, m, deterministic) +} +func (dst *StepInput) XXX_Merge(src proto.Message) { + xxx_messageInfo_StepInput.Merge(dst, src) +} +func (m *StepInput) XXX_Size() int { + return xxx_messageInfo_StepInput.Size(m) +} +func (m *StepInput) XXX_DiscardUnknown() { + xxx_messageInfo_StepInput.DiscardUnknown(m) +} + +var xxx_messageInfo_StepInput proto.InternalMessageInfo + +func (m *StepInput) GetData() string { + if m != nil { + return m.Data + } + return "" +} + +type StepOutput struct { + // Name which becomes part of the data reference. 
+ Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StepOutput) Reset() { *m = StepOutput{} } +func (m *StepOutput) String() string { return proto.CompactTextString(m) } +func (*StepOutput) ProtoMessage() {} +func (*StepOutput) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{9} +} +func (m *StepOutput) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_StepOutput.Unmarshal(m, b) +} +func (m *StepOutput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StepOutput.Marshal(b, m, deterministic) +} +func (dst *StepOutput) XXX_Merge(src proto.Message) { + xxx_messageInfo_StepOutput.Merge(dst, src) +} +func (m *StepOutput) XXX_Size() int { + return xxx_messageInfo_StepOutput.Size(m) +} +func (m *StepOutput) XXX_DiscardUnknown() { + xxx_messageInfo_StepOutput.DiscardUnknown(m) +} + +var xxx_messageInfo_StepOutput proto.InternalMessageInfo + +func (m *StepOutput) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +type PipelineSource struct { + // String representing name of the author, team. + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // An URI to contact the source. + Contact string `protobuf:"bytes,2,opt,name=contact,proto3" json:"contact,omitempty"` + // A list of pipeline IDs used to derive the pipeline. 
+ Pipelines []string `protobuf:"bytes,3,rep,name=pipelines,proto3" json:"pipelines,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineSource) Reset() { *m = PipelineSource{} } +func (m *PipelineSource) String() string { return proto.CompactTextString(m) } +func (*PipelineSource) ProtoMessage() {} +func (*PipelineSource) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{10} +} +func (m *PipelineSource) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineSource.Unmarshal(m, b) +} +func (m *PipelineSource) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineSource.Marshal(b, m, deterministic) +} +func (dst *PipelineSource) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineSource.Merge(dst, src) +} +func (m *PipelineSource) XXX_Size() int { + return xxx_messageInfo_PipelineSource.Size(m) +} +func (m *PipelineSource) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineSource.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineSource proto.InternalMessageInfo + +func (m *PipelineSource) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *PipelineSource) GetContact() string { + if m != nil { + return m.Contact + } + return "" +} + +func (m *PipelineSource) GetPipelines() []string { + if m != nil { + return m.Pipelines + } + return nil +} + +// User associated with the creation of the template/pipeline, or selection of a primitive. +type PipelineDescriptionUser struct { + // Globally unique ID for this user. It can be opaque, but it should identify the same user + // across sessions. Consider using UUID variant 5 with namespace set to the name of your system + // and name to an ID in your system's database. 
It does not have to map to any real ID, just + // that it is possible to connect mutliple pipelines/templates by the same user together, + // if necessary. + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + // A natural language description of what the user did to be on the list, e.g., "Picked + // a pipeline from a list of pipelines.". + Reason string `protobuf:"bytes,2,opt,name=reason,proto3" json:"reason,omitempty"` + // A natural language description by the user of what the user did, + // e.g., "I picked a pipeline because it looks short in comparison with others.". + Rationale string `protobuf:"bytes,3,opt,name=rationale,proto3" json:"rationale,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineDescriptionUser) Reset() { *m = PipelineDescriptionUser{} } +func (m *PipelineDescriptionUser) String() string { return proto.CompactTextString(m) } +func (*PipelineDescriptionUser) ProtoMessage() {} +func (*PipelineDescriptionUser) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{11} +} +func (m *PipelineDescriptionUser) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineDescriptionUser.Unmarshal(m, b) +} +func (m *PipelineDescriptionUser) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineDescriptionUser.Marshal(b, m, deterministic) +} +func (dst *PipelineDescriptionUser) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineDescriptionUser.Merge(dst, src) +} +func (m *PipelineDescriptionUser) XXX_Size() int { + return xxx_messageInfo_PipelineDescriptionUser.Size(m) +} +func (m *PipelineDescriptionUser) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineDescriptionUser.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineDescriptionUser proto.InternalMessageInfo + +func (m *PipelineDescriptionUser) GetId() string { + if m != nil { + return m.Id + } + return 
"" +} + +func (m *PipelineDescriptionUser) GetReason() string { + if m != nil { + return m.Reason + } + return "" +} + +func (m *PipelineDescriptionUser) GetRationale() string { + if m != nil { + return m.Rationale + } + return "" +} + +// Possible input to the pipeline or template. +type PipelineDescriptionInput struct { + // Human friendly name of the input. + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineDescriptionInput) Reset() { *m = PipelineDescriptionInput{} } +func (m *PipelineDescriptionInput) String() string { return proto.CompactTextString(m) } +func (*PipelineDescriptionInput) ProtoMessage() {} +func (*PipelineDescriptionInput) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{12} +} +func (m *PipelineDescriptionInput) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineDescriptionInput.Unmarshal(m, b) +} +func (m *PipelineDescriptionInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineDescriptionInput.Marshal(b, m, deterministic) +} +func (dst *PipelineDescriptionInput) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineDescriptionInput.Merge(dst, src) +} +func (m *PipelineDescriptionInput) XXX_Size() int { + return xxx_messageInfo_PipelineDescriptionInput.Size(m) +} +func (m *PipelineDescriptionInput) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineDescriptionInput.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineDescriptionInput proto.InternalMessageInfo + +func (m *PipelineDescriptionInput) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +// Available output of the pipeline or template. +type PipelineDescriptionOutput struct { + // Human friendly name of the output. 
+ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // Data reference, probably of an output of a step. + Data string `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineDescriptionOutput) Reset() { *m = PipelineDescriptionOutput{} } +func (m *PipelineDescriptionOutput) String() string { return proto.CompactTextString(m) } +func (*PipelineDescriptionOutput) ProtoMessage() {} +func (*PipelineDescriptionOutput) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{13} +} +func (m *PipelineDescriptionOutput) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineDescriptionOutput.Unmarshal(m, b) +} +func (m *PipelineDescriptionOutput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineDescriptionOutput.Marshal(b, m, deterministic) +} +func (dst *PipelineDescriptionOutput) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineDescriptionOutput.Merge(dst, src) +} +func (m *PipelineDescriptionOutput) XXX_Size() int { + return xxx_messageInfo_PipelineDescriptionOutput.Size(m) +} +func (m *PipelineDescriptionOutput) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineDescriptionOutput.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineDescriptionOutput proto.InternalMessageInfo + +func (m *PipelineDescriptionOutput) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *PipelineDescriptionOutput) GetData() string { + if m != nil { + return m.Data + } + return "" +} + +type PrimitivePipelineDescriptionStep struct { + Primitive *Primitive `protobuf:"bytes,1,opt,name=primitive,proto3" json:"primitive,omitempty"` + // Arguments to the primitive. Constructor arguments should not be listed here, because they + // can be automatically created from other information. 
All these arguments are listed as kind + // "PIPELINE" in primitive's metadata. + Arguments map[string]*PrimitiveStepArgument `protobuf:"bytes,2,rep,name=arguments,proto3" json:"arguments,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // List of produce metods providing data. One can reference using data reference these outputs + // then in arguments (inputs) in other steps or pipeline outputs. + Outputs []*StepOutput `protobuf:"bytes,3,rep,name=outputs,proto3" json:"outputs,omitempty"` + // Some hyper-parameters are not really tunable and should be fixed as part of template/pipeline. + // This can be done here. Hyper-parameters listed here cannot be tuned or overridden. Author of a + // template/pipeline decides which hyper-parameter are which, probably based on their semantic type. + // TA3 can specify a list of hyper-parameters to fix, and TA2 can add to the list additional + // hyper-paramaters in found pipelines. + Hyperparams map[string]*PrimitiveStepHyperparameter `protobuf:"bytes,4,rep,name=hyperparams,proto3" json:"hyperparams,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // List of users associated with selection of this primitive/arguments/hyper-parameters. Optional. 
+ Users []*PipelineDescriptionUser `protobuf:"bytes,5,rep,name=users,proto3" json:"users,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PrimitivePipelineDescriptionStep) Reset() { *m = PrimitivePipelineDescriptionStep{} } +func (m *PrimitivePipelineDescriptionStep) String() string { return proto.CompactTextString(m) } +func (*PrimitivePipelineDescriptionStep) ProtoMessage() {} +func (*PrimitivePipelineDescriptionStep) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{14} +} +func (m *PrimitivePipelineDescriptionStep) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PrimitivePipelineDescriptionStep.Unmarshal(m, b) +} +func (m *PrimitivePipelineDescriptionStep) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PrimitivePipelineDescriptionStep.Marshal(b, m, deterministic) +} +func (dst *PrimitivePipelineDescriptionStep) XXX_Merge(src proto.Message) { + xxx_messageInfo_PrimitivePipelineDescriptionStep.Merge(dst, src) +} +func (m *PrimitivePipelineDescriptionStep) XXX_Size() int { + return xxx_messageInfo_PrimitivePipelineDescriptionStep.Size(m) +} +func (m *PrimitivePipelineDescriptionStep) XXX_DiscardUnknown() { + xxx_messageInfo_PrimitivePipelineDescriptionStep.DiscardUnknown(m) +} + +var xxx_messageInfo_PrimitivePipelineDescriptionStep proto.InternalMessageInfo + +func (m *PrimitivePipelineDescriptionStep) GetPrimitive() *Primitive { + if m != nil { + return m.Primitive + } + return nil +} + +func (m *PrimitivePipelineDescriptionStep) GetArguments() map[string]*PrimitiveStepArgument { + if m != nil { + return m.Arguments + } + return nil +} + +func (m *PrimitivePipelineDescriptionStep) GetOutputs() []*StepOutput { + if m != nil { + return m.Outputs + } + return nil +} + +func (m *PrimitivePipelineDescriptionStep) GetHyperparams() map[string]*PrimitiveStepHyperparameter { + if m != nil { + return 
m.Hyperparams + } + return nil +} + +func (m *PrimitivePipelineDescriptionStep) GetUsers() []*PipelineDescriptionUser { + if m != nil { + return m.Users + } + return nil +} + +type SubpipelinePipelineDescriptionStep struct { + // Only "id" field is required in this case to reference another pipeline in the template. + Pipeline *PipelineDescription `protobuf:"bytes,1,opt,name=pipeline,proto3" json:"pipeline,omitempty"` + // List of data references, probably of an output of a step or pipeline input, + // mapped to sub-pipeline's inputs in order. + Inputs []*StepInput `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + // List of IDs to be used in data references, mapping sub-pipeline's outputs in order. + Outputs []*StepOutput `protobuf:"bytes,3,rep,name=outputs,proto3" json:"outputs,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *SubpipelinePipelineDescriptionStep) Reset() { *m = SubpipelinePipelineDescriptionStep{} } +func (m *SubpipelinePipelineDescriptionStep) String() string { return proto.CompactTextString(m) } +func (*SubpipelinePipelineDescriptionStep) ProtoMessage() {} +func (*SubpipelinePipelineDescriptionStep) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{15} +} +func (m *SubpipelinePipelineDescriptionStep) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_SubpipelinePipelineDescriptionStep.Unmarshal(m, b) +} +func (m *SubpipelinePipelineDescriptionStep) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_SubpipelinePipelineDescriptionStep.Marshal(b, m, deterministic) +} +func (dst *SubpipelinePipelineDescriptionStep) XXX_Merge(src proto.Message) { + xxx_messageInfo_SubpipelinePipelineDescriptionStep.Merge(dst, src) +} +func (m *SubpipelinePipelineDescriptionStep) XXX_Size() int { + return xxx_messageInfo_SubpipelinePipelineDescriptionStep.Size(m) +} +func (m 
*SubpipelinePipelineDescriptionStep) XXX_DiscardUnknown() { + xxx_messageInfo_SubpipelinePipelineDescriptionStep.DiscardUnknown(m) +} + +var xxx_messageInfo_SubpipelinePipelineDescriptionStep proto.InternalMessageInfo + +func (m *SubpipelinePipelineDescriptionStep) GetPipeline() *PipelineDescription { + if m != nil { + return m.Pipeline + } + return nil +} + +func (m *SubpipelinePipelineDescriptionStep) GetInputs() []*StepInput { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *SubpipelinePipelineDescriptionStep) GetOutputs() []*StepOutput { + if m != nil { + return m.Outputs + } + return nil +} + +// Used to represent a pipeline template which can be used to generate full pipelines. +// A placeholder is replaced with a pipeline step to form a pipeline. See README.md +// for restrictions on the number of them, their position, allowed inputs and outputs, +// etc. +type PlaceholderPipelineDescriptionStep struct { + // List of inputs which can be used as inputs to resulting sub-pipeline. Resulting + // sub-pipeline does not have to use all the inputs, but it cannot use any other inputs. + Inputs []*StepInput `protobuf:"bytes,1,rep,name=inputs,proto3" json:"inputs,omitempty"` + // A list of outputs of the resulting sub-pipeline. 
+ Outputs []*StepOutput `protobuf:"bytes,2,rep,name=outputs,proto3" json:"outputs,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PlaceholderPipelineDescriptionStep) Reset() { *m = PlaceholderPipelineDescriptionStep{} } +func (m *PlaceholderPipelineDescriptionStep) String() string { return proto.CompactTextString(m) } +func (*PlaceholderPipelineDescriptionStep) ProtoMessage() {} +func (*PlaceholderPipelineDescriptionStep) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{16} +} +func (m *PlaceholderPipelineDescriptionStep) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PlaceholderPipelineDescriptionStep.Unmarshal(m, b) +} +func (m *PlaceholderPipelineDescriptionStep) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PlaceholderPipelineDescriptionStep.Marshal(b, m, deterministic) +} +func (dst *PlaceholderPipelineDescriptionStep) XXX_Merge(src proto.Message) { + xxx_messageInfo_PlaceholderPipelineDescriptionStep.Merge(dst, src) +} +func (m *PlaceholderPipelineDescriptionStep) XXX_Size() int { + return xxx_messageInfo_PlaceholderPipelineDescriptionStep.Size(m) +} +func (m *PlaceholderPipelineDescriptionStep) XXX_DiscardUnknown() { + xxx_messageInfo_PlaceholderPipelineDescriptionStep.DiscardUnknown(m) +} + +var xxx_messageInfo_PlaceholderPipelineDescriptionStep proto.InternalMessageInfo + +func (m *PlaceholderPipelineDescriptionStep) GetInputs() []*StepInput { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *PlaceholderPipelineDescriptionStep) GetOutputs() []*StepOutput { + if m != nil { + return m.Outputs + } + return nil +} + +type PipelineDescriptionStep struct { + // Types that are valid to be assigned to Step: + // *PipelineDescriptionStep_Primitive + // *PipelineDescriptionStep_Pipeline + // *PipelineDescriptionStep_Placeholder + Step isPipelineDescriptionStep_Step 
`protobuf_oneof:"step"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineDescriptionStep) Reset() { *m = PipelineDescriptionStep{} } +func (m *PipelineDescriptionStep) String() string { return proto.CompactTextString(m) } +func (*PipelineDescriptionStep) ProtoMessage() {} +func (*PipelineDescriptionStep) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{17} +} +func (m *PipelineDescriptionStep) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineDescriptionStep.Unmarshal(m, b) +} +func (m *PipelineDescriptionStep) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineDescriptionStep.Marshal(b, m, deterministic) +} +func (dst *PipelineDescriptionStep) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineDescriptionStep.Merge(dst, src) +} +func (m *PipelineDescriptionStep) XXX_Size() int { + return xxx_messageInfo_PipelineDescriptionStep.Size(m) +} +func (m *PipelineDescriptionStep) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineDescriptionStep.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineDescriptionStep proto.InternalMessageInfo + +type isPipelineDescriptionStep_Step interface { + isPipelineDescriptionStep_Step() +} + +type PipelineDescriptionStep_Primitive struct { + Primitive *PrimitivePipelineDescriptionStep `protobuf:"bytes,1,opt,name=primitive,proto3,oneof"` +} +type PipelineDescriptionStep_Pipeline struct { + Pipeline *SubpipelinePipelineDescriptionStep `protobuf:"bytes,2,opt,name=pipeline,proto3,oneof"` +} +type PipelineDescriptionStep_Placeholder struct { + Placeholder *PlaceholderPipelineDescriptionStep `protobuf:"bytes,3,opt,name=placeholder,proto3,oneof"` +} + +func (*PipelineDescriptionStep_Primitive) isPipelineDescriptionStep_Step() {} +func (*PipelineDescriptionStep_Pipeline) isPipelineDescriptionStep_Step() {} +func (*PipelineDescriptionStep_Placeholder) 
isPipelineDescriptionStep_Step() {} + +func (m *PipelineDescriptionStep) GetStep() isPipelineDescriptionStep_Step { + if m != nil { + return m.Step + } + return nil +} + +func (m *PipelineDescriptionStep) GetPrimitive() *PrimitivePipelineDescriptionStep { + if x, ok := m.GetStep().(*PipelineDescriptionStep_Primitive); ok { + return x.Primitive + } + return nil +} + +func (m *PipelineDescriptionStep) GetPipeline() *SubpipelinePipelineDescriptionStep { + if x, ok := m.GetStep().(*PipelineDescriptionStep_Pipeline); ok { + return x.Pipeline + } + return nil +} + +func (m *PipelineDescriptionStep) GetPlaceholder() *PlaceholderPipelineDescriptionStep { + if x, ok := m.GetStep().(*PipelineDescriptionStep_Placeholder); ok { + return x.Placeholder + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. +func (*PipelineDescriptionStep) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _PipelineDescriptionStep_OneofMarshaler, _PipelineDescriptionStep_OneofUnmarshaler, _PipelineDescriptionStep_OneofSizer, []interface{}{ + (*PipelineDescriptionStep_Primitive)(nil), + (*PipelineDescriptionStep_Pipeline)(nil), + (*PipelineDescriptionStep_Placeholder)(nil), + } +} + +func _PipelineDescriptionStep_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*PipelineDescriptionStep) + // step + switch x := m.Step.(type) { + case *PipelineDescriptionStep_Primitive: + b.EncodeVarint(1<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Primitive); err != nil { + return err + } + case *PipelineDescriptionStep_Pipeline: + b.EncodeVarint(2<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Pipeline); err != nil { + return err + } + case *PipelineDescriptionStep_Placeholder: + b.EncodeVarint(3<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Placeholder); err != nil { + return err + } + case nil: + 
default: + return fmt.Errorf("PipelineDescriptionStep.Step has unexpected type %T", x) + } + return nil +} + +func _PipelineDescriptionStep_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*PipelineDescriptionStep) + switch tag { + case 1: // step.primitive + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(PrimitivePipelineDescriptionStep) + err := b.DecodeMessage(msg) + m.Step = &PipelineDescriptionStep_Primitive{msg} + return true, err + case 2: // step.pipeline + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(SubpipelinePipelineDescriptionStep) + err := b.DecodeMessage(msg) + m.Step = &PipelineDescriptionStep_Pipeline{msg} + return true, err + case 3: // step.placeholder + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(PlaceholderPipelineDescriptionStep) + err := b.DecodeMessage(msg) + m.Step = &PipelineDescriptionStep_Placeholder{msg} + return true, err + default: + return false, nil + } +} + +func _PipelineDescriptionStep_OneofSizer(msg proto.Message) (n int) { + m := msg.(*PipelineDescriptionStep) + // step + switch x := m.Step.(type) { + case *PipelineDescriptionStep_Primitive: + s := proto.Size(x.Primitive) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PipelineDescriptionStep_Pipeline: + s := proto.Size(x.Pipeline) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *PipelineDescriptionStep_Placeholder: + s := proto.Size(x.Placeholder) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case nil: + default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +// Pipeline description matches the D3M pipeline description. +// It serves two purposes: describing found pipelines by TA2 to TA3, and communicating pipeline +// templates by TA3 to TA2. 
Because of this some fields are reasonable only in one of those uses. +// They are marked with "TA2" or "TA3" in the comment, for fields which are primarily to be set +// only by TA2 or only by TA3, respectivelly. +type PipelineDescription struct { + // TA2: UUID of the pipeline. Templates do not have IDs. But TA3 might provide it for a fully + // specified pipeline. It does not necessary have to match "solution_id" from + // "ListSolutionsResponse" and other related messages. Those IDs are about whole solutions + // (pipeline, potentially fitted, with set hyper-parameters). This here ID is about this + // particular ID description. + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + // "schema" field is not needed because it is fixed by the TA2-TA3 protocol version. + // System which generated a pipeline or a template. Optional. + Source *PipelineSource `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"` + // TA2: Timestamp when created. Templates do not have this timestamp. TA3 might provide it for + // a fully specified pipeline. + Created *timestamp.Timestamp `protobuf:"bytes,3,opt,name=created,proto3" json:"created,omitempty"` + // In which context a template or pipeline was made. This is helpful to distinguish evaluation + // context from other contexts. The value should not really influence different behavior from + // either system, but it is useful when recording metalearning information to understand this. + Context PipelineContext `protobuf:"varint,4,opt,name=context,proto3,enum=PipelineContext" json:"context,omitempty"` + // Human friendly name of the pipeline. For templates it can be a hint to + // TA2 how to name found pipelines. Optional. + Name string `protobuf:"bytes,5,opt,name=name,proto3" json:"name,omitempty"` + // Human friendly description of the pipeline. Optional. 
+ Description string `protobuf:"bytes,6,opt,name=description,proto3" json:"description,omitempty"` + // List of users associated with the creation of the template and consequently of the pipeline. + // TA2 can store this information into metalearning database. TA2 is not really expected to use + // this information during pipeline search. TA2 should not have to understand TA3 users, mapping + // between users and pipeline search IDs is something TA3 should handle. Optional. + Users []*PipelineDescriptionUser `protobuf:"bytes,7,rep,name=users,proto3" json:"users,omitempty"` + // In most cases inputs are datasets. But if TA3 wants to jut run a primitive, it can send a + // template with only that primitive in the template, and then pass anything to its inputs during + // execution. Here, we are describing possible inputs to the pipeline or template. Order matters. + Inputs []*PipelineDescriptionInput `protobuf:"bytes,8,rep,name=inputs,proto3" json:"inputs,omitempty"` + // Available outputs of the pipeline or template. + Outputs []*PipelineDescriptionOutput `protobuf:"bytes,9,rep,name=outputs,proto3" json:"outputs,omitempty"` + // Steps defining the pipeline. 
+ Steps []*PipelineDescriptionStep `protobuf:"bytes,10,rep,name=steps,proto3" json:"steps,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PipelineDescription) Reset() { *m = PipelineDescription{} } +func (m *PipelineDescription) String() string { return proto.CompactTextString(m) } +func (*PipelineDescription) ProtoMessage() {} +func (*PipelineDescription) Descriptor() ([]byte, []int) { + return fileDescriptor_pipeline_448625383800ad61, []int{18} +} +func (m *PipelineDescription) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_PipelineDescription.Unmarshal(m, b) +} +func (m *PipelineDescription) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_PipelineDescription.Marshal(b, m, deterministic) +} +func (dst *PipelineDescription) XXX_Merge(src proto.Message) { + xxx_messageInfo_PipelineDescription.Merge(dst, src) +} +func (m *PipelineDescription) XXX_Size() int { + return xxx_messageInfo_PipelineDescription.Size(m) +} +func (m *PipelineDescription) XXX_DiscardUnknown() { + xxx_messageInfo_PipelineDescription.DiscardUnknown(m) +} + +var xxx_messageInfo_PipelineDescription proto.InternalMessageInfo + +func (m *PipelineDescription) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *PipelineDescription) GetSource() *PipelineSource { + if m != nil { + return m.Source + } + return nil +} + +func (m *PipelineDescription) GetCreated() *timestamp.Timestamp { + if m != nil { + return m.Created + } + return nil +} + +func (m *PipelineDescription) GetContext() PipelineContext { + if m != nil { + return m.Context + } + return PipelineContext_PIPELINE_CONTEXT_UNKNOWN +} + +func (m *PipelineDescription) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *PipelineDescription) GetDescription() string { + if m != nil { + return m.Description + } + return "" +} + +func (m *PipelineDescription) 
GetUsers() []*PipelineDescriptionUser { + if m != nil { + return m.Users + } + return nil +} + +func (m *PipelineDescription) GetInputs() []*PipelineDescriptionInput { + if m != nil { + return m.Inputs + } + return nil +} + +func (m *PipelineDescription) GetOutputs() []*PipelineDescriptionOutput { + if m != nil { + return m.Outputs + } + return nil +} + +func (m *PipelineDescription) GetSteps() []*PipelineDescriptionStep { + if m != nil { + return m.Steps + } + return nil +} + +func init() { + proto.RegisterType((*ContainerArgument)(nil), "ContainerArgument") + proto.RegisterType((*DataArgument)(nil), "DataArgument") + proto.RegisterType((*DataArguments)(nil), "DataArguments") + proto.RegisterType((*PrimitiveArgument)(nil), "PrimitiveArgument") + proto.RegisterType((*PrimitiveArguments)(nil), "PrimitiveArguments") + proto.RegisterType((*ValueArgument)(nil), "ValueArgument") + proto.RegisterType((*PrimitiveStepArgument)(nil), "PrimitiveStepArgument") + proto.RegisterType((*PrimitiveStepHyperparameter)(nil), "PrimitiveStepHyperparameter") + proto.RegisterType((*StepInput)(nil), "StepInput") + proto.RegisterType((*StepOutput)(nil), "StepOutput") + proto.RegisterType((*PipelineSource)(nil), "PipelineSource") + proto.RegisterType((*PipelineDescriptionUser)(nil), "PipelineDescriptionUser") + proto.RegisterType((*PipelineDescriptionInput)(nil), "PipelineDescriptionInput") + proto.RegisterType((*PipelineDescriptionOutput)(nil), "PipelineDescriptionOutput") + proto.RegisterType((*PrimitivePipelineDescriptionStep)(nil), "PrimitivePipelineDescriptionStep") + proto.RegisterMapType((map[string]*PrimitiveStepArgument)(nil), "PrimitivePipelineDescriptionStep.ArgumentsEntry") + proto.RegisterMapType((map[string]*PrimitiveStepHyperparameter)(nil), "PrimitivePipelineDescriptionStep.HyperparamsEntry") + proto.RegisterType((*SubpipelinePipelineDescriptionStep)(nil), "SubpipelinePipelineDescriptionStep") + proto.RegisterType((*PlaceholderPipelineDescriptionStep)(nil), 
"PlaceholderPipelineDescriptionStep") + proto.RegisterType((*PipelineDescriptionStep)(nil), "PipelineDescriptionStep") + proto.RegisterType((*PipelineDescription)(nil), "PipelineDescription") + proto.RegisterEnum("PipelineContext", PipelineContext_name, PipelineContext_value) +} + +func init() { proto.RegisterFile("pipeline.proto", fileDescriptor_pipeline_448625383800ad61) } + +var fileDescriptor_pipeline_448625383800ad61 = []byte{ + // 976 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x56, 0xdd, 0x6e, 0xdb, 0x36, + 0x14, 0xf6, 0xbf, 0xe3, 0xa3, 0xc5, 0x71, 0xd9, 0xad, 0x53, 0xbd, 0x00, 0xf5, 0x14, 0x6c, 0x09, + 0xda, 0x81, 0xed, 0xbc, 0x5e, 0x0c, 0xc3, 0x6e, 0x1c, 0xc7, 0xa8, 0x8d, 0x15, 0xb6, 0x41, 0x2b, + 0xdd, 0x30, 0x14, 0x08, 0x14, 0x9b, 0x4b, 0x85, 0xd9, 0x92, 0x20, 0xd1, 0xc1, 0x02, 0xec, 0x65, + 0xf6, 0x04, 0x7b, 0x8e, 0xbd, 0xcd, 0x5e, 0x60, 0xc0, 0x40, 0x8a, 0xa4, 0x24, 0x5b, 0x8e, 0x73, + 0xd3, 0x3b, 0x93, 0xfc, 0xf8, 0x9d, 0xc3, 0xef, 0x7c, 0xe7, 0x58, 0xd0, 0x0c, 0xdc, 0x80, 0x2e, + 0x5d, 0x8f, 0xe2, 0x20, 0xf4, 0x99, 0xdf, 0xee, 0xdc, 0xf8, 0xfe, 0xcd, 0x92, 0xbe, 0x14, 0xab, + 0xeb, 0xf5, 0x6f, 0x2f, 0x17, 0x34, 0x9a, 0x87, 0x6e, 0xc0, 0xfc, 0x50, 0x22, 0x9e, 0x6d, 0x22, + 0x98, 0xbb, 0xa2, 0x11, 0x73, 0x56, 0x81, 0x04, 0x1c, 0x05, 0xa1, 0xbb, 0x72, 0x99, 0x7b, 0xab, + 0x38, 0x8d, 0x5b, 0x67, 0xb9, 0x96, 0x0b, 0xeb, 0x14, 0x1e, 0xf5, 0x7d, 0x8f, 0x39, 0xae, 0x47, + 0xc3, 0x5e, 0x78, 0xb3, 0x5e, 0x51, 0x8f, 0x21, 0x04, 0x95, 0x85, 0xc3, 0x1c, 0xb3, 0xd8, 0x29, + 0x9e, 0x35, 0x88, 0xf8, 0x6d, 0x59, 0xf0, 0xc9, 0x85, 0xc3, 0x9c, 0x7b, 0x31, 0x27, 0x70, 0x98, + 0xc6, 0x44, 0x29, 0x50, 0x59, 0x83, 0x4e, 0xe1, 0xd1, 0x54, 0x65, 0x94, 0xcb, 0x56, 0x95, 0xc0, + 0x33, 0x40, 0x5b, 0xc0, 0x2c, 0xa5, 0x42, 0xbe, 0x80, 0xc3, 0x77, 0xfc, 0x4d, 0x9a, 0xae, 0x9d, + 0xa2, 0x33, 0xba, 0x35, 0x2c, 0x4e, 0x25, 0xf8, 0x4f, 0xf8, 0x4c, 0xd3, 0xce, 0x18, 0x0d, 0xf4, + 0xa5, 0x2e, 0x34, 0xe6, 0x4a, 0x0a, 0x79, 
0x13, 0xe1, 0x2d, 0x71, 0x86, 0x05, 0x92, 0xc0, 0xd0, + 0x89, 0x0c, 0x54, 0x12, 0xf0, 0x43, 0x9c, 0x7e, 0xfe, 0xb0, 0x10, 0x47, 0x3c, 0x07, 0x38, 0x70, + 0xe4, 0x9e, 0xf5, 0x4f, 0x09, 0xbe, 0xc8, 0x84, 0x1f, 0xde, 0x05, 0x34, 0x0c, 0x9c, 0xd0, 0x59, + 0x51, 0x46, 0xc3, 0x8f, 0x96, 0x04, 0x27, 0xd6, 0x46, 0x30, 0xcb, 0x92, 0x78, 0x4b, 0x5f, 0x4e, + 0xac, 0x61, 0xe8, 0x6b, 0xa8, 0x0a, 0xaf, 0x98, 0x15, 0x81, 0x6f, 0xe2, 0x8c, 0xca, 0xc3, 0x02, + 0x89, 0x8f, 0xd1, 0x0b, 0x38, 0xe0, 0x31, 0xae, 0x22, 0xca, 0xcc, 0xaa, 0x84, 0x66, 0x8c, 0x30, + 0x2c, 0x90, 0x3a, 0x47, 0xcc, 0x28, 0x43, 0x3f, 0x42, 0x53, 0x47, 0x88, 0xc4, 0x95, 0x9a, 0xb8, + 0xf2, 0x78, 0x3b, 0x1b, 0x7e, 0xef, 0x30, 0x01, 0xcf, 0x28, 0xcb, 0x68, 0xf9, 0x0c, 0x1a, 0x5c, + 0xc1, 0x91, 0x17, 0xac, 0xf3, 0xfd, 0x78, 0x0c, 0xc0, 0x01, 0x93, 0x35, 0xe3, 0x88, 0x26, 0x94, + 0xdc, 0x85, 0x3c, 0x2f, 0xb9, 0x0b, 0xeb, 0x3d, 0x34, 0xa7, 0xb2, 0xdb, 0x66, 0xfe, 0x3a, 0x9c, + 0x53, 0xce, 0xe1, 0x39, 0x2b, 0xaa, 0x38, 0xf8, 0x6f, 0x64, 0x42, 0x5d, 0x28, 0x3d, 0x67, 0x42, + 0xdf, 0x06, 0x51, 0x4b, 0x74, 0x0c, 0x0d, 0xd5, 0xad, 0x91, 0x59, 0x16, 0x0e, 0x4f, 0x36, 0xac, + 0x2b, 0xf8, 0x5c, 0xb1, 0x5f, 0xc8, 0x9e, 0x75, 0x7d, 0xef, 0x32, 0xa2, 0xe1, 0x66, 0x22, 0xe8, + 0x09, 0xd4, 0x42, 0xea, 0x44, 0xbe, 0x27, 0x23, 0xc8, 0x15, 0x0f, 0x10, 0x3a, 0xfc, 0x96, 0xb3, + 0x8c, 0x4b, 0xd6, 0x20, 0xc9, 0x86, 0x85, 0xc1, 0xcc, 0x09, 0xa0, 0xc5, 0xd8, 0x7c, 0x88, 0xd5, + 0x87, 0xa7, 0x39, 0x78, 0xa9, 0x4d, 0xde, 0xcb, 0x51, 0xca, 0x56, 0x4a, 0xd1, 0xff, 0xca, 0xd0, + 0xd1, 0x65, 0xca, 0xa1, 0xe3, 0x72, 0xa3, 0xb3, 0xb4, 0xd5, 0x62, 0x0f, 0x43, 0x52, 0xdc, 0xb4, + 0xc1, 0xc6, 0xd0, 0x50, 0xd5, 0x8c, 0xcc, 0x52, 0xa7, 0x7c, 0x66, 0x74, 0x5f, 0xe1, 0x7d, 0xfc, + 0x58, 0xdb, 0x63, 0xe0, 0xb1, 0xf0, 0x8e, 0x24, 0x14, 0xe8, 0x2b, 0xa8, 0xfb, 0xe2, 0x41, 0x71, + 0x41, 0x8c, 0xae, 0x81, 0x13, 0x03, 0x10, 0x75, 0x86, 0x6c, 0x30, 0x3e, 0xe8, 0xb6, 0x8b, 0xcc, + 0x8a, 0x80, 0x76, 0xf7, 0x07, 0x4e, 0x7a, 0x55, 0x86, 0x4e, 0xd3, 0x20, 0x0c, 
0xd5, 0x75, 0x44, + 0xc3, 0xc8, 0xac, 0x0a, 0x3e, 0x13, 0xef, 0xa8, 0x3f, 0x89, 0x61, 0x6d, 0x1b, 0x9a, 0xd9, 0x97, + 0xa0, 0x16, 0x94, 0x7f, 0xa7, 0x77, 0xb2, 0x08, 0xfc, 0x27, 0xfa, 0x46, 0x75, 0x60, 0xdc, 0xdb, + 0x4f, 0x70, 0xee, 0xe8, 0x92, 0x7d, 0xf8, 0x43, 0xe9, 0xfb, 0x62, 0xfb, 0x3d, 0xb4, 0x36, 0xd3, + 0xcc, 0xe1, 0xed, 0x66, 0x79, 0x8f, 0xf1, 0x3d, 0x33, 0x29, 0xc5, 0x6e, 0xfd, 0x55, 0x04, 0x6b, + 0xb6, 0xbe, 0x56, 0x36, 0xdf, 0xe5, 0x80, 0x57, 0x70, 0xa0, 0x20, 0xd2, 0x00, 0x9f, 0xe6, 0xa9, + 0x41, 0x34, 0x0a, 0x59, 0x50, 0x73, 0x3d, 0x51, 0xb8, 0xd8, 0x06, 0x80, 0x75, 0x6b, 0x13, 0x79, + 0xf2, 0xc0, 0xea, 0x5a, 0x3e, 0x58, 0xd3, 0xa5, 0x33, 0xa7, 0x1f, 0xfc, 0xe5, 0x82, 0x86, 0xbb, + 0x52, 0x4c, 0x02, 0x16, 0x1f, 0x12, 0xb0, 0x74, 0x4f, 0xc0, 0x7f, 0x8b, 0xb9, 0xbd, 0x2e, 0xc2, + 0xf4, 0xb6, 0x7b, 0xe1, 0xcb, 0xbd, 0x46, 0xcb, 0x4e, 0xe1, 0x5e, 0x4a, 0xcc, 0xb8, 0x5c, 0x27, + 0x78, 0x7f, 0x0d, 0x86, 0x85, 0x94, 0xba, 0x6f, 0xc0, 0x08, 0x12, 0x49, 0xe4, 0xf8, 0x3f, 0xc1, + 0xfb, 0x65, 0x1a, 0x16, 0x48, 0xfa, 0xe6, 0x79, 0x0d, 0x2a, 0x11, 0xa3, 0x81, 0xf5, 0x77, 0x19, + 0x1e, 0xe7, 0x5c, 0xd9, 0x1a, 0x6d, 0xa7, 0x50, 0x8b, 0xc4, 0x6c, 0x95, 0x99, 0x1f, 0xe1, 0xec, + 0xc8, 0x25, 0xf2, 0x18, 0xbd, 0x86, 0xfa, 0x3c, 0xa4, 0x0e, 0xa3, 0x0b, 0x99, 0x5d, 0x1b, 0xc7, + 0x1f, 0x36, 0x58, 0x7d, 0xd8, 0x60, 0x5b, 0x7d, 0xd8, 0x10, 0x05, 0x45, 0xcf, 0xe3, 0xe1, 0x4c, + 0xff, 0x60, 0xe2, 0x2f, 0xaa, 0xd9, 0x6d, 0x69, 0xfe, 0x7e, 0xbc, 0x4f, 0x14, 0x40, 0x8f, 0xb8, + 0x6a, 0x6a, 0xc4, 0x75, 0xc0, 0x58, 0x24, 0xd9, 0x8b, 0x3f, 0xa2, 0x06, 0x49, 0x6f, 0x25, 0x4d, + 0x5d, 0x7f, 0x50, 0x53, 0xa3, 0x6f, 0xb5, 0xad, 0x0e, 0xc4, 0x85, 0xa7, 0x78, 0xd7, 0x90, 0xd6, + 0x2e, 0x7b, 0x9d, 0xb8, 0xac, 0x21, 0xee, 0xb4, 0xf1, 0xce, 0x41, 0x9d, 0xcc, 0x30, 0x0c, 0x55, + 0x5e, 0x89, 0xc8, 0x84, 0xdd, 0x89, 0xf1, 0x0a, 0x92, 0x18, 0xf6, 0x7c, 0x05, 0x47, 0x1b, 0xd2, + 0xa0, 0x63, 0x30, 0xa7, 0xa3, 0xe9, 0xe0, 0xed, 0x68, 0x3c, 0xb8, 0xea, 0x4f, 0xc6, 0xf6, 0xe0, + 0x17, 0xfb, 0xea, 
0x72, 0xfc, 0xd3, 0x78, 0xf2, 0xf3, 0xb8, 0x55, 0x40, 0x47, 0x60, 0x4c, 0xc9, + 0xc0, 0x26, 0xbd, 0xd1, 0x78, 0x34, 0x7e, 0xd3, 0x2a, 0x22, 0x03, 0xea, 0xf6, 0x60, 0x66, 0xf3, + 0x45, 0x09, 0x35, 0x01, 0x06, 0xef, 0x7a, 0x6f, 0x2f, 0x7b, 0xf6, 0x68, 0x32, 0x6e, 0x95, 0xf9, + 0x7a, 0x4a, 0x26, 0x17, 0x97, 0x7d, 0xb1, 0xae, 0x9c, 0xc3, 0xaf, 0xda, 0x7d, 0xd7, 0x35, 0x51, + 0xc2, 0xef, 0xfe, 0x0f, 0x00, 0x00, 0xff, 0xff, 0x14, 0x9b, 0x86, 0x6e, 0xdd, 0x0a, 0x00, 0x00, +} diff --git a/pipeline/pipeline.proto b/pipeline/pipeline.proto new file mode 100644 index 0000000..1a87372 --- /dev/null +++ b/pipeline/pipeline.proto @@ -0,0 +1,229 @@ +syntax = "proto3"; +option go_package = "pipeline"; + +import "google/protobuf/descriptor.proto"; +import "google/protobuf/timestamp.proto"; + +import "primitive.proto"; +import "value.proto"; + +// Pipeline description contains many "data references". Data reference is just a string +// which identifies an output of a step or a pipeline input and forms a data-flow connection +// between data available and an input to a step. It is recommended to be a string of the +// following forms: +// +// * `steps.<number>.<id>` — `number` identifies the step in the list of steps (0-based) +// and `id` identifies the name of a produce method of the primitive, +// or the output of a pipeline step +// +// * `inputs.<number>` — `number` identifies the pipeline input (0-based) +// +// * `outputs.<number>` — `number` identifies the pipeline output (0-based) + +message ContainerArgument { + // Data reference. + string data = 1; +} + +message DataArgument { + // Data reference. + string data = 1; +} + +message DataArguments { + repeated string data = 1; +} + +message PrimitiveArgument { + // 0-based index identifying a step of which primitive is used as a value. + int32 data = 1; +} + +message PrimitiveArguments { + // 0-based index identifying a step of which primitive is used as a value.
+ repeated int32 data = 1; +} + +message ValueArgument { + Value data = 1; +} + +message PrimitiveStepArgument { + oneof argument { + // A container data type as an argument. + ContainerArgument container = 1; + // A singleton output from another step as an argument. + DataArgument data = 2; + } +} + +message PrimitiveStepHyperparameter { + oneof argument { + // A container data type as a hyper-parameter. + ContainerArgument container = 1; + // A singleton output from another step as a hyper-parameter. + DataArgument data = 2; + // A primitive instance to be passed as a hyper-parameter. + PrimitiveArgument primitive = 3; + // A constant value of a hyper-parameter. + ValueArgument value = 4; + // A set of singleton outputs from other steps in a pipeline. + DataArguments data_set = 5; + // A set of primitive instances to be passed as a hyper-parameter. + PrimitiveArguments primitives_set = 6; + } +} + +message StepInput { + // Data reference. + string data = 1; +} + +message StepOutput { + // Name which becomes part of the data reference. + string id = 1; +} + +message PipelineSource { + // String representing name of the author, team. + string name = 1; + // A URI to contact the source. + string contact = 2; + // A list of pipeline IDs used to derive the pipeline. + repeated string pipelines = 3; +} + +enum PipelineContext { + // Default value. Not to be used. + PIPELINE_CONTEXT_UNKNOWN = 0; + + // Pipeline was created during building/training of the system itself, e.g., during metalearning. + PRETRAINING = 1; + // Pipeline was created during development or testing of the system itself, e.g., during debugging. + TESTING = 2; + // Pipeline was created during evaluation of the system itself, e.g., NIST blind evaluation. + EVALUATION = 3; + // Pipeline was created during regular (production) operation of the system. + PRODUCTION = 4; +} + +// User associated with the creation of the template/pipeline, or selection of a primitive.
+message PipelineDescriptionUser { + // Globally unique ID for this user. It can be opaque, but it should identify the same user + // across sessions. Consider using UUID variant 5 with namespace set to the name of your system + // and name to an ID in your system's database. It does not have to map to any real ID, just + // that it is possible to connect multiple pipelines/templates by the same user together, + // if necessary. + string id = 1; + // A natural language description of what the user did to be on the list, e.g., "Picked + // a pipeline from a list of pipelines.". + string reason = 2; + // A natural language description by the user of what the user did, + // e.g., "I picked a pipeline because it looks short in comparison with others.". + string rationale = 3; +} + +// Possible input to the pipeline or template. +message PipelineDescriptionInput { + // Human friendly name of the input. + string name = 1; +} + +// Available output of the pipeline or template. +message PipelineDescriptionOutput { + // Human friendly name of the output. + string name = 1; + // Data reference, probably of an output of a step. + string data = 2; +} + +message PrimitivePipelineDescriptionStep { + Primitive primitive = 1; + // Arguments to the primitive. Constructor arguments should not be listed here, because they + // can be automatically created from other information. All these arguments are listed as kind + // "PIPELINE" in primitive's metadata. + map<string, PrimitiveStepArgument> arguments = 2; + // List of produce methods providing data. One can reference using data reference these outputs + // then in arguments (inputs) in other steps or pipeline outputs. + repeated StepOutput outputs = 3; + // Some hyper-parameters are not really tunable and should be fixed as part of template/pipeline. + // This can be done here. Hyper-parameters listed here cannot be tuned or overridden. Author of a + // template/pipeline decides which hyper-parameters are which, probably based on their semantic type.
+ // TA3 can specify a list of hyper-parameters to fix, and TA2 can add to the list additional + // hyper-parameters in found pipelines. + map<string, PrimitiveStepHyperparameter> hyperparams = 4; + // List of users associated with selection of this primitive/arguments/hyper-parameters. Optional. + repeated PipelineDescriptionUser users = 5; +} + +message SubpipelinePipelineDescriptionStep { + // Only "id" field is required in this case to reference another pipeline in the template. + PipelineDescription pipeline = 1; + // List of data references, probably of an output of a step or pipeline input, + // mapped to sub-pipeline's inputs in order. + repeated StepInput inputs = 2; + // List of IDs to be used in data references, mapping sub-pipeline's outputs in order. + repeated StepOutput outputs = 3; +} + +// Used to represent a pipeline template which can be used to generate full pipelines. +// A placeholder is replaced with a pipeline step to form a pipeline. See README.md +// for restrictions on the number of them, their position, allowed inputs and outputs, +// etc. +message PlaceholderPipelineDescriptionStep { + // List of inputs which can be used as inputs to resulting sub-pipeline. Resulting + // sub-pipeline does not have to use all the inputs, but it cannot use any other inputs. + repeated StepInput inputs = 1; + // A list of outputs of the resulting sub-pipeline. + repeated StepOutput outputs = 2; +} + +message PipelineDescriptionStep { + oneof step { + PrimitivePipelineDescriptionStep primitive = 1; + SubpipelinePipelineDescriptionStep pipeline = 2; + PlaceholderPipelineDescriptionStep placeholder = 3; + } +} + +// Pipeline description matches the D3M pipeline description. +// It serves two purposes: describing found pipelines by TA2 to TA3, and communicating pipeline +// templates by TA3 to TA2. Because of this some fields are reasonable only in one of those uses.
+// They are marked with "TA2" or "TA3" in the comment, for fields which are primarily to be set +// only by TA2 or only by TA3, respectively. +message PipelineDescription { + // TA2: UUID of the pipeline. Templates do not have IDs. But TA3 might provide it for a fully + // specified pipeline. It does not necessarily have to match "solution_id" from + // "ListSolutionsResponse" and other related messages. Those IDs are about whole solutions + // (pipeline, potentially fitted, with set hyper-parameters). This here ID is about this + // particular pipeline description. + string id = 1; + // "schema" field is not needed because it is fixed by the TA2-TA3 protocol version. + // System which generated a pipeline or a template. Optional. + PipelineSource source = 2; + // TA2: Timestamp when created. Templates do not have this timestamp. TA3 might provide it for + // a fully specified pipeline. + google.protobuf.Timestamp created = 3; + // In which context a template or pipeline was made. This is helpful to distinguish evaluation + // context from other contexts. The value should not really influence different behavior from + // either system, but it is useful when recording metalearning information to understand this. + PipelineContext context = 4; + // Human friendly name of the pipeline. For templates it can be a hint to + // TA2 how to name found pipelines. Optional. + string name = 5; + // Human friendly description of the pipeline. Optional. + string description = 6; + // List of users associated with the creation of the template and consequently of the pipeline. + // TA2 can store this information into metalearning database. TA2 is not really expected to use + // this information during pipeline search. TA2 should not have to understand TA3 users, mapping + // between users and pipeline search IDs is something TA3 should handle. Optional. + repeated PipelineDescriptionUser users = 7; + // In most cases inputs are datasets.
But if TA3 wants to just run a primitive, it can send a + // template with only that primitive in the template, and then pass anything to its inputs during + // execution. Here, we are describing possible inputs to the pipeline or template. Order matters. + repeated PipelineDescriptionInput inputs = 8; + // Available outputs of the pipeline or template. + repeated PipelineDescriptionOutput outputs = 9; + // Steps defining the pipeline. + repeated PipelineDescriptionStep steps = 10; +} diff --git a/pipeline/primitive.pb.go b/pipeline/primitive.pb.go new file mode 100644 index 0000000..945e183 --- /dev/null +++ b/pipeline/primitive.pb.go @@ -0,0 +1,115 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: primitive.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/golang/protobuf/protoc-gen-go/descriptor" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +// Description of the primitive. +type Primitive struct { + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` + PythonPath string `protobuf:"bytes,3,opt,name=python_path,json=pythonPath,proto3" json:"python_path,omitempty"` + Name string `protobuf:"bytes,4,opt,name=name,proto3" json:"name,omitempty"` + // Digest is optional, because some locally registered primitives might not have it. + // But for all primitives published it is available and it should be provided here as well.
+ Digest string `protobuf:"bytes,5,opt,name=digest,proto3" json:"digest,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Primitive) Reset() { *m = Primitive{} } +func (m *Primitive) String() string { return proto.CompactTextString(m) } +func (*Primitive) ProtoMessage() {} +func (*Primitive) Descriptor() ([]byte, []int) { + return fileDescriptor_primitive_791c73a92f28dc9a, []int{0} +} +func (m *Primitive) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Primitive.Unmarshal(m, b) +} +func (m *Primitive) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Primitive.Marshal(b, m, deterministic) +} +func (dst *Primitive) XXX_Merge(src proto.Message) { + xxx_messageInfo_Primitive.Merge(dst, src) +} +func (m *Primitive) XXX_Size() int { + return xxx_messageInfo_Primitive.Size(m) +} +func (m *Primitive) XXX_DiscardUnknown() { + xxx_messageInfo_Primitive.DiscardUnknown(m) +} + +var xxx_messageInfo_Primitive proto.InternalMessageInfo + +func (m *Primitive) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *Primitive) GetVersion() string { + if m != nil { + return m.Version + } + return "" +} + +func (m *Primitive) GetPythonPath() string { + if m != nil { + return m.PythonPath + } + return "" +} + +func (m *Primitive) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *Primitive) GetDigest() string { + if m != nil { + return m.Digest + } + return "" +} + +func init() { + proto.RegisterType((*Primitive)(nil), "Primitive") +} + +func init() { proto.RegisterFile("primitive.proto", fileDescriptor_primitive_791c73a92f28dc9a) } + +var fileDescriptor_primitive_791c73a92f28dc9a = []byte{ + // 177 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x34, 0x8e, 0xc1, 0xca, 0x82, 0x40, + 0x10, 0x80, 0xd1, 0xdf, 0xdf, 0x72, 0x82, 0x82, 0x3d, 0xc4, 0xd2, 
0x25, 0xe9, 0xd4, 0x29, 0x0f, + 0xbd, 0x41, 0x4f, 0x20, 0x1d, 0xbb, 0x84, 0xb6, 0x93, 0x0e, 0xe8, 0xee, 0xb2, 0x4e, 0x42, 0xd7, + 0x9e, 0x3c, 0x18, 0xf3, 0x36, 0xdf, 0xf7, 0xcd, 0xc0, 0xc0, 0xc6, 0x07, 0xea, 0x89, 0x69, 0xc4, + 0x93, 0x0f, 0x8e, 0xdd, 0x2e, 0x6f, 0x9c, 0x6b, 0x3a, 0x2c, 0x84, 0xea, 0xd7, 0xb3, 0x30, 0x38, + 0x3c, 0x02, 0x79, 0x76, 0x61, 0xda, 0x38, 0x7c, 0x22, 0xc8, 0xca, 0xf9, 0x4a, 0xad, 0x21, 0x26, + 0xa3, 0xa3, 0x3c, 0x3a, 0x66, 0xd7, 0x98, 0x8c, 0xd2, 0xb0, 0x18, 0x31, 0x0c, 0xe4, 0xac, 0x8e, + 0x45, 0xce, 0xa8, 0xf6, 0xb0, 0xf2, 0x6f, 0x6e, 0x9d, 0xbd, 0xfb, 0x8a, 0x5b, 0xfd, 0x27, 0x15, + 0x26, 0x55, 0x56, 0xdc, 0x2a, 0x05, 0x89, 0xad, 0x7a, 0xd4, 0x89, 0x14, 0x99, 0xd5, 0x16, 0x52, + 0x43, 0x0d, 0x0e, 0xac, 0xff, 0xc5, 0xfe, 0xe8, 0x02, 0xb7, 0xa5, 0x27, 0x8f, 0x1d, 0x59, 0xac, + 0x53, 0xf9, 0xeb, 0xfc, 0x0d, 0x00, 0x00, 0xff, 0xff, 0x49, 0xad, 0x53, 0xbe, 0xcc, 0x00, 0x00, + 0x00, +} diff --git a/pipeline/primitive.proto b/pipeline/primitive.proto new file mode 100644 index 0000000..44e4a9d --- /dev/null +++ b/pipeline/primitive.proto @@ -0,0 +1,15 @@ +syntax = "proto3"; +option go_package = "pipeline"; + +import "google/protobuf/descriptor.proto"; + +// Description of the primitive. +message Primitive { + string id = 1; + string version = 2; + string python_path = 3; + string name = 4; + // Digest is optional, because some locally registered primitives might not have it. + // But for all primitives published it is available and it should be provided here as well. + string digest = 5; +} diff --git a/pipeline/problem.pb.go b/pipeline/problem.pb.go new file mode 100644 index 0000000..85968e5 --- /dev/null +++ b/pipeline/problem.pb.go @@ -0,0 +1,599 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// source: problem.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/golang/protobuf/protoc-gen-go/descriptor" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +// Top level classification of the problem. +type TaskType int32 + +const ( + // Default value. Not to be used. + TaskType_TASK_TYPE_UNDEFINED TaskType = 0 + TaskType_CLASSIFICATION TaskType = 1 + TaskType_REGRESSION TaskType = 2 + TaskType_CLUSTERING TaskType = 3 + TaskType_LINK_PREDICTION TaskType = 4 + TaskType_VERTEX_NOMINATION TaskType = 5 + TaskType_COMMUNITY_DETECTION TaskType = 6 + TaskType_GRAPH_CLUSTERING TaskType = 7 + TaskType_GRAPH_MATCHING TaskType = 8 + TaskType_TIME_SERIES_FORECASTING TaskType = 9 + TaskType_COLLABORATIVE_FILTERING TaskType = 10 + TaskType_OBJECT_DETECTION TaskType = 11 +) + +var TaskType_name = map[int32]string{ + 0: "TASK_TYPE_UNDEFINED", + 1: "CLASSIFICATION", + 2: "REGRESSION", + 3: "CLUSTERING", + 4: "LINK_PREDICTION", + 5: "VERTEX_NOMINATION", + 6: "COMMUNITY_DETECTION", + 7: "GRAPH_CLUSTERING", + 8: "GRAPH_MATCHING", + 9: "TIME_SERIES_FORECASTING", + 10: "COLLABORATIVE_FILTERING", + 11: "OBJECT_DETECTION", +} +var TaskType_value = map[string]int32{ + "TASK_TYPE_UNDEFINED": 0, + "CLASSIFICATION": 1, + "REGRESSION": 2, + "CLUSTERING": 3, + "LINK_PREDICTION": 4, + "VERTEX_NOMINATION": 5, + "COMMUNITY_DETECTION": 6, + "GRAPH_CLUSTERING": 7, + "GRAPH_MATCHING": 8, + "TIME_SERIES_FORECASTING": 9, + "COLLABORATIVE_FILTERING": 10, + "OBJECT_DETECTION": 11, +} + +func (x TaskType) 
String() string { + return proto.EnumName(TaskType_name, int32(x)) +} +func (TaskType) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{0} +} + +// Secondary classification of the problem. +type TaskSubtype int32 + +const ( + // Default value. Not to be used. + TaskSubtype_TASK_SUBTYPE_UNDEFINED TaskSubtype = 0 + // No secondary task is applicable for this problem. + TaskSubtype_NONE TaskSubtype = 1 + TaskSubtype_BINARY TaskSubtype = 2 + TaskSubtype_MULTICLASS TaskSubtype = 3 + TaskSubtype_MULTILABEL TaskSubtype = 4 + TaskSubtype_UNIVARIATE TaskSubtype = 5 + TaskSubtype_MULTIVARIATE TaskSubtype = 6 + TaskSubtype_OVERLAPPING TaskSubtype = 7 + TaskSubtype_NONOVERLAPPING TaskSubtype = 8 +) + +var TaskSubtype_name = map[int32]string{ + 0: "TASK_SUBTYPE_UNDEFINED", + 1: "NONE", + 2: "BINARY", + 3: "MULTICLASS", + 4: "MULTILABEL", + 5: "UNIVARIATE", + 6: "MULTIVARIATE", + 7: "OVERLAPPING", + 8: "NONOVERLAPPING", +} +var TaskSubtype_value = map[string]int32{ + "TASK_SUBTYPE_UNDEFINED": 0, + "NONE": 1, + "BINARY": 2, + "MULTICLASS": 3, + "MULTILABEL": 4, + "UNIVARIATE": 5, + "MULTIVARIATE": 6, + "OVERLAPPING": 7, + "NONOVERLAPPING": 8, +} + +func (x TaskSubtype) String() string { + return proto.EnumName(TaskSubtype_name, int32(x)) +} +func (TaskSubtype) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{1} +} + +// The evaluation metric for any potential solution. +type PerformanceMetric int32 + +const ( + // Default value. Not to be used. + PerformanceMetric_METRIC_UNDEFINED PerformanceMetric = 0 + // The following are the only evaluation methods required + // to be supported for the ScoreSolution call. 
+ PerformanceMetric_ACCURACY PerformanceMetric = 1 + PerformanceMetric_PRECISION PerformanceMetric = 2 + PerformanceMetric_RECALL PerformanceMetric = 3 + PerformanceMetric_F1 PerformanceMetric = 4 + PerformanceMetric_F1_MICRO PerformanceMetric = 5 + PerformanceMetric_F1_MACRO PerformanceMetric = 6 + PerformanceMetric_ROC_AUC PerformanceMetric = 7 + PerformanceMetric_ROC_AUC_MICRO PerformanceMetric = 8 + PerformanceMetric_ROC_AUC_MACRO PerformanceMetric = 9 + PerformanceMetric_MEAN_SQUARED_ERROR PerformanceMetric = 10 + PerformanceMetric_ROOT_MEAN_SQUARED_ERROR PerformanceMetric = 11 + PerformanceMetric_ROOT_MEAN_SQUARED_ERROR_AVG PerformanceMetric = 12 + PerformanceMetric_MEAN_ABSOLUTE_ERROR PerformanceMetric = 13 + PerformanceMetric_R_SQUARED PerformanceMetric = 14 + PerformanceMetric_NORMALIZED_MUTUAL_INFORMATION PerformanceMetric = 15 + PerformanceMetric_JACCARD_SIMILARITY_SCORE PerformanceMetric = 16 + PerformanceMetric_PRECISION_AT_TOP_K PerformanceMetric = 17 + PerformanceMetric_OBJECT_DETECTION_AVERAGE_PRECISION PerformanceMetric = 18 + // The rest are defined to allow expressing internal evaluation + // scores used by TA2 during pipeline search. If any you are using + // is missing, feel free to request it to be added. + // Average loss of an unspecified loss function. 
+ PerformanceMetric_LOSS PerformanceMetric = 100 +) + +var PerformanceMetric_name = map[int32]string{ + 0: "METRIC_UNDEFINED", + 1: "ACCURACY", + 2: "PRECISION", + 3: "RECALL", + 4: "F1", + 5: "F1_MICRO", + 6: "F1_MACRO", + 7: "ROC_AUC", + 8: "ROC_AUC_MICRO", + 9: "ROC_AUC_MACRO", + 10: "MEAN_SQUARED_ERROR", + 11: "ROOT_MEAN_SQUARED_ERROR", + 12: "ROOT_MEAN_SQUARED_ERROR_AVG", + 13: "MEAN_ABSOLUTE_ERROR", + 14: "R_SQUARED", + 15: "NORMALIZED_MUTUAL_INFORMATION", + 16: "JACCARD_SIMILARITY_SCORE", + 17: "PRECISION_AT_TOP_K", + 18: "OBJECT_DETECTION_AVERAGE_PRECISION", + 100: "LOSS", +} +var PerformanceMetric_value = map[string]int32{ + "METRIC_UNDEFINED": 0, + "ACCURACY": 1, + "PRECISION": 2, + "RECALL": 3, + "F1": 4, + "F1_MICRO": 5, + "F1_MACRO": 6, + "ROC_AUC": 7, + "ROC_AUC_MICRO": 8, + "ROC_AUC_MACRO": 9, + "MEAN_SQUARED_ERROR": 10, + "ROOT_MEAN_SQUARED_ERROR": 11, + "ROOT_MEAN_SQUARED_ERROR_AVG": 12, + "MEAN_ABSOLUTE_ERROR": 13, + "R_SQUARED": 14, + "NORMALIZED_MUTUAL_INFORMATION": 15, + "JACCARD_SIMILARITY_SCORE": 16, + "PRECISION_AT_TOP_K": 17, + "OBJECT_DETECTION_AVERAGE_PRECISION": 18, + "LOSS": 100, +} + +func (x PerformanceMetric) String() string { + return proto.EnumName(PerformanceMetric_name, int32(x)) +} +func (PerformanceMetric) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{2} +} + +type ProblemPerformanceMetric struct { + Metric PerformanceMetric `protobuf:"varint,1,opt,name=metric,proto3,enum=PerformanceMetric" json:"metric,omitempty"` + // Additional params used by some metrics. 
+ K int32 `protobuf:"varint,2,opt,name=k,proto3" json:"k,omitempty"` + PosLabel string `protobuf:"bytes,3,opt,name=pos_label,json=posLabel,proto3" json:"pos_label,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProblemPerformanceMetric) Reset() { *m = ProblemPerformanceMetric{} } +func (m *ProblemPerformanceMetric) String() string { return proto.CompactTextString(m) } +func (*ProblemPerformanceMetric) ProtoMessage() {} +func (*ProblemPerformanceMetric) Descriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{0} +} +func (m *ProblemPerformanceMetric) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProblemPerformanceMetric.Unmarshal(m, b) +} +func (m *ProblemPerformanceMetric) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProblemPerformanceMetric.Marshal(b, m, deterministic) +} +func (dst *ProblemPerformanceMetric) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProblemPerformanceMetric.Merge(dst, src) +} +func (m *ProblemPerformanceMetric) XXX_Size() int { + return xxx_messageInfo_ProblemPerformanceMetric.Size(m) +} +func (m *ProblemPerformanceMetric) XXX_DiscardUnknown() { + xxx_messageInfo_ProblemPerformanceMetric.DiscardUnknown(m) +} + +var xxx_messageInfo_ProblemPerformanceMetric proto.InternalMessageInfo + +func (m *ProblemPerformanceMetric) GetMetric() PerformanceMetric { + if m != nil { + return m.Metric + } + return PerformanceMetric_METRIC_UNDEFINED +} + +func (m *ProblemPerformanceMetric) GetK() int32 { + if m != nil { + return m.K + } + return 0 +} + +func (m *ProblemPerformanceMetric) GetPosLabel() string { + if m != nil { + return m.PosLabel + } + return "" +} + +type Problem struct { + // ID of this problem. + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + // Version of this problem. 
+ Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` + Name string `protobuf:"bytes,3,opt,name=name,proto3" json:"name,omitempty"` + Description string `protobuf:"bytes,4,opt,name=description,proto3" json:"description,omitempty"` + TaskType TaskType `protobuf:"varint,5,opt,name=task_type,json=taskType,proto3,enum=TaskType" json:"task_type,omitempty"` + TaskSubtype TaskSubtype `protobuf:"varint,6,opt,name=task_subtype,json=taskSubtype,proto3,enum=TaskSubtype" json:"task_subtype,omitempty"` + PerformanceMetrics []*ProblemPerformanceMetric `protobuf:"bytes,7,rep,name=performance_metrics,json=performanceMetrics,proto3" json:"performance_metrics,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Problem) Reset() { *m = Problem{} } +func (m *Problem) String() string { return proto.CompactTextString(m) } +func (*Problem) ProtoMessage() {} +func (*Problem) Descriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{1} +} +func (m *Problem) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Problem.Unmarshal(m, b) +} +func (m *Problem) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Problem.Marshal(b, m, deterministic) +} +func (dst *Problem) XXX_Merge(src proto.Message) { + xxx_messageInfo_Problem.Merge(dst, src) +} +func (m *Problem) XXX_Size() int { + return xxx_messageInfo_Problem.Size(m) +} +func (m *Problem) XXX_DiscardUnknown() { + xxx_messageInfo_Problem.DiscardUnknown(m) +} + +var xxx_messageInfo_Problem proto.InternalMessageInfo + +func (m *Problem) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *Problem) GetVersion() string { + if m != nil { + return m.Version + } + return "" +} + +func (m *Problem) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *Problem) GetDescription() string { + if m != nil { + return 
m.Description + } + return "" +} + +func (m *Problem) GetTaskType() TaskType { + if m != nil { + return m.TaskType + } + return TaskType_TASK_TYPE_UNDEFINED +} + +func (m *Problem) GetTaskSubtype() TaskSubtype { + if m != nil { + return m.TaskSubtype + } + return TaskSubtype_TASK_SUBTYPE_UNDEFINED +} + +func (m *Problem) GetPerformanceMetrics() []*ProblemPerformanceMetric { + if m != nil { + return m.PerformanceMetrics + } + return nil +} + +type ProblemTarget struct { + TargetIndex int32 `protobuf:"varint,1,opt,name=target_index,json=targetIndex,proto3" json:"target_index,omitempty"` + ResourceId string `protobuf:"bytes,2,opt,name=resource_id,json=resourceId,proto3" json:"resource_id,omitempty"` + ColumnIndex int32 `protobuf:"varint,3,opt,name=column_index,json=columnIndex,proto3" json:"column_index,omitempty"` + ColumnName string `protobuf:"bytes,4,opt,name=column_name,json=columnName,proto3" json:"column_name,omitempty"` + ClustersNumber int32 `protobuf:"varint,5,opt,name=clusters_number,json=clustersNumber,proto3" json:"clusters_number,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProblemTarget) Reset() { *m = ProblemTarget{} } +func (m *ProblemTarget) String() string { return proto.CompactTextString(m) } +func (*ProblemTarget) ProtoMessage() {} +func (*ProblemTarget) Descriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{2} +} +func (m *ProblemTarget) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProblemTarget.Unmarshal(m, b) +} +func (m *ProblemTarget) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProblemTarget.Marshal(b, m, deterministic) +} +func (dst *ProblemTarget) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProblemTarget.Merge(dst, src) +} +func (m *ProblemTarget) XXX_Size() int { + return xxx_messageInfo_ProblemTarget.Size(m) +} +func (m *ProblemTarget) XXX_DiscardUnknown() { 
+ xxx_messageInfo_ProblemTarget.DiscardUnknown(m) +} + +var xxx_messageInfo_ProblemTarget proto.InternalMessageInfo + +func (m *ProblemTarget) GetTargetIndex() int32 { + if m != nil { + return m.TargetIndex + } + return 0 +} + +func (m *ProblemTarget) GetResourceId() string { + if m != nil { + return m.ResourceId + } + return "" +} + +func (m *ProblemTarget) GetColumnIndex() int32 { + if m != nil { + return m.ColumnIndex + } + return 0 +} + +func (m *ProblemTarget) GetColumnName() string { + if m != nil { + return m.ColumnName + } + return "" +} + +func (m *ProblemTarget) GetClustersNumber() int32 { + if m != nil { + return m.ClustersNumber + } + return 0 +} + +type ProblemInput struct { + // Should match one of input datasets given to the pipeline search. + // Every "Dataset" object has an "id" associated with it and is available + // in its metadata. That ID is then used here to reference those inputs. + DatasetId string `protobuf:"bytes,1,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"` + // Targets should resolve to columns in a given dataset. 
+ Targets []*ProblemTarget `protobuf:"bytes,2,rep,name=targets,proto3" json:"targets,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProblemInput) Reset() { *m = ProblemInput{} } +func (m *ProblemInput) String() string { return proto.CompactTextString(m) } +func (*ProblemInput) ProtoMessage() {} +func (*ProblemInput) Descriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{3} +} +func (m *ProblemInput) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProblemInput.Unmarshal(m, b) +} +func (m *ProblemInput) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProblemInput.Marshal(b, m, deterministic) +} +func (dst *ProblemInput) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProblemInput.Merge(dst, src) +} +func (m *ProblemInput) XXX_Size() int { + return xxx_messageInfo_ProblemInput.Size(m) +} +func (m *ProblemInput) XXX_DiscardUnknown() { + xxx_messageInfo_ProblemInput.DiscardUnknown(m) +} + +var xxx_messageInfo_ProblemInput proto.InternalMessageInfo + +func (m *ProblemInput) GetDatasetId() string { + if m != nil { + return m.DatasetId + } + return "" +} + +func (m *ProblemInput) GetTargets() []*ProblemTarget { + if m != nil { + return m.Targets + } + return nil +} + +// Problem description matches the parsed problem description by +// the d3m_metadata.problem.parse_problem_description Python method. +// Problem outputs are not necessary for the purpose of this API +// and are needed only when executing an exported pipeline, but then +// TA2 gets full problem description anyway directly. 
+type ProblemDescription struct { + Problem *Problem `protobuf:"bytes,1,opt,name=problem,proto3" json:"problem,omitempty"` + Inputs []*ProblemInput `protobuf:"bytes,2,rep,name=inputs,proto3" json:"inputs,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ProblemDescription) Reset() { *m = ProblemDescription{} } +func (m *ProblemDescription) String() string { return proto.CompactTextString(m) } +func (*ProblemDescription) ProtoMessage() {} +func (*ProblemDescription) Descriptor() ([]byte, []int) { + return fileDescriptor_problem_21650f98b898c36f, []int{4} +} +func (m *ProblemDescription) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ProblemDescription.Unmarshal(m, b) +} +func (m *ProblemDescription) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ProblemDescription.Marshal(b, m, deterministic) +} +func (dst *ProblemDescription) XXX_Merge(src proto.Message) { + xxx_messageInfo_ProblemDescription.Merge(dst, src) +} +func (m *ProblemDescription) XXX_Size() int { + return xxx_messageInfo_ProblemDescription.Size(m) +} +func (m *ProblemDescription) XXX_DiscardUnknown() { + xxx_messageInfo_ProblemDescription.DiscardUnknown(m) +} + +var xxx_messageInfo_ProblemDescription proto.InternalMessageInfo + +func (m *ProblemDescription) GetProblem() *Problem { + if m != nil { + return m.Problem + } + return nil +} + +func (m *ProblemDescription) GetInputs() []*ProblemInput { + if m != nil { + return m.Inputs + } + return nil +} + +func init() { + proto.RegisterType((*ProblemPerformanceMetric)(nil), "ProblemPerformanceMetric") + proto.RegisterType((*Problem)(nil), "Problem") + proto.RegisterType((*ProblemTarget)(nil), "ProblemTarget") + proto.RegisterType((*ProblemInput)(nil), "ProblemInput") + proto.RegisterType((*ProblemDescription)(nil), "ProblemDescription") + proto.RegisterEnum("TaskType", TaskType_name, TaskType_value) + 
proto.RegisterEnum("TaskSubtype", TaskSubtype_name, TaskSubtype_value) + proto.RegisterEnum("PerformanceMetric", PerformanceMetric_name, PerformanceMetric_value) +} + +func init() { proto.RegisterFile("problem.proto", fileDescriptor_problem_21650f98b898c36f) } + +var fileDescriptor_problem_21650f98b898c36f = []byte{ + // 966 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x74, 0x55, 0xdb, 0x72, 0xdb, 0x36, + 0x10, 0x2d, 0x75, 0xd7, 0xea, 0x62, 0x18, 0x49, 0x13, 0x35, 0x6e, 0x26, 0x8a, 0x66, 0x9a, 0x7a, + 0xfc, 0x20, 0x4f, 0xdc, 0x2f, 0x80, 0x28, 0xc8, 0x81, 0xcd, 0x8b, 0x0a, 0x82, 0x6a, 0x9d, 0x17, + 0x8c, 0x2e, 0x8c, 0x47, 0x63, 0x49, 0x64, 0x49, 0xaa, 0xd3, 0xfc, 0x44, 0x1f, 0xfb, 0x09, 0xfd, + 0x80, 0x7e, 0x41, 0x3f, 0xad, 0x03, 0x90, 0xb4, 0xe5, 0xb8, 0x79, 0xc3, 0x9e, 0xb3, 0x7b, 0xb0, + 0x7b, 0xb0, 0x43, 0x42, 0x27, 0x8a, 0xc3, 0xc5, 0x26, 0xd8, 0x0e, 0xa3, 0x38, 0x4c, 0xc3, 0x57, + 0xfd, 0xdb, 0x30, 0xbc, 0xdd, 0x04, 0xe7, 0x3a, 0x5a, 0xec, 0x3f, 0x9d, 0xaf, 0x82, 0x64, 0x19, + 0xaf, 0xa3, 0x34, 0x8c, 0xb3, 0x8c, 0xc1, 0x6f, 0xd0, 0x9b, 0x66, 0x25, 0xd3, 0x20, 0xfe, 0x14, + 0xc6, 0xdb, 0xf9, 0x6e, 0x19, 0xd8, 0x41, 0x1a, 0xaf, 0x97, 0xf8, 0x0c, 0x6a, 0x5b, 0x7d, 0xea, + 0x19, 0x7d, 0xe3, 0xb4, 0x7b, 0x81, 0x87, 0x4f, 0x72, 0x78, 0x9e, 0x81, 0xdb, 0x60, 0xdc, 0xf5, + 0x4a, 0x7d, 0xe3, 0xb4, 0xca, 0x8d, 0x3b, 0x7c, 0x02, 0xcd, 0x28, 0x4c, 0xe4, 0x66, 0xbe, 0x08, + 0x36, 0xbd, 0x72, 0xdf, 0x38, 0x6d, 0xf2, 0x46, 0x14, 0x26, 0x96, 0x8a, 0x07, 0x7f, 0x96, 0xa0, + 0x9e, 0xdf, 0x89, 0xbb, 0x50, 0x5a, 0xaf, 0xb4, 0x7c, 0x93, 0x97, 0xd6, 0x2b, 0xdc, 0x83, 0xfa, + 0xef, 0x41, 0x9c, 0xac, 0xc3, 0x9d, 0x16, 0x6b, 0xf2, 0x22, 0xc4, 0x18, 0x2a, 0xbb, 0xf9, 0x36, + 0xc8, 0xd5, 0xf4, 0x19, 0xf7, 0xa1, 0x55, 0x0c, 0xa4, 0x2a, 0x2a, 0x9a, 0x3a, 0x84, 0xf0, 0x3b, + 0x68, 0xa6, 0xf3, 0xe4, 0x4e, 0xa6, 0x9f, 0xa3, 0xa0, 0x57, 0xd5, 0x53, 0x34, 0x87, 0x62, 0x9e, + 0xdc, 0x89, 0xcf, 0x51, 0xc0, 0x1b, 0x69, 0x7e, 0xc2, 0xe7, 
0xd0, 0xd6, 0x79, 0xc9, 0x7e, 0xa1, + 0x53, 0x6b, 0x3a, 0xb5, 0xad, 0x53, 0xbd, 0x0c, 0xe3, 0xad, 0xf4, 0x21, 0xc0, 0x57, 0xf0, 0x2c, + 0x7a, 0x30, 0x43, 0x66, 0x2e, 0x24, 0xbd, 0x7a, 0xbf, 0x7c, 0xda, 0xba, 0xf8, 0x6e, 0xf8, 0x35, + 0x4f, 0x39, 0x8e, 0xbe, 0x84, 0x92, 0xc1, 0xbf, 0x06, 0x74, 0xf2, 0x02, 0x31, 0x8f, 0x6f, 0x83, + 0x14, 0xbf, 0x55, 0xed, 0xa8, 0x93, 0x5c, 0xef, 0x56, 0xc1, 0x1f, 0xda, 0xa0, 0xaa, 0x6a, 0x40, + 0x61, 0x4c, 0x41, 0xf8, 0x0d, 0xb4, 0xe2, 0x20, 0x09, 0xf7, 0xf1, 0x32, 0x90, 0xeb, 0x55, 0xee, + 0x16, 0x14, 0x10, 0x5b, 0x29, 0x8d, 0x65, 0xb8, 0xd9, 0x6f, 0x77, 0xb9, 0x46, 0x39, 0xd3, 0xc8, + 0xb0, 0x7b, 0x8d, 0x3c, 0x45, 0x5b, 0x9b, 0xf9, 0x07, 0x19, 0xe4, 0x28, 0x83, 0x7f, 0x84, 0xa3, + 0xe5, 0x66, 0x9f, 0xa4, 0x41, 0x9c, 0xc8, 0xdd, 0x7e, 0xbb, 0x08, 0x62, 0x6d, 0x62, 0x95, 0x77, + 0x0b, 0xd8, 0xd1, 0xe8, 0xe0, 0x17, 0x68, 0xe7, 0x13, 0xb0, 0x5d, 0xb4, 0x4f, 0xf1, 0x6b, 0x80, + 0xd5, 0x3c, 0x9d, 0x27, 0x6a, 0x82, 0xe2, 0x7d, 0x9b, 0x39, 0xc2, 0x56, 0xf8, 0x14, 0xea, 0xd9, + 0x2c, 0x49, 0xaf, 0xa4, 0x1d, 0xeb, 0x0e, 0x1f, 0x19, 0xc0, 0x0b, 0x7a, 0x20, 0x01, 0xe7, 0xcc, + 0xf8, 0xe0, 0x59, 0x07, 0x50, 0xcf, 0x17, 0x5d, 0x6b, 0xb7, 0x2e, 0x1a, 0x45, 0x3d, 0x2f, 0x08, + 0xfc, 0x03, 0xd4, 0xd6, 0xaa, 0x97, 0xe2, 0x8a, 0xce, 0xf0, 0xb0, 0x43, 0x9e, 0x93, 0x67, 0x7f, + 0x95, 0xa0, 0x51, 0x2c, 0x04, 0x7e, 0x09, 0xcf, 0x04, 0xf1, 0xae, 0xa5, 0xb8, 0x99, 0x52, 0xe9, + 0x3b, 0x63, 0x3a, 0x61, 0x0e, 0x1d, 0xa3, 0x6f, 0x30, 0x86, 0xae, 0x69, 0x11, 0xcf, 0x63, 0x13, + 0x66, 0x12, 0xc1, 0x5c, 0x07, 0x19, 0xb8, 0x0b, 0xc0, 0xe9, 0x25, 0xa7, 0x9e, 0xa7, 0xe2, 0x92, + 0x8a, 0x4d, 0xcb, 0xf7, 0x04, 0xe5, 0xcc, 0xb9, 0x44, 0x65, 0xfc, 0x0c, 0x8e, 0x2c, 0xe6, 0x5c, + 0xcb, 0x29, 0xa7, 0x63, 0x66, 0xea, 0xa2, 0x0a, 0xfe, 0x16, 0x8e, 0x67, 0x94, 0x0b, 0xfa, 0xab, + 0x74, 0x5c, 0x9b, 0x39, 0x99, 0x56, 0x55, 0x5d, 0x6c, 0xba, 0xb6, 0xed, 0x3b, 0x4c, 0xdc, 0xc8, + 0x31, 0x15, 0x34, 0xcb, 0xaf, 0xe1, 0xe7, 0x80, 0x2e, 0x39, 0x99, 0x7e, 0x90, 0x07, 0xd2, 0x75, + 
0xd5, 0x4e, 0x86, 0xda, 0x44, 0x98, 0x1f, 0x14, 0xd6, 0xc0, 0x27, 0xf0, 0x52, 0x30, 0x9b, 0x4a, + 0x8f, 0x72, 0x46, 0x3d, 0x39, 0x71, 0x39, 0x35, 0x89, 0x27, 0x14, 0xd9, 0x54, 0xa4, 0xe9, 0x5a, + 0x16, 0x19, 0xb9, 0x9c, 0x08, 0x36, 0xa3, 0x72, 0xc2, 0xac, 0x5c, 0x0d, 0xd4, 0x1d, 0xee, 0xe8, + 0x8a, 0x9a, 0xe2, 0xe0, 0xe6, 0xd6, 0xd9, 0xdf, 0x06, 0xb4, 0x0e, 0xd6, 0x1f, 0xbf, 0x82, 0x17, + 0xda, 0x1b, 0xcf, 0x1f, 0x3d, 0xb1, 0xa7, 0x01, 0x15, 0xc7, 0x75, 0x28, 0x32, 0x30, 0x40, 0x6d, + 0xc4, 0x1c, 0xc2, 0x6f, 0x32, 0x43, 0x6c, 0xdf, 0x12, 0x4c, 0x3b, 0x87, 0xca, 0xf7, 0xb1, 0x45, + 0x46, 0xd4, 0x42, 0x15, 0x15, 0xfb, 0x0e, 0x9b, 0x11, 0xce, 0x88, 0xa0, 0xa8, 0x8a, 0x11, 0xb4, + 0x35, 0x5f, 0x20, 0x35, 0x7c, 0x04, 0x2d, 0x77, 0x46, 0xb9, 0x45, 0xa6, 0xd3, 0xfb, 0xc1, 0x1d, + 0xd7, 0x39, 0xc4, 0x1a, 0x67, 0xff, 0x94, 0xe1, 0xf8, 0xe9, 0xc7, 0xeb, 0x39, 0x20, 0x9b, 0x0a, + 0xce, 0xcc, 0x47, 0x8d, 0xb6, 0xa1, 0x41, 0x4c, 0xd3, 0xe7, 0xc4, 0xbc, 0x41, 0x06, 0xee, 0x40, + 0x73, 0xca, 0xa9, 0xc9, 0xf2, 0x07, 0x04, 0xa8, 0x29, 0xcf, 0x2c, 0x0b, 0x95, 0x71, 0x0d, 0x4a, + 0x93, 0xf7, 0xa8, 0xa2, 0x0a, 0x26, 0xef, 0xa5, 0xcd, 0x4c, 0xee, 0xa2, 0x6a, 0x11, 0x11, 0x15, + 0xd5, 0x70, 0x0b, 0xea, 0xdc, 0x35, 0x25, 0xf1, 0x4d, 0x54, 0xc7, 0xc7, 0xd0, 0xc9, 0x83, 0x3c, + 0xbb, 0xf1, 0x08, 0xd2, 0x25, 0x4d, 0xfc, 0x02, 0xb0, 0x4d, 0x89, 0x23, 0xbd, 0x9f, 0x7d, 0xc2, + 0xe9, 0x58, 0x52, 0xce, 0x5d, 0x8e, 0x40, 0xbd, 0x0f, 0x77, 0x5d, 0x21, 0xff, 0x87, 0x6c, 0xe1, + 0x37, 0x70, 0xf2, 0x15, 0x52, 0x92, 0xd9, 0x25, 0x6a, 0xab, 0xed, 0xd1, 0x1c, 0x19, 0x79, 0xae, + 0xe5, 0x0b, 0x9a, 0x57, 0x76, 0xd4, 0x80, 0xbc, 0xa8, 0x40, 0x5d, 0xfc, 0x16, 0x5e, 0x3b, 0x2e, + 0xb7, 0x89, 0xc5, 0x3e, 0xd2, 0xb1, 0xb4, 0x7d, 0xe1, 0x13, 0x4b, 0x32, 0x67, 0xa2, 0x30, 0xfd, + 0xea, 0x47, 0xf8, 0x7b, 0xe8, 0x5d, 0x11, 0xd3, 0x24, 0x7c, 0x2c, 0x3d, 0x66, 0x33, 0x8b, 0x70, + 0xb5, 0x91, 0x9e, 0xe9, 0x72, 0x8a, 0x90, 0x6a, 0xff, 0xde, 0x30, 0x49, 0x84, 0x14, 0xee, 0x54, + 0x5e, 0xa3, 0x63, 0xfc, 0x0e, 0x06, 
0x5f, 0x6e, 0x90, 0x24, 0x33, 0xca, 0xc9, 0x25, 0x95, 0x0f, + 0x0e, 0x63, 0xb5, 0x27, 0x96, 0xeb, 0x79, 0x68, 0x35, 0x82, 0x8f, 0x8d, 0x68, 0x1d, 0x05, 0x9b, + 0xf5, 0x2e, 0x58, 0xd4, 0xf4, 0xaf, 0xe8, 0xa7, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0xd1, 0x7b, + 0x54, 0x43, 0xbd, 0x06, 0x00, 0x00, +} diff --git a/pipeline/problem.proto b/pipeline/problem.proto new file mode 100644 index 0000000..8a9b24f --- /dev/null +++ b/pipeline/problem.proto @@ -0,0 +1,117 @@ +syntax = "proto3"; +option go_package = "pipeline"; + +import "google/protobuf/descriptor.proto"; + +// Top level classification of the problem. +enum TaskType { + // Default value. Not to be used. + TASK_TYPE_UNDEFINED = 0; + + CLASSIFICATION = 1; + REGRESSION = 2; + CLUSTERING = 3; + LINK_PREDICTION = 4; + VERTEX_NOMINATION = 5; + COMMUNITY_DETECTION = 6; + GRAPH_CLUSTERING = 7; + GRAPH_MATCHING = 8; + TIME_SERIES_FORECASTING = 9; + COLLABORATIVE_FILTERING = 10; + OBJECT_DETECTION = 11; +} + +// Secondary classification of the problem. +enum TaskSubtype { + // Default value. Not to be used. + TASK_SUBTYPE_UNDEFINED = 0; + + // No secondary task is applicable for this problem. + NONE = 1; + BINARY = 2; + MULTICLASS = 3; + MULTILABEL = 4; + UNIVARIATE = 5; + MULTIVARIATE = 6; + OVERLAPPING = 7; + NONOVERLAPPING = 8; +} + +// The evaluation metric for any potential solution. +enum PerformanceMetric { + // Default value. Not to be used. + METRIC_UNDEFINED = 0; + + // The following are the only evaluation methods required + // to be supported for the ScoreSolution call. 
+ ACCURACY = 1; + PRECISION = 2; + RECALL = 3; + F1 = 4; + F1_MICRO = 5; + F1_MACRO = 6; + ROC_AUC = 7; + ROC_AUC_MICRO = 8; + ROC_AUC_MACRO = 9; + MEAN_SQUARED_ERROR = 10; + ROOT_MEAN_SQUARED_ERROR = 11; + ROOT_MEAN_SQUARED_ERROR_AVG = 12; + MEAN_ABSOLUTE_ERROR = 13; + R_SQUARED = 14; + NORMALIZED_MUTUAL_INFORMATION = 15; + JACCARD_SIMILARITY_SCORE = 16; + PRECISION_AT_TOP_K = 17; + OBJECT_DETECTION_AVERAGE_PRECISION = 18; + + // The rest are defined to allow expressing internal evaluation + // scores used by TA2 during pipeline search. If any you are using + // is missing, feel free to request it to be added. + // Average loss of an unspecified loss function. + LOSS = 100; +} + +message ProblemPerformanceMetric { + PerformanceMetric metric = 1; + // Additional params used by some metrics. + int32 k = 2; + string pos_label = 3; +} + +message Problem { + // ID of this problem. + string id = 1; + // Version of this problem. + string version = 2; + string name = 3; + string description = 4; + TaskType task_type = 5; + TaskSubtype task_subtype = 6; + repeated ProblemPerformanceMetric performance_metrics = 7; +} + +message ProblemTarget { + int32 target_index = 1; + string resource_id = 2; + int32 column_index = 3; + string column_name = 4; + int32 clusters_number = 5; +} + +message ProblemInput { + // Should match one of input datasets given to the pipeline search. + // Every "Dataset" object has an "id" associated with it and is available + // in its metadata. That ID is then used here to reference those inputs. + string dataset_id = 1; + // Targets should resolve to columns in a given dataset. + repeated ProblemTarget targets = 2; +} + +// Problem description matches the parsed problem description by +// the d3m_metadata.problem.parse_problem_description Python method. +// Problem outputs are not necessary for the purpose of this API +// and are needed only when executing an exported pipeline, but then +// TA2 gets full problem description anyway directly. 
+message ProblemDescription { + Problem problem = 1; + repeated ProblemInput inputs = 2; +} diff --git a/pipeline/value.pb.go b/pipeline/value.pb.go new file mode 100644 index 0000000..e5e289b --- /dev/null +++ b/pipeline/value.pb.go @@ -0,0 +1,847 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: value.proto + +package pipeline + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/golang/protobuf/protoc-gen-go/descriptor" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type ValueType int32 + +const ( + // Default value. Not to be used. + ValueType_VALUE_TYPE_UNDEFINED ValueType = 0 + // Raw value. Not all values can be represented as a raw value. + ValueType_RAW ValueType = 1 + // Represent the value as a D3M dataset. Only "file://" schema is supported using a + // shared file system. Dataset URI should point to the "datasetDoc.json" file of the dataset. + // Only Dataset container values can be represented this way. + ValueType_DATASET_URI ValueType = 2 + // Represent the value as a CSV file. Only "file://" schema is supported using a + // shared file system. CSV URI should point to the file with ".csv" file extension. + // Only tabular container values with numeric and string cell values can be represented + // this way. + ValueType_CSV_URI ValueType = 3 + // Represent values by Python-pickling them. Only "file://" schema is supported using a + // shared file system. Pickle URI should point to the file with ".pickle" file extension. 
+ ValueType_PICKLE_URI ValueType = 4 + // Represent values by Python-pickling them but sending them through the API. + ValueType_PICKLE_BLOB ValueType = 5 + // Represent values with arrow and storing them into shared instance of Plasma. + ValueType_PLASMA_ID ValueType = 6 +) + +var ValueType_name = map[int32]string{ + 0: "VALUE_TYPE_UNDEFINED", + 1: "RAW", + 2: "DATASET_URI", + 3: "CSV_URI", + 4: "PICKLE_URI", + 5: "PICKLE_BLOB", + 6: "PLASMA_ID", +} +var ValueType_value = map[string]int32{ + "VALUE_TYPE_UNDEFINED": 0, + "RAW": 1, + "DATASET_URI": 2, + "CSV_URI": 3, + "PICKLE_URI": 4, + "PICKLE_BLOB": 5, + "PLASMA_ID": 6, +} + +func (x ValueType) String() string { + return proto.EnumName(ValueType_name, int32(x)) +} +func (ValueType) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{0} +} + +type NullValue int32 + +const ( + NullValue_NULL_VALUE NullValue = 0 +) + +var NullValue_name = map[int32]string{ + 0: "NULL_VALUE", +} +var NullValue_value = map[string]int32{ + "NULL_VALUE": 0, +} + +func (x NullValue) String() string { + return proto.EnumName(NullValue_name, int32(x)) +} +func (NullValue) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{1} +} + +type ValueError struct { + // An error message useful for debugging or logging. Not meant to be very end-user friendly. + // If a list of supported/allowed value types could not support a given value, then message + // should say so. On the other hand, if there was really an error using a value type which + // would otherwise support a given value, then the error message should communicate this error. + // If there was such an error but some later value type allowed for recovery, then there + // should be no error. 
+ Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ValueError) Reset() { *m = ValueError{} } +func (m *ValueError) String() string { return proto.CompactTextString(m) } +func (*ValueError) ProtoMessage() {} +func (*ValueError) Descriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{0} +} +func (m *ValueError) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ValueError.Unmarshal(m, b) +} +func (m *ValueError) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ValueError.Marshal(b, m, deterministic) +} +func (dst *ValueError) XXX_Merge(src proto.Message) { + xxx_messageInfo_ValueError.Merge(dst, src) +} +func (m *ValueError) XXX_Size() int { + return xxx_messageInfo_ValueError.Size(m) +} +func (m *ValueError) XXX_DiscardUnknown() { + xxx_messageInfo_ValueError.DiscardUnknown(m) +} + +var xxx_messageInfo_ValueError proto.InternalMessageInfo + +func (m *ValueError) GetMessage() string { + if m != nil { + return m.Message + } + return "" +} + +type ValueList struct { + Items []*ValueRaw `protobuf:"bytes,1,rep,name=items,proto3" json:"items,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ValueList) Reset() { *m = ValueList{} } +func (m *ValueList) String() string { return proto.CompactTextString(m) } +func (*ValueList) ProtoMessage() {} +func (*ValueList) Descriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{1} +} +func (m *ValueList) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ValueList.Unmarshal(m, b) +} +func (m *ValueList) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ValueList.Marshal(b, m, deterministic) +} +func (dst *ValueList) XXX_Merge(src proto.Message) { + 
xxx_messageInfo_ValueList.Merge(dst, src) +} +func (m *ValueList) XXX_Size() int { + return xxx_messageInfo_ValueList.Size(m) +} +func (m *ValueList) XXX_DiscardUnknown() { + xxx_messageInfo_ValueList.DiscardUnknown(m) +} + +var xxx_messageInfo_ValueList proto.InternalMessageInfo + +func (m *ValueList) GetItems() []*ValueRaw { + if m != nil { + return m.Items + } + return nil +} + +type ValueDict struct { + Items map[string]*ValueRaw `protobuf:"bytes,1,rep,name=items,proto3" json:"items,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ValueDict) Reset() { *m = ValueDict{} } +func (m *ValueDict) String() string { return proto.CompactTextString(m) } +func (*ValueDict) ProtoMessage() {} +func (*ValueDict) Descriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{2} +} +func (m *ValueDict) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ValueDict.Unmarshal(m, b) +} +func (m *ValueDict) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ValueDict.Marshal(b, m, deterministic) +} +func (dst *ValueDict) XXX_Merge(src proto.Message) { + xxx_messageInfo_ValueDict.Merge(dst, src) +} +func (m *ValueDict) XXX_Size() int { + return xxx_messageInfo_ValueDict.Size(m) +} +func (m *ValueDict) XXX_DiscardUnknown() { + xxx_messageInfo_ValueDict.DiscardUnknown(m) +} + +var xxx_messageInfo_ValueDict proto.InternalMessageInfo + +func (m *ValueDict) GetItems() map[string]*ValueRaw { + if m != nil { + return m.Items + } + return nil +} + +type ValueRaw struct { + // Types that are valid to be assigned to Raw: + // *ValueRaw_Null + // *ValueRaw_Double + // *ValueRaw_Int64 + // *ValueRaw_Bool + // *ValueRaw_String_ + // *ValueRaw_Bytes + // *ValueRaw_List + // *ValueRaw_Dict + Raw isValueRaw_Raw `protobuf_oneof:"raw"` + XXX_NoUnkeyedLiteral 
struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ValueRaw) Reset() { *m = ValueRaw{} } +func (m *ValueRaw) String() string { return proto.CompactTextString(m) } +func (*ValueRaw) ProtoMessage() {} +func (*ValueRaw) Descriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{3} +} +func (m *ValueRaw) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ValueRaw.Unmarshal(m, b) +} +func (m *ValueRaw) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ValueRaw.Marshal(b, m, deterministic) +} +func (dst *ValueRaw) XXX_Merge(src proto.Message) { + xxx_messageInfo_ValueRaw.Merge(dst, src) +} +func (m *ValueRaw) XXX_Size() int { + return xxx_messageInfo_ValueRaw.Size(m) +} +func (m *ValueRaw) XXX_DiscardUnknown() { + xxx_messageInfo_ValueRaw.DiscardUnknown(m) +} + +var xxx_messageInfo_ValueRaw proto.InternalMessageInfo + +type isValueRaw_Raw interface { + isValueRaw_Raw() +} + +type ValueRaw_Null struct { + Null NullValue `protobuf:"varint,1,opt,name=null,proto3,enum=NullValue,oneof"` +} +type ValueRaw_Double struct { + Double float64 `protobuf:"fixed64,2,opt,name=double,proto3,oneof"` +} +type ValueRaw_Int64 struct { + Int64 int64 `protobuf:"varint,3,opt,name=int64,proto3,oneof"` +} +type ValueRaw_Bool struct { + Bool bool `protobuf:"varint,4,opt,name=bool,proto3,oneof"` +} +type ValueRaw_String_ struct { + String_ string `protobuf:"bytes,5,opt,name=string,proto3,oneof"` +} +type ValueRaw_Bytes struct { + Bytes []byte `protobuf:"bytes,6,opt,name=bytes,proto3,oneof"` +} +type ValueRaw_List struct { + List *ValueList `protobuf:"bytes,7,opt,name=list,proto3,oneof"` +} +type ValueRaw_Dict struct { + Dict *ValueDict `protobuf:"bytes,8,opt,name=dict,proto3,oneof"` +} + +func (*ValueRaw_Null) isValueRaw_Raw() {} +func (*ValueRaw_Double) isValueRaw_Raw() {} +func (*ValueRaw_Int64) isValueRaw_Raw() {} +func (*ValueRaw_Bool) isValueRaw_Raw() {} +func 
(*ValueRaw_String_) isValueRaw_Raw() {} +func (*ValueRaw_Bytes) isValueRaw_Raw() {} +func (*ValueRaw_List) isValueRaw_Raw() {} +func (*ValueRaw_Dict) isValueRaw_Raw() {} + +func (m *ValueRaw) GetRaw() isValueRaw_Raw { + if m != nil { + return m.Raw + } + return nil +} + +func (m *ValueRaw) GetNull() NullValue { + if x, ok := m.GetRaw().(*ValueRaw_Null); ok { + return x.Null + } + return NullValue_NULL_VALUE +} + +func (m *ValueRaw) GetDouble() float64 { + if x, ok := m.GetRaw().(*ValueRaw_Double); ok { + return x.Double + } + return 0 +} + +func (m *ValueRaw) GetInt64() int64 { + if x, ok := m.GetRaw().(*ValueRaw_Int64); ok { + return x.Int64 + } + return 0 +} + +func (m *ValueRaw) GetBool() bool { + if x, ok := m.GetRaw().(*ValueRaw_Bool); ok { + return x.Bool + } + return false +} + +func (m *ValueRaw) GetString_() string { + if x, ok := m.GetRaw().(*ValueRaw_String_); ok { + return x.String_ + } + return "" +} + +func (m *ValueRaw) GetBytes() []byte { + if x, ok := m.GetRaw().(*ValueRaw_Bytes); ok { + return x.Bytes + } + return nil +} + +func (m *ValueRaw) GetList() *ValueList { + if x, ok := m.GetRaw().(*ValueRaw_List); ok { + return x.List + } + return nil +} + +func (m *ValueRaw) GetDict() *ValueDict { + if x, ok := m.GetRaw().(*ValueRaw_Dict); ok { + return x.Dict + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. 
+func (*ValueRaw) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _ValueRaw_OneofMarshaler, _ValueRaw_OneofUnmarshaler, _ValueRaw_OneofSizer, []interface{}{ + (*ValueRaw_Null)(nil), + (*ValueRaw_Double)(nil), + (*ValueRaw_Int64)(nil), + (*ValueRaw_Bool)(nil), + (*ValueRaw_String_)(nil), + (*ValueRaw_Bytes)(nil), + (*ValueRaw_List)(nil), + (*ValueRaw_Dict)(nil), + } +} + +func _ValueRaw_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*ValueRaw) + // raw + switch x := m.Raw.(type) { + case *ValueRaw_Null: + b.EncodeVarint(1<<3 | proto.WireVarint) + b.EncodeVarint(uint64(x.Null)) + case *ValueRaw_Double: + b.EncodeVarint(2<<3 | proto.WireFixed64) + b.EncodeFixed64(math.Float64bits(x.Double)) + case *ValueRaw_Int64: + b.EncodeVarint(3<<3 | proto.WireVarint) + b.EncodeVarint(uint64(x.Int64)) + case *ValueRaw_Bool: + t := uint64(0) + if x.Bool { + t = 1 + } + b.EncodeVarint(4<<3 | proto.WireVarint) + b.EncodeVarint(t) + case *ValueRaw_String_: + b.EncodeVarint(5<<3 | proto.WireBytes) + b.EncodeStringBytes(x.String_) + case *ValueRaw_Bytes: + b.EncodeVarint(6<<3 | proto.WireBytes) + b.EncodeRawBytes(x.Bytes) + case *ValueRaw_List: + b.EncodeVarint(7<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.List); err != nil { + return err + } + case *ValueRaw_Dict: + b.EncodeVarint(8<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Dict); err != nil { + return err + } + case nil: + default: + return fmt.Errorf("ValueRaw.Raw has unexpected type %T", x) + } + return nil +} + +func _ValueRaw_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*ValueRaw) + switch tag { + case 1: // raw.null + if wire != proto.WireVarint { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeVarint() + m.Raw = &ValueRaw_Null{NullValue(x)} + return true, err + case 2: // 
raw.double + if wire != proto.WireFixed64 { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeFixed64() + m.Raw = &ValueRaw_Double{math.Float64frombits(x)} + return true, err + case 3: // raw.int64 + if wire != proto.WireVarint { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeVarint() + m.Raw = &ValueRaw_Int64{int64(x)} + return true, err + case 4: // raw.bool + if wire != proto.WireVarint { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeVarint() + m.Raw = &ValueRaw_Bool{x != 0} + return true, err + case 5: // raw.string + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeStringBytes() + m.Raw = &ValueRaw_String_{x} + return true, err + case 6: // raw.bytes + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeRawBytes(true) + m.Raw = &ValueRaw_Bytes{x} + return true, err + case 7: // raw.list + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ValueList) + err := b.DecodeMessage(msg) + m.Raw = &ValueRaw_List{msg} + return true, err + case 8: // raw.dict + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ValueDict) + err := b.DecodeMessage(msg) + m.Raw = &ValueRaw_Dict{msg} + return true, err + default: + return false, nil + } +} + +func _ValueRaw_OneofSizer(msg proto.Message) (n int) { + m := msg.(*ValueRaw) + // raw + switch x := m.Raw.(type) { + case *ValueRaw_Null: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(x.Null)) + case *ValueRaw_Double: + n += 1 // tag and wire + n += 8 + case *ValueRaw_Int64: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(x.Int64)) + case *ValueRaw_Bool: + n += 1 // tag and wire + n += 1 + case *ValueRaw_String_: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.String_))) + n += len(x.String_) + case *ValueRaw_Bytes: + n += 1 // tag and wire + n += 
proto.SizeVarint(uint64(len(x.Bytes))) + n += len(x.Bytes) + case *ValueRaw_List: + s := proto.Size(x.List) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *ValueRaw_Dict: + s := proto.Size(x.Dict) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case nil: + default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +type Value struct { + // Types that are valid to be assigned to Value: + // *Value_Error + // *Value_Raw + // *Value_DatasetUri + // *Value_CsvUri + // *Value_PickleUri + // *Value_PickleBlob + // *Value_PlasmaId + Value isValue_Value `protobuf_oneof:"value"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Value) Reset() { *m = Value{} } +func (m *Value) String() string { return proto.CompactTextString(m) } +func (*Value) ProtoMessage() {} +func (*Value) Descriptor() ([]byte, []int) { + return fileDescriptor_value_52e0194aa9642a81, []int{4} +} +func (m *Value) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_Value.Unmarshal(m, b) +} +func (m *Value) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_Value.Marshal(b, m, deterministic) +} +func (dst *Value) XXX_Merge(src proto.Message) { + xxx_messageInfo_Value.Merge(dst, src) +} +func (m *Value) XXX_Size() int { + return xxx_messageInfo_Value.Size(m) +} +func (m *Value) XXX_DiscardUnknown() { + xxx_messageInfo_Value.DiscardUnknown(m) +} + +var xxx_messageInfo_Value proto.InternalMessageInfo + +type isValue_Value interface { + isValue_Value() +} + +type Value_Error struct { + Error *ValueError `protobuf:"bytes,1,opt,name=error,proto3,oneof"` +} +type Value_Raw struct { + Raw *ValueRaw `protobuf:"bytes,2,opt,name=raw,proto3,oneof"` +} +type Value_DatasetUri struct { + DatasetUri string `protobuf:"bytes,3,opt,name=dataset_uri,json=datasetUri,proto3,oneof"` +} +type Value_CsvUri struct { + CsvUri string 
`protobuf:"bytes,4,opt,name=csv_uri,json=csvUri,proto3,oneof"` +} +type Value_PickleUri struct { + PickleUri string `protobuf:"bytes,5,opt,name=pickle_uri,json=pickleUri,proto3,oneof"` +} +type Value_PickleBlob struct { + PickleBlob []byte `protobuf:"bytes,6,opt,name=pickle_blob,json=pickleBlob,proto3,oneof"` +} +type Value_PlasmaId struct { + PlasmaId []byte `protobuf:"bytes,7,opt,name=plasma_id,json=plasmaId,proto3,oneof"` +} + +func (*Value_Error) isValue_Value() {} +func (*Value_Raw) isValue_Value() {} +func (*Value_DatasetUri) isValue_Value() {} +func (*Value_CsvUri) isValue_Value() {} +func (*Value_PickleUri) isValue_Value() {} +func (*Value_PickleBlob) isValue_Value() {} +func (*Value_PlasmaId) isValue_Value() {} + +func (m *Value) GetValue() isValue_Value { + if m != nil { + return m.Value + } + return nil +} + +func (m *Value) GetError() *ValueError { + if x, ok := m.GetValue().(*Value_Error); ok { + return x.Error + } + return nil +} + +func (m *Value) GetRaw() *ValueRaw { + if x, ok := m.GetValue().(*Value_Raw); ok { + return x.Raw + } + return nil +} + +func (m *Value) GetDatasetUri() string { + if x, ok := m.GetValue().(*Value_DatasetUri); ok { + return x.DatasetUri + } + return "" +} + +func (m *Value) GetCsvUri() string { + if x, ok := m.GetValue().(*Value_CsvUri); ok { + return x.CsvUri + } + return "" +} + +func (m *Value) GetPickleUri() string { + if x, ok := m.GetValue().(*Value_PickleUri); ok { + return x.PickleUri + } + return "" +} + +func (m *Value) GetPickleBlob() []byte { + if x, ok := m.GetValue().(*Value_PickleBlob); ok { + return x.PickleBlob + } + return nil +} + +func (m *Value) GetPlasmaId() []byte { + if x, ok := m.GetValue().(*Value_PlasmaId); ok { + return x.PlasmaId + } + return nil +} + +// XXX_OneofFuncs is for the internal use of the proto package. 
+func (*Value) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { + return _Value_OneofMarshaler, _Value_OneofUnmarshaler, _Value_OneofSizer, []interface{}{ + (*Value_Error)(nil), + (*Value_Raw)(nil), + (*Value_DatasetUri)(nil), + (*Value_CsvUri)(nil), + (*Value_PickleUri)(nil), + (*Value_PickleBlob)(nil), + (*Value_PlasmaId)(nil), + } +} + +func _Value_OneofMarshaler(msg proto.Message, b *proto.Buffer) error { + m := msg.(*Value) + // value + switch x := m.Value.(type) { + case *Value_Error: + b.EncodeVarint(1<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Error); err != nil { + return err + } + case *Value_Raw: + b.EncodeVarint(2<<3 | proto.WireBytes) + if err := b.EncodeMessage(x.Raw); err != nil { + return err + } + case *Value_DatasetUri: + b.EncodeVarint(3<<3 | proto.WireBytes) + b.EncodeStringBytes(x.DatasetUri) + case *Value_CsvUri: + b.EncodeVarint(4<<3 | proto.WireBytes) + b.EncodeStringBytes(x.CsvUri) + case *Value_PickleUri: + b.EncodeVarint(5<<3 | proto.WireBytes) + b.EncodeStringBytes(x.PickleUri) + case *Value_PickleBlob: + b.EncodeVarint(6<<3 | proto.WireBytes) + b.EncodeRawBytes(x.PickleBlob) + case *Value_PlasmaId: + b.EncodeVarint(7<<3 | proto.WireBytes) + b.EncodeRawBytes(x.PlasmaId) + case nil: + default: + return fmt.Errorf("Value.Value has unexpected type %T", x) + } + return nil +} + +func _Value_OneofUnmarshaler(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error) { + m := msg.(*Value) + switch tag { + case 1: // value.error + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ValueError) + err := b.DecodeMessage(msg) + m.Value = &Value_Error{msg} + return true, err + case 2: // value.raw + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + msg := new(ValueRaw) + err := b.DecodeMessage(msg) + m.Value = &Value_Raw{msg} + 
return true, err + case 3: // value.dataset_uri + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeStringBytes() + m.Value = &Value_DatasetUri{x} + return true, err + case 4: // value.csv_uri + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeStringBytes() + m.Value = &Value_CsvUri{x} + return true, err + case 5: // value.pickle_uri + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeStringBytes() + m.Value = &Value_PickleUri{x} + return true, err + case 6: // value.pickle_blob + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeRawBytes(true) + m.Value = &Value_PickleBlob{x} + return true, err + case 7: // value.plasma_id + if wire != proto.WireBytes { + return true, proto.ErrInternalBadWireType + } + x, err := b.DecodeRawBytes(true) + m.Value = &Value_PlasmaId{x} + return true, err + default: + return false, nil + } +} + +func _Value_OneofSizer(msg proto.Message) (n int) { + m := msg.(*Value) + // value + switch x := m.Value.(type) { + case *Value_Error: + s := proto.Size(x.Error) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *Value_Raw: + s := proto.Size(x.Raw) + n += 1 // tag and wire + n += proto.SizeVarint(uint64(s)) + n += s + case *Value_DatasetUri: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.DatasetUri))) + n += len(x.DatasetUri) + case *Value_CsvUri: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.CsvUri))) + n += len(x.CsvUri) + case *Value_PickleUri: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.PickleUri))) + n += len(x.PickleUri) + case *Value_PickleBlob: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.PickleBlob))) + n += len(x.PickleBlob) + case *Value_PlasmaId: + n += 1 // tag and wire + n += proto.SizeVarint(uint64(len(x.PlasmaId))) + n += len(x.PlasmaId) + case nil: + 
default: + panic(fmt.Sprintf("proto: unexpected type %T in oneof", x)) + } + return n +} + +func init() { + proto.RegisterType((*ValueError)(nil), "ValueError") + proto.RegisterType((*ValueList)(nil), "ValueList") + proto.RegisterType((*ValueDict)(nil), "ValueDict") + proto.RegisterMapType((map[string]*ValueRaw)(nil), "ValueDict.ItemsEntry") + proto.RegisterType((*ValueRaw)(nil), "ValueRaw") + proto.RegisterType((*Value)(nil), "Value") + proto.RegisterEnum("ValueType", ValueType_name, ValueType_value) + proto.RegisterEnum("NullValue", NullValue_name, NullValue_value) +} + +func init() { proto.RegisterFile("value.proto", fileDescriptor_value_52e0194aa9642a81) } + +var fileDescriptor_value_52e0194aa9642a81 = []byte{ + // 576 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x64, 0x53, 0x5d, 0x6f, 0xd3, 0x3c, + 0x14, 0x6e, 0x96, 0xa6, 0x6d, 0x4e, 0xde, 0x77, 0x44, 0xd6, 0x40, 0x66, 0x68, 0x5a, 0x56, 0x24, + 0x54, 0x0d, 0x94, 0x49, 0x03, 0x21, 0xc4, 0x5d, 0xbb, 0x06, 0xb5, 0x22, 0x94, 0x29, 0x6b, 0x87, + 0xe0, 0x26, 0x4a, 0x5a, 0x53, 0x59, 0xf3, 0x9a, 0x28, 0x76, 0x3a, 0x55, 0x82, 0x1f, 0xcd, 0x05, + 0xf7, 0xe8, 0x38, 0x69, 0xc7, 0xc7, 0x5d, 0x9e, 0x8f, 0xf3, 0xf8, 0x9c, 0x63, 0x07, 0x9c, 0x75, + 0x22, 0x4a, 0xe6, 0xe7, 0x45, 0xa6, 0xb2, 0x43, 0x6f, 0x99, 0x65, 0x4b, 0xc1, 0xce, 0x34, 0x4a, + 0xcb, 0xaf, 0x67, 0x0b, 0x26, 0xe7, 0x05, 0xcf, 0x55, 0x56, 0x54, 0x8e, 0xee, 0x33, 0x80, 0x6b, + 0x2c, 0x08, 0x8a, 0x22, 0x2b, 0x08, 0x85, 0xf6, 0x2d, 0x93, 0x32, 0x59, 0x32, 0x6a, 0x78, 0x46, + 0xcf, 0x8e, 0xb6, 0xb0, 0xfb, 0x02, 0x6c, 0xed, 0x0b, 0xb9, 0x54, 0xe4, 0x18, 0x2c, 0xae, 0xd8, + 0xad, 0xa4, 0x86, 0x67, 0xf6, 0x9c, 0x73, 0xdb, 0xd7, 0x52, 0x94, 0xdc, 0x45, 0x15, 0xdf, 0xfd, + 0x5e, 0xbb, 0x87, 0x7c, 0xae, 0xc8, 0xf3, 0x3f, 0xdd, 0x0f, 0xfd, 0x9d, 0xe4, 0x8f, 0x91, 0x0f, + 0x56, 0xaa, 0xd8, 0xd4, 0x95, 0x87, 0x17, 0x00, 0xf7, 0x24, 0x71, 0xc1, 0xbc, 0x61, 0x9b, 0xba, + 0x17, 0xfc, 0xc4, 0xa3, 0xf5, 0x80, 0x74, 0xcf, 
0x33, 0xfe, 0x3a, 0x5a, 0xf3, 0x6f, 0xf7, 0xde, + 0x18, 0xdd, 0x1f, 0x06, 0x74, 0xb6, 0x3c, 0xf1, 0xa0, 0xb9, 0x2a, 0x85, 0xd0, 0x21, 0xfb, 0xe7, + 0xe0, 0x4f, 0x4a, 0x21, 0xb4, 0x38, 0x6a, 0x44, 0x5a, 0x21, 0x14, 0x5a, 0x8b, 0xac, 0x4c, 0x45, + 0x15, 0x6a, 0x8c, 0x1a, 0x51, 0x8d, 0xc9, 0x23, 0xb0, 0xf8, 0x4a, 0xbd, 0x7e, 0x45, 0x4d, 0xcf, + 0xe8, 0x99, 0xa3, 0x46, 0x54, 0x41, 0x72, 0x00, 0xcd, 0x34, 0xcb, 0x04, 0x6d, 0x7a, 0x46, 0xaf, + 0x83, 0x39, 0x88, 0x30, 0x47, 0xaa, 0x82, 0xaf, 0x96, 0xd4, 0xc2, 0x86, 0x31, 0xa7, 0xc2, 0x98, + 0x93, 0x6e, 0x14, 0x93, 0xb4, 0xe5, 0x19, 0xbd, 0xff, 0x30, 0x47, 0x43, 0xec, 0x4d, 0x70, 0xa9, + 0x68, 0x5b, 0x0f, 0x03, 0xfe, 0x6e, 0xc5, 0x98, 0x89, 0x0a, 0x3a, 0x16, 0x7c, 0xae, 0x68, 0xe7, + 0x77, 0x07, 0xee, 0x0e, 0x1d, 0xa8, 0x0c, 0x2c, 0x30, 0x8b, 0xe4, 0xae, 0xfb, 0xd3, 0x00, 0x4b, + 0x8b, 0xe4, 0x29, 0x58, 0x0c, 0x6f, 0x53, 0x4f, 0xec, 0x9c, 0x3b, 0xfe, 0xfd, 0x05, 0xe3, 0xc9, + 0x5a, 0x23, 0x47, 0xba, 0xea, 0x9f, 0x2d, 0x8e, 0x1a, 0x11, 0xf2, 0xe4, 0x04, 0x9c, 0x45, 0xa2, + 0x12, 0xc9, 0x54, 0x5c, 0x16, 0x5c, 0x8f, 0x8f, 0xf3, 0x40, 0x4d, 0xce, 0x0a, 0x4e, 0x1e, 0x43, + 0x7b, 0x2e, 0xd7, 0x5a, 0x6e, 0x6e, 0xc7, 0x9d, 0xcb, 0x35, 0x4a, 0xc7, 0x00, 0x39, 0x9f, 0xdf, + 0x08, 0xa6, 0xd5, 0xed, 0x32, 0xec, 0x8a, 0x43, 0xc3, 0x09, 0x38, 0xb5, 0x21, 0x15, 0x59, 0xba, + 0xdb, 0x4a, 0x5d, 0x35, 0x10, 0x59, 0x4a, 0x8e, 0xc0, 0xce, 0x45, 0x22, 0x6f, 0x93, 0x98, 0x2f, + 0xf4, 0x7e, 0xd0, 0xd0, 0xa9, 0xa8, 0xf1, 0x62, 0xd0, 0xae, 0xdf, 0xc1, 0xe9, 0xb7, 0xfa, 0xa9, + 0x4d, 0x37, 0x39, 0x23, 0x14, 0x0e, 0xae, 0xfb, 0xe1, 0x2c, 0x88, 0xa7, 0x9f, 0x2f, 0x83, 0x78, + 0x36, 0x19, 0x06, 0xef, 0xc6, 0x93, 0x60, 0xe8, 0x36, 0x48, 0x1b, 0xcc, 0xa8, 0xff, 0xc9, 0x35, + 0xc8, 0x03, 0x70, 0x86, 0xfd, 0x69, 0xff, 0x2a, 0x98, 0xc6, 0xb3, 0x68, 0xec, 0xee, 0x11, 0x07, + 0xda, 0x17, 0x57, 0xd7, 0x1a, 0x98, 0x64, 0x1f, 0xe0, 0x72, 0x7c, 0xf1, 0x3e, 0x0c, 0x34, 0x6e, + 0xa2, 0xbb, 0xc6, 0x83, 0xf0, 0xe3, 0xc0, 0xb5, 0xc8, 0xff, 0x60, 0x5f, 0x86, 0xfd, 
0xab, 0x0f, + 0xfd, 0x78, 0x3c, 0x74, 0x5b, 0xa7, 0x4f, 0xc0, 0xde, 0xbd, 0x27, 0x2c, 0x9e, 0xcc, 0xc2, 0x30, + 0xd6, 0x2d, 0xb8, 0x8d, 0x01, 0x7c, 0xe9, 0xe4, 0x3c, 0x67, 0x82, 0xaf, 0x58, 0xda, 0xd2, 0xbf, + 0xdb, 0xcb, 0x5f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x48, 0x3c, 0x88, 0x6e, 0x9f, 0x03, 0x00, 0x00, +} diff --git a/pipeline/value.proto b/pipeline/value.proto new file mode 100644 index 0000000..c5e9858 --- /dev/null +++ b/pipeline/value.proto @@ -0,0 +1,109 @@ +syntax = "proto3"; +option go_package = "pipeline"; + +import "google/protobuf/descriptor.proto"; + +// All values are immutable and no files should be changed after a URI +// is provided to the other system. When using shared file system, all +// URIs should be absolute to the file system, for example +// "file:///datasets/dataset_1/datasetDoc.json". It is assumed that both +// TA2 and TA3 systems both have a limited number of shared directories +// mounted at same locations (in previous example, "/datasets" directory). +// When one system creates a dataset and sends over the URI, the other can +// directly access it without doing any extra work (like downloading or copying). +// +// Configuration of shared directories and shared instance of Plasma are not +// specified by this API. +// +// Not all types of non-raw values is necessary to be supported/allowed. +// Both systems maintain a list of allowed value types the other system accepts. +// Some calls also provide a way to provide such a list. When a value is to be +// provided to the other system, the list is traversed in order and the first +// value type which can be used without an error is used. If the list is +// exhausted, then an error is provided instead. + +enum ValueType { + // Default value. Not to be used. + VALUE_TYPE_UNDEFINED = 0; + + // The following value types are those everyone should support. + + // Raw value. Not all values can be represented as a raw value. + RAW = 1; + // Represent the value as a D3M dataset. 
Only "file://" schema is supported using a + // shared file system. Dataset URI should point to the "datasetDoc.json" file of the dataset. + // Only Dataset container values can be represented this way. + DATASET_URI = 2; + // Represent the value as a CSV file. Only "file://" schema is supported using a + // shared file system. CSV URI should point to the file with ".csv" file extension. + // Only tabular container values with numeric and string cell values can be represented + // this way. + CSV_URI = 3; + + // The following are additional value types which can be supported by systems, + // but it is not required. If the value cannot be represented with value types your system + // supports and your system is still asked to do so, it should return "ValueError" error instead. + + // Represent values by Python-pickling them. Only "file://" schema is supported using a + // shared file system. Pickle URI should point to the file with ".pickle" file extension. + PICKLE_URI = 4; + // Represent values by Python-pickling them but sending them through the API. + PICKLE_BLOB = 5; + // Represent values with arrow and storing them into shared instance of Plasma. + PLASMA_ID = 6; +} + +message ValueError { + // An error message useful for debugging or logging. Not meant to be very end-user friendly. + // If a list of supported/allowed value types could not support a given value, then message + // should say so. On the other hand, if there was really an error using a value type which + // would otherwise support a given value, then the error message should communicate this error. + // If there was such an error but some later value type allowed for recovery, then there + // should be no error. 
+ string message = 1; +} + +message ValueList { + repeated ValueRaw items = 1; +} + +message ValueDict { + map<string, ValueRaw> items = 1; +} + +enum NullValue { + NULL_VALUE = 0; +} + +message ValueRaw { + oneof raw { + NullValue null = 1; + double double = 2; + int64 int64 = 3; + bool bool = 4; + string string = 5; + bytes bytes = 6; + ValueList list = 7; + ValueDict dict = 8; + } +} + +message Value { + oneof value { + // If there was an error trying to provide the value using the requested + // value type and no other value type was available to be used. + ValueError error = 1; + // Raw values directly provided in the message. + ValueRaw raw = 2; + // An URI pointing to a dataset. Resulting value is Dataset container value from loading this URI. + string dataset_uri = 3; + // An URI pointing to a CSV file. + string csv_uri = 4; + // An URI to a Python-pickled value. + string pickle_uri = 5; + // A Python-pickled value itself. + bytes pickle_blob = 6; + // 20 bytes of Plasma ObjectID of the value. + bytes plasma_id = 7; + } +} diff --git a/primitive/classify.go b/primitive/classify.go new file mode 100644 index 0000000..3c4eca5 --- /dev/null +++ b/primitive/classify.go @@ -0,0 +1,70 @@ +package primitive + +import ( + "encoding/json" + "os" + "strconv" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/rest" + + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" + "github.com/unchartedsoftware/distil-ingest/util" +) + +// ClassifyPrimitive will classify the dataset using a primitive. 
+func (s *IngestStep) ClassifyPrimitive(dataset string, outputPath string) error { + // create & submit the solution request + pip, err := description.CreateSimonPipeline("says", "") + if err != nil { + return errors.Wrap(err, "unable to create Simon pipeline") + } + + datasetURI, err := s.submitPrimitive(dataset, pip) + if err != nil { + return errors.Wrap(err, "unable to run Simon pipeline") + } + + // parse primitive response (variable,probabilities,labels) + res, err := result.ParseResultCSV(datasetURI) + if err != nil { + return errors.Wrap(err, "unable to parse Simon pipeline result") + } + + // First row is header, then all other rows are col index, types, probabilities. + probabilities := make([][]float64, len(res)-1) + labels := make([][]string, len(res)-1) + for i, v := range res { + if i > 0 { + colIndex, err := strconv.ParseInt(v[0].(string), 10, 64) + if err != nil { + return err + } + labels[colIndex] = toStringArray(v[1].([]interface{})) + probs, err := toFloat64Array(v[2].([]interface{})) + if err != nil { + return err + } + probabilities[colIndex] = probs + } + } + classification := &rest.ClassificationResult{ + Path: datasetURI, + Labels: labels, + Probabilities: probabilities, + } + + // output the classification in the expected JSON format + bytes, err := json.MarshalIndent(classification, "", " ") + if err != nil { + return errors.Wrap(err, "unable to serialize classification result") + } + // write to file + err = util.WriteFileWithDirs(outputPath, bytes, os.ModePerm) + if err != nil { + return errors.Wrap(err, "unable to store classification result") + } + + return nil +} diff --git a/primitive/cluster.go b/primitive/cluster.go new file mode 100644 index 0000000..4a683eb --- /dev/null +++ b/primitive/cluster.go @@ -0,0 +1,92 @@ +package primitive + +import ( + "bytes" + "encoding/csv" + "os" + "path" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" + "github.com/unchartedsoftware/distil-ingest/util" +) + +// 
ClusterPrimitive will cluster the dataset fields using a primitive. +func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, + rootDataPath string, outputSchemaPath string, outputDataPath string, hasHeader bool) error { + // create required folders for outputPath + util.CreateContainingDirs(outputDataPath) + util.CreateContainingDirs(outputSchemaPath) + + // load metadata from original schema + meta, err := metadata.LoadMetadataFromOriginalSchema(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to load original schema file") + } + mainDR := meta.GetMainDataResource() + + // add feature variables + features, err := getFeatureVariables(meta, "_cluster_") + if err != nil { + return errors.Wrap(err, "unable to get cluster variables") + } + + d3mIndexField := getD3MIndexField(mainDR) + + // open the input file + dataPath := path.Join(rootDataPath, mainDR.ResPath) + lines, err := s.readCSVFile(dataPath, hasHeader) + if err != nil { + return errors.Wrap(err, "error reading raw data") + } + + // add the cluster data to the raw data + for _, f := range features { + mainDR.Variables = append(mainDR.Variables, f.Variable) + + lines, err = s.appendFeature(dataset, d3mIndexField, hasHeader, f, lines) + if err != nil { + return errors.Wrap(err, "error appending clustered data") + } + } + + // initialize csv writer + output := &bytes.Buffer{} + writer := csv.NewWriter(output) + + // output the header + header := make([]string, len(mainDR.Variables)) + for _, v := range mainDR.Variables { + header[v.Index] = v.Name + } + err = writer.Write(header) + if err != nil { + return errors.Wrap(err, "error storing clustered header") + } + + for _, line := range lines { + err = writer.Write(line) + if err != nil { + return errors.Wrap(err, "error storing clustered output") + } + } + + // output the data with the new feature + writer.Flush() + + err = util.WriteFileWithDirs(outputDataPath, output.Bytes(), os.ModePerm) + if err != nil { + return 
errors.Wrap(err, "error writing clustered output") + } + + relativePath := getRelativePath(rootDataPath, outputDataPath) + mainDR.ResPath = relativePath + + // write the new schema to file + err = meta.WriteSchema(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to store cluster schema") + } + + return nil +} diff --git a/primitive/compute/client.go b/primitive/compute/client.go new file mode 100644 index 0000000..4fc30d3 --- /dev/null +++ b/primitive/compute/client.go @@ -0,0 +1,393 @@ +package compute + +import ( + "fmt" + "io" + "path" + "strings" + "sync" + "time" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/plog" + "golang.org/x/net/context" + "google.golang.org/grpc" +) + +const defaultTrainTestRatio = 3 + +// Client provides facilities for managing GPRC solution requests. Requests are +// isssued and a context object containing rx channels is returned to the caller for consumption +// of results. The context for running requests can also be fetched, along with their buffered +// results. Spawning a grpc.ClientConn per RPC call is not considered good practice - the system +// is designed such that multiple go routines make RPC calls to a single shared client, and synch +// is managed internally. +type Client struct { + client pipeline.CoreClient + conn *grpc.ClientConn + runner *grpc.ClientConn + mu *sync.Mutex + UserAgent string + PullTimeout time.Duration + PullMax int + SkipPreprocessing bool +} + +// SearchSolutionHandler is executed when a new search solution is returned. +type SearchSolutionHandler func(*pipeline.GetSearchSolutionsResultsResponse) + +// NewClient creates a new pipline request dispatcher instance. 
This will establish +// the connection to the solution server or return an error on fail +func NewClient(serverAddr string, trace bool, userAgent string, + pullTimeout time.Duration, pullMax int, skipPreprocessing bool) (*Client, error) { + log.Infof("connecting to ta2 at %s", serverAddr) + + conn, err := grpc.Dial( + serverAddr, + grpc.WithInsecure(), + grpc.WithBlock(), + ) + if err != nil { + return nil, errors.Wrapf(err, "failed to connect to %s", serverAddr) + } + + log.Infof("connected to %s", serverAddr) + + client := Client{ + client: pipeline.NewCoreClient(conn), + conn: conn, + UserAgent: userAgent, + PullTimeout: pullTimeout, + PullMax: pullMax, + SkipPreprocessing: skipPreprocessing, + } + + // check for basic ta2 connectivity + helloResponse, err := client.client.Hello(context.Background(), &pipeline.HelloRequest{}) + if err != nil { + return nil, err + } + log.Infof("ta2 user agent: %s", helloResponse.GetUserAgent()) + log.Infof("ta2 API version: %s", helloResponse.GetVersion()) + log.Infof("ta2 Allowed value types: %+v", helloResponse.GetAllowedValueTypes()) + log.Infof("ta2 extensions: %+v", helloResponse.GetSupportedExtensions()) + + if !strings.EqualFold(GetAPIVersion(), helloResponse.GetVersion()) { + log.Warnf("ta2 API version '%s' does not match expected version '%s", helloResponse.GetVersion(), GetAPIVersion()) + } + + return &client, nil +} + +// NewClientWithRunner creates a new pipline request dispatcher instance. 
This will establish +// the connection to the solution server or return an error on fail +func NewClientWithRunner(serverAddr string, runnerAddr string, trace bool, userAgent string, pullTimeout time.Duration, pullMax int, skipPreprocessing bool) (*Client, error) { + + client, err := NewClient(serverAddr, trace, userAgent, pullTimeout, pullMax, skipPreprocessing) + if err != nil { + return nil, err + } + + log.Infof("connecting to ta2 runner at %s", runnerAddr) + + runner, err := grpc.Dial( + runnerAddr, + grpc.WithInsecure(), + grpc.WithBlock(), + ) + if err != nil { + return nil, errors.Wrapf(err, "failed to connect to %s", runnerAddr) + } + + log.Infof("connected to %s", runnerAddr) + + client.runner = runner + return client, nil +} + +// NewClientWithRunner creates a new pipline request dispatcher instance. This will establish +// the connection to the solution server or return an error on fail +func NewRunner(runnerAddr string, trace bool, userAgent string, pullTimeout time.Duration, pullMax int, skipPreprocessing bool) (*Client, error) { + + client := &Client{ + UserAgent: userAgent, + PullTimeout: pullTimeout, + PullMax: pullMax, + SkipPreprocessing: skipPreprocessing, + } + + log.Infof("connecting to ta2 runner at %s", runnerAddr) + + runner, err := grpc.Dial( + runnerAddr, + grpc.WithInsecure(), + grpc.WithBlock(), + ) + if err != nil { + return nil, errors.Wrapf(err, "failed to connect to %s", runnerAddr) + } + + log.Infof("connected to %s", runnerAddr) + + client.runner = runner + return client, nil +} + +// Close the connection to the solution service +func (c *Client) Close() { + log.Infof("client connection closed") + c.conn.Close() +} + +// StartSearch starts a solution search session. 
+func (c *Client) StartSearch(ctx context.Context, request *pipeline.SearchSolutionsRequest) (string, error) { + + searchSolutionResponse, err := c.client.SearchSolutions(ctx, request) + if err != nil { + return "", errors.Wrap(err, "failed to start search") + } + + return searchSolutionResponse.SearchId, nil +} + +// SearchSolutions generates candidate pipelines and executes a provided handler +// for each result. While handlers are executing asynchronously, this method +// will not return until all handlers have finished. +func (c *Client) SearchSolutions(ctx context.Context, searchID string, solutionHandler SearchSolutionHandler) error { + + searchPiplinesResultsRequest := &pipeline.GetSearchSolutionsResultsRequest{ + SearchId: searchID, + } + + searchSolutionsResultsResponse, err := c.client.GetSearchSolutionsResults(ctx, searchPiplinesResultsRequest) + if err != nil { + return errors.Wrap(err, "failed to open search results stream") + } + + // track handlers to ensure they all finish before returning + wg := &sync.WaitGroup{} + + err = pullFromAPI(c.PullMax, c.PullTimeout, func() error { + solutionResultResponse, err := searchSolutionsResultsResponse.Recv() + if err == io.EOF { + return nil + } + + if err != nil { + return errors.Wrap(err, "failed to get search result") + } + // ignore empty responses + if solutionResultResponse.SolutionId != "" { + wg.Add(1) + go func() { + solutionHandler(solutionResultResponse) + wg.Done() + }() + } + return nil + }) + if err != nil { + return err + } + + // don't return until all handlers have finished executing + wg.Wait() + return nil +} + +// GenerateSolutionScores generates scrores for candidate solutions. 
+// Scoring is requested with a holdout evaluation using the default train/test
+// ratio, and every response pulled from the results stream is returned.
+func (c *Client) GenerateSolutionScores(ctx context.Context, solutionID string, datasetURI string, metrics []string) ([]*pipeline.GetScoreSolutionResultsResponse, error) {
+	// the dataset is the single input handed to the scoring request
+	dataset := &pipeline.Value{
+		Value: &pipeline.Value_DatasetUri{
+			DatasetUri: datasetURI,
+		},
+	}
+	request := &pipeline.ScoreSolutionRequest{
+		SolutionId:         solutionID,
+		Inputs:             []*pipeline.Value{dataset},
+		PerformanceMetrics: convertMetricsFromTA3ToTA2(metrics),
+		Configuration: &pipeline.ScoringConfiguration{
+			Method:         pipeline.EvaluationMethod_HOLDOUT,
+			TrainTestRatio: defaultTrainTestRatio,
+		},
+	}
+
+	// kick off scoring, then open the stream that carries its results
+	started, err := c.client.ScoreSolution(ctx, request)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to start solution scoring")
+	}
+
+	stream, err := c.client.GetScoreSolutionResults(ctx, &pipeline.GetScoreSolutionResultsRequest{
+		RequestId: started.RequestId,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to open solution scoring results stream")
+	}
+
+	// collect each response as it is received; end of stream is not an error
+	var results []*pipeline.GetScoreSolutionResultsResponse
+	receive := func() error {
+		result, recvErr := stream.Recv()
+		switch {
+		case recvErr == io.EOF:
+			return nil
+		case recvErr != nil:
+			return errors.Wrap(recvErr, "failed to receive solution scoring result")
+		}
+		results = append(results, result)
+		return nil
+	}
+	if err := pullFromAPI(c.PullMax, c.PullTimeout, receive); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// GenerateSolutionFit generates fit for candidate solutions.
+// The solution is fit against the dataset at datasetURI, and every response
+// pulled from the fit results stream is returned.
+func (c *Client) GenerateSolutionFit(ctx context.Context, solutionID string, datasetURI string) ([]*pipeline.GetFitSolutionResultsResponse, error) {
+
+	fitSolutionRequest := &pipeline.FitSolutionRequest{
+		SolutionId: solutionID,
+		Inputs: []*pipeline.Value{
+			{
+				Value: &pipeline.Value_DatasetUri{
+					DatasetUri: datasetURI,
+				},
+			},
+		},
+	}
+
+	// start the fit, then open the stream that carries its results
+	fitSolutionResponse, err := c.client.FitSolution(ctx, fitSolutionRequest)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to start solution fitting")
+	}
+
+	fitSolutionResultsRequest := &pipeline.GetFitSolutionResultsRequest{
+		RequestId: fitSolutionResponse.RequestId,
+	}
+
+	fitSolutionResultsResponse, err := c.client.GetFitSolutionResults(ctx, fitSolutionResultsRequest)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to open solution fitting result stream")
+	}
+
+	var solutionResultResponses []*pipeline.GetFitSolutionResultsResponse
+
+	err = pullFromAPI(c.PullMax, c.PullTimeout, func() error {
+		solutionResultResponse, err := fitSolutionResultsResponse.Recv()
+		// end of stream is not treated as a failure
+		if err == io.EOF {
+			return nil
+		}
+
+		if err != nil {
+			return errors.Wrap(err, "failed to receive solution fitting result")
+		}
+		solutionResultResponses = append(solutionResultResponses, solutionResultResponse)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	return solutionResultResponses, nil
+}
+
+// GeneratePredictions generates predictions.
+// The produce request is submitted as supplied, and every response pulled from
+// the produce results stream is returned.
+func (c *Client) GeneratePredictions(ctx context.Context, request *pipeline.ProduceSolutionRequest) ([]*pipeline.GetProduceSolutionResultsResponse, error) {
+	// submit the produce request, then open the stream that carries its results
+	started, err := c.client.ProduceSolution(ctx, request)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to start solution produce")
+	}
+
+	stream, err := c.client.GetProduceSolutionResults(ctx, &pipeline.GetProduceSolutionResultsRequest{
+		RequestId: started.RequestId,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to open solution produce result stream")
+	}
+
+	// collect each response as it is received; end of stream is not an error
+	var results []*pipeline.GetProduceSolutionResultsResponse
+	receive := func() error {
+		result, recvErr := stream.Recv()
+		switch {
+		case recvErr == io.EOF:
+			return nil
+		case recvErr != nil:
+			return errors.Wrap(recvErr, "failed to receive solution produce result")
+		}
+		results = append(results, result)
+		return nil
+	}
+	if err := pullFromAPI(c.PullMax, c.PullTimeout, receive); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// StopSearch stops the solution search session identified by searchID.
+func (c *Client) StopSearch(ctx context.Context, searchID string) error {
+	request := &pipeline.StopSearchSolutionsRequest{
+		SearchId: searchID,
+	}
+	if _, err := c.client.StopSearchSolutions(ctx, request); err != nil {
+		return errors.Wrap(err, "failed to stop solution search")
+	}
+	return nil
+}
+
+// EndSearch ends the solution search session identified by searchID.
+func (c *Client) EndSearch(ctx context.Context, searchID string) error {
+	request := &pipeline.EndSearchSolutionsRequest{
+		SearchId: searchID,
+	}
+	if _, err := c.client.EndSearchSolutions(ctx, request); err != nil {
+		return errors.Wrap(err, "failed to end solution search")
+	}
+	return nil
+}
+
+// ExportSolution exports the solution.
+// The export request is issued for the fitted solution with a rank of 1.
+func (c *Client) ExportSolution(ctx context.Context, fittedSolutionID string) error {
+	exportSolution := &pipeline.SolutionExportRequest{
+		Rank:             1,
+		FittedSolutionId: fittedSolutionID,
+	}
+	_, err := c.client.SolutionExport(ctx, exportSolution)
+	// errors.Wrap returns nil when err is nil, so success passes through
+	return errors.Wrap(err, "failed to export solution")
+}
+
+// ExecutePipeline executes a pre-specified pipeline. The dataset is passed to
+// the runner as a file:// URI pointing at the D3MDataSchema document inside
+// datasetURI. The call goes through the runner connection, so it returns an
+// error when the client was created without one.
+func (c *Client) ExecutePipeline(ctx context.Context, datasetURI string, pipelineDesc *pipeline.PipelineDescription) (*pipeline.PipelineExecuteResponse, error) {
+	// a client built by NewClient alone has no runner; fail cleanly rather
+	// than dereference a nil connection
+	if c.runner == nil {
+		return nil, errors.New("failed to execute pipeline: client has no runner connection")
+	}
+
+	datasetURI = fmt.Sprintf("file://%s", path.Join(datasetURI, D3MDataSchema))
+
+	in := &pipeline.PipelineExecuteRequest{
+		PipelineDescription: pipelineDesc,
+		Inputs: []*pipeline.Value{
+			{
+				Value: &pipeline.Value_DatasetUri{
+					DatasetUri: datasetURI,
+				},
+			},
+		},
+	}
+	out := new(pipeline.PipelineExecuteResponse)
+	err := c.runner.Invoke(ctx, "/Executor/ExecutePipeline", in, out)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
diff --git a/primitive/compute/description/builder.go b/primitive/compute/description/builder.go
new file mode 100644
index 0000000..1fae4a9
--- /dev/null
+++ b/primitive/compute/description/builder.go
@@ -0,0 +1,148 @@
+package description
+
+import (
+	"fmt"
+
+	"github.com/pkg/errors"
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+)
+
+// builder accumulates the name, description and ordered steps from which a
+// pipeline description is compiled.
+type builder struct {
+	name        string
+	description string
+	outputs     []string
+	steps       []Step
+}
+
+// Compileable allows an implementer to produce a pipeline
+// description.
+type Compileable interface {
+	Compile() (*pipeline.PipelineDescription, error)
+}
+
+// Builder creates a PipelineDescription from a set of ordered pipeline description
+// steps. Called as:
+//
+//	pipelineDesc := NewBuilder("somePrimitive", "somePrimitive description").
+//		Add(stepData0).
+//		Add(stepData1).
+//		Compile()
+//
+// An inference step can be added by calling AddInferencePoint(), which marks the point where
+// a TA2 system should be filling in the rest of the pipeline.
+type Builder interface {
+	Compileable
+	Add(stepData Step) Builder
+	AddInferencePoint() Compileable
+}
+
+// NewBuilder creates a new Builder instance with the given pipeline name and
+// description and no steps.
+func NewBuilder(name, description string) Builder {
+	return &builder{
+		name:        name,
+		description: description,
+		outputs:     []string{},
+		steps:       []Step{},
+	}
+}
+
+// Add appends a new step to the pipeline builder and returns the builder so
+// that calls can be chained.
+func (p *builder) Add(step Step) Builder {
+	p.steps = append(p.steps, step)
+	return p
+}
+
+// AddInferencePoint adds a new inference marker to the pipeline builder. TA2
+// systems will infer the remainder of the pipeline from this point. Note that
+// a Compileable is returned here rather than a Builder, as no stages
+// can be added after the inference point.
+func (p *builder) AddInferencePoint() Compileable {
+	// Create the standard inference step and append it
+	p.steps = append(p.steps, NewInferenceStepData())
+	return p
+}
+
+// validateStep checks that the step at stepNumber is non-nil, exposes an
+// argument map, and declares at least one output method.
+func validateStep(steps []Step, stepNumber int) error {
+	// Validate step parameters.  This is currently pretty surface level, but we could
+	// go in and validate the struct hierarchy to catch more potential caller errors during
+	// the compile step.
+	//
+	// NOTE: Hyperparameters and Primitive are optional so there is no included check at this time.
+
+	step := steps[stepNumber]
+	if step == nil {
+		return errors.Errorf("compile failed: nil value for step %d", stepNumber)
+	}
+
+	args := step.GetArguments()
+	if args == nil {
+		return errors.Errorf("compile failed: step %d missing argument list", stepNumber)
+	}
+
+	outputs := step.GetOutputMethods()
+	if len(outputs) == 0 {
+		return errors.Errorf("compile failed: expected at least 1 output for step %d", stepNumber)
+	}
+	return nil
+}
+
+// Compile the pipeline into a PipelineDescription. The first step is fed from
+// the pipeline input, every later step is fed from the first output method of
+// the step before it, and the first output method of the last step becomes the
+// pipeline output.
+//
+// All steps are validated before any of them is modified, so a failed
+// validation leaves the steps untouched. Compile writes the input argument
+// into each step, which means a primitive step that already carries that
+// argument (for example from an earlier Compile) is rejected as using a
+// reserved name.
+func (p *builder) Compile() (*pipeline.PipelineDescription, error) {
+	if len(p.steps) == 0 {
+		return nil, errors.New("compile failed: pipeline requires at least 1 step")
+	}
+
+	// make sure first step has an arg list
+	if err := validateStep(p.steps, 0); err != nil {
+		return nil, err
+	}
+
+	// the step input argument is written by the compiler, so the caller may
+	// not have supplied it on the first step
+	if _, ok := p.steps[0].GetArguments()[stepInputsKey]; ok {
+		return nil, errors.Errorf("compile failed: argument `%s` is reserved for internal use", stepInputsKey)
+	}
+
+	// validate the remaining steps up front so that nothing is rewired when a
+	// later step turns out to be invalid
+	for i := 1; i < len(p.steps); i++ {
+		if err := validateStep(p.steps, i); err != nil {
+			return nil, err
+		}
+	}
+
+	// first step, set the input to the dataset by default
+	p.steps[0].UpdateArguments(stepInputsKey, fmt.Sprintf("%s.0", pipelineInputsKey))
+
+	// Connect the input of each step to the output of the previous.  Currently
+	// only support a single output.
+	for i := 1; i < len(p.steps); i++ {
+		previousStep := i - 1
+		previousOutput := p.steps[previousStep].GetOutputMethods()[0]
+		p.steps[i].UpdateArguments(stepInputsKey, fmt.Sprintf("steps.%d.%s", previousStep, previousOutput))
+	}
+
+	// Set the output from the tail end of the pipeline
+	lastStep := len(p.steps) - 1
+	lastOutput := p.steps[lastStep].GetOutputMethods()[0]
+	pipelineOutputs := []*pipeline.PipelineDescriptionOutput{
+		{
+			Data: fmt.Sprintf("steps.%d.%s", lastStep, lastOutput),
+		},
+	}
+
+	// build the pipeline descriptions
+	descriptionSteps := make([]*pipeline.PipelineDescriptionStep, 0, len(p.steps))
+	for _, step := range p.steps {
+		builtStep, err := step.BuildDescriptionStep()
+		if err != nil {
+			return nil, err
+		}
+		descriptionSteps = append(descriptionSteps, builtStep)
+	}
+
+	pipelineDesc := &pipeline.PipelineDescription{
+		Name:        p.name,
+		Description: p.description,
+		Steps:       descriptionSteps,
+		Outputs:     pipelineOutputs,
+	}
+
+	return pipelineDesc, nil
+}
diff --git a/primitive/compute/description/inference_step_data.go b/primitive/compute/description/inference_step_data.go
new file mode 100644
index 0000000..8eabda8
--- /dev/null
+++ b/primitive/compute/description/inference_step_data.go
@@ -0,0 +1,92 @@
+package description
+
+import (
+	"fmt"
+
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+	log "github.com/unchartedsoftware/plog"
+)
+
+// InferenceStepData provides data for a pipeline description placeholder step,
+// which marks the point at which a TA2 should begin pipeline inference.
+type InferenceStepData struct {
+	Inputs  []string
+	Outputs []string
+}
+
+// NewInferenceStepData creates an InferenceStepData instance with default values:
+// no inputs and a single "produce" output.
+func NewInferenceStepData() *InferenceStepData {
+	return &InferenceStepData{
+		Inputs:  []string{},
+		Outputs: []string{"produce"},
+	}
+}
+
+// GetPrimitive returns nil since there is no primitive associated with a placeholder
+// step.
+func (s *InferenceStepData) GetPrimitive() *pipeline.Primitive {
+	return nil
+}
+
+// GetArguments exposes the placeholder step inputs in the map form used for
+// primitive step arguments. Each input is keyed by the step inputs key followed
+// by its position, e.g. "inputs.0". A new map is built on every call.
+func (s *InferenceStepData) GetArguments() map[string]string {
+	arguments := make(map[string]string, len(s.Inputs))
+	for position := 0; position < len(s.Inputs); position++ {
+		arguments[fmt.Sprintf("%s.%d", stepInputsKey, position)] = s.Inputs[position]
+	}
+	return arguments
+}
+
+// UpdateArguments appends value to the placeholder step inputs. A warning is
+// logged when key is not the step inputs key, but the value is appended either
+// way.
+func (s *InferenceStepData) UpdateArguments(key string, value string) {
+	unexpectedKey := key != stepInputsKey
+	if unexpectedKey {
+		log.Warnf("Compile warning - inference step key `%s` is not `%s` as expected", key, stepInputsKey)
+	}
+	s.Inputs = append(s.Inputs, value)
+}
+
+// GetHyperparameters returns a new empty map, since inference steps don't
+// take hyper parameters.
+func (s *InferenceStepData) GetHyperparameters() map[string]interface{} {
+	none := map[string]interface{}{}
+	return none
+}
+
+// GetOutputMethods returns the list of methods that will be called to generate
+// the step output. These feed into downstream primitives.
+func (s *InferenceStepData) GetOutputMethods() []string {
+	return s.Outputs
+}
+
+// BuildDescriptionStep creates protobuf structures from a pipeline step
+// definition.
+// The result is a placeholder step carrying the inference step inputs and
+// outputs; the returned error is always nil.
+func (s *InferenceStepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error) {
+	// one step input per recorded input reference
+	stepInputs := make([]*pipeline.StepInput, len(s.Inputs))
+	for i, reference := range s.Inputs {
+		stepInputs[i] = &pipeline.StepInput{Data: reference}
+	}
+
+	// one step output per output method - order matters because the steps
+	// are numbered
+	stepOutputs := make([]*pipeline.StepOutput, len(s.Outputs))
+	for i, method := range s.Outputs {
+		stepOutputs[i] = &pipeline.StepOutput{Id: method}
+	}
+
+	// wrap both lists in the placeholder variant of the description step
+	placeholder := &pipeline.PlaceholderPipelineDescriptionStep{
+		Inputs:  stepInputs,
+		Outputs: stepOutputs,
+	}
+	descriptionStep := &pipeline.PipelineDescriptionStep{
+		Step: &pipeline.PipelineDescriptionStep_Placeholder{
+			Placeholder: placeholder,
+		},
+	}
+	return descriptionStep, nil
+}
diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go
new file mode 100644
index 0000000..a37d5b3
--- /dev/null
+++ b/primitive/compute/description/preprocessing.go
@@ -0,0 +1,127 @@
+package description
+
+import (
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+)
+
+const defaultResource = "0"
+
+// CreateUserDatasetPipeline creates a pipeline description to capture user feature selection and
+// semantic type information.
+// The pipeline currently consists of the inference point alone, and
+// targetFeature is not used when building it.
+func CreateUserDatasetPipeline(name string, description string,
+	targetFeature string) (*pipeline.PipelineDescription, error) {
+	// the whole pipeline is left for the TA2 to infer
+	desc, err := NewBuilder(name, description).AddInferencePoint().Compile()
+	if err != nil {
+		return nil, err
+	}
+
+	// Input set to arbitrary string for now
+	datasetInput := &pipeline.PipelineDescriptionInput{Name: "dataset"}
+	desc.Inputs = []*pipeline.PipelineDescriptionInput{datasetInput}
+	return desc, nil
+}
+
+// update holds the column indices to remove and to add.
+type update struct {
+	removeIndices []int
+	addIndices    []int
+}
+
+// newUpdate returns an update whose index lists are both empty.
+func newUpdate() *update {
+	empty := &update{}
+	empty.addIndices = []int{}
+	empty.removeIndices = []int{}
+	return empty
+}
+
+// CreateSlothPipeline creates a pipeline to perform timeseries clustering on a dataset.
+// The steps are denormalize, dataset-to-dataframe, then Sloth.
+func CreateSlothPipeline(name string, description string, targetColumns []string, outputLabels []string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDenormalizeStep()).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewSlothStep(targetColumns, outputLabels))
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
+
+// CreateDukePipeline creates a pipeline that runs the Duke primitive on a dataset.
+// The steps are dataset-to-dataframe, then Duke.
+func CreateDukePipeline(name string, description string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewDukeStep())
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
+
+// CreateSimonPipeline creates a pipeline to run semantic type inference on a dataset's
+// columns. The steps are dataset-to-dataframe, then Simon.
+func CreateSimonPipeline(name string, description string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewSimonStep())
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
+
+// CreateCrocPipeline creates a pipeline to run image featurization on a dataset.
+// The steps are denormalize, dataset-to-dataframe, then Croc.
+func CreateCrocPipeline(name string, description string, targetColumns []string, outputLabels []string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDenormalizeStep()).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewCrocStep(targetColumns, outputLabels))
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
+
+// CreateUnicornPipeline creates a pipeline to run image clustering on a dataset.
+// The steps are denormalize, dataset-to-dataframe, then Unicorn.
+func CreateUnicornPipeline(name string, description string, targetColumns []string, outputLabels []string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDenormalizeStep()).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewUnicornStep(targetColumns, outputLabels))
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
+
+// CreatePCAFeaturesPipeline creates a pipeline to run feature ranking on an input dataset.
+// The steps are dataset-to-dataframe, then PCA features.
+func CreatePCAFeaturesPipeline(name string, description string) (*pipeline.PipelineDescription, error) {
+	steps := NewBuilder(name, description).
+		Add(NewDatasetToDataframeStep()).
+		Add(NewPCAFeaturesStep())
+
+	desc, err := steps.Compile()
+	if err != nil {
+		return nil, err
+	}
+	return desc, nil
+}
diff --git a/primitive/compute/description/primitive_steps.go b/primitive/compute/description/primitive_steps.go
new file mode 100644
index 0000000..98337e3
--- /dev/null
+++ b/primitive/compute/description/primitive_steps.go
@@ -0,0 +1,244 @@
+package description
+
+import (
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+)
+
+// NewSimonStep creates a SIMON data classification step.
+// It examines an input
+// dataframe, and assigns types to the columns based on the exposed metadata.
+func NewSimonStep() *StepData {
+	simon := &pipeline.Primitive{
+		Id:         "d2fa8df2-6517-3c26-bafc-87b701c4043a",
+		Version:    "1.1.1",
+		Name:       "simon",
+		PythonPath: "d3m.primitives.distil.simon",
+		Digest:     "0673d166f157944d3b6fdfa451f31fdfdbead7315ede3d6d9edb20f3f220b836",
+	}
+	return NewStepData(simon, []string{"produce"})
+}
+
+// NewSlothStep creates a Sloth timeseries clustering step. The target columns
+// and output labels are passed to the primitive as hyperparameters.
+func NewSlothStep(targetColumns []string, outputLabels []string) *StepData {
+	sloth := &pipeline.Primitive{
+		Id:         "77bf4b92-2faa-3e38-bb7e-804131243a7f",
+		Version:    "1.0.0",
+		Name:       "Sloth",
+		PythonPath: "d3m.primitives.distil.Sloth.cluster",
+		Digest:     "f94f1aacc23792b680af0bd895f0fd2bac7336b29967b6ad766df4cb3c1933ab",
+	}
+	hyperparameters := map[string]interface{}{
+		"target_columns": targetColumns,
+		"output_labels":  outputLabels,
+	}
+	return NewStepDataWithHyperparameters(sloth, []string{"produce"}, hyperparameters)
+}
+
+// NewUnicornStep creates a unicorn image clustering step. The target columns
+// and output labels are passed to the primitive as hyperparameters.
+func NewUnicornStep(targetColumns []string, outputLabels []string) *StepData {
+	unicorn := &pipeline.Primitive{
+		Id:         "475c26dc-eb2e-43d3-acdb-159b80d9f099",
+		Version:    "1.0.0",
+		Name:       "unicorn",
+		PythonPath: "d3m.primitives.distil.unicorn",
+		Digest:     "2b0c0784fc077b106a9547a197be92ab02298dc206d60610929c50f831e86e84",
+	}
+	hyperparameters := map[string]interface{}{
+		"target_columns": targetColumns,
+		"output_labels":  outputLabels,
+	}
+	return NewStepDataWithHyperparameters(unicorn, []string{"produce"}, hyperparameters)
+}
+
+// NewPCAFeaturesStep creates a PCA-based feature ranking call that can be added to
+// a pipeline.
+func NewPCAFeaturesStep() *StepData {
+	pcaFeatures := &pipeline.Primitive{
+		Id:         "04573880-d64f-4791-8932-52b7c3877639",
+		Version:    "3.0.0",
+		Name:       "PCA Features",
+		PythonPath: "d3m.primitives.distil.pcafeatures",
+		Digest:     "5302eebf2fb8a80e9f00e7b74888aba9eb448a9c0463d9d26786dab717a62c61",
+	}
+	return NewStepData(pcaFeatures, []string{"produce"})
+}
+
+// NewDukeStep creates a wrapper for the Duke dataset classifier.
+func NewDukeStep() *StepData {
+	duke := &pipeline.Primitive{
+		Id:         "46612a42-6120-3559-9db9-3aa9a76eb94f",
+		Version:    "1.1.1",
+		Name:       "duke",
+		PythonPath: "d3m.primitives.distil.duke",
+		Digest:     "ea522d2adc756c3ad76f5848d28cd396304d4dfdc0cc55aa8b90fbaf04e8fc30",
+	}
+	return NewStepData(duke, []string{"produce"})
+}
+
+// NewCrocStep creates a wrapper for the Croc image classifier. The target
+// columns and output labels are passed to the primitive as hyperparameters.
+func NewCrocStep(targetColumns []string, outputLabels []string) *StepData {
+	croc := &pipeline.Primitive{
+		Id:         "404fae2a-2f0a-4c9b-9ad2-fb1528990561",
+		Version:    "1.2.2",
+		Name:       "croc",
+		PythonPath: "d3m.primitives.distil.croc",
+		Digest:     "09cd99d609e317559feff580b8d893d0188f12915ab8d84a98de34eb344e340c",
+	}
+	hyperparameters := map[string]interface{}{
+		"target_columns": targetColumns,
+		"output_labels":  outputLabels,
+	}
+	return NewStepDataWithHyperparameters(croc, []string{"produce"}, hyperparameters)
+}
+
+// NewDatasetToDataframeStep creates a primitive call that transforms an input dataset
+// into a PANDAS dataframe.
+func NewDatasetToDataframeStep() *StepData {
+	converter := &pipeline.Primitive{
+		Id:         "4b42ce1e-9b98-4a25-b68e-fad13311eb65",
+		Version:    "0.3.0",
+		Name:       "Dataset to DataFrame converter",
+		PythonPath: "d3m.primitives.datasets.DatasetToDataFrame",
+		Digest:     "85b946aa6123354fe51a288c3be56aaca82e76d4071c1edc13be6f9e0e100144",
+	}
+	return NewStepData(converter, []string{"produce"})
+}
+
+// ColumnUpdate defines a set of column indices to add/remove
+// a set of semantic types to/from.
+type ColumnUpdate struct {
+	Indices       []int
+	SemanticTypes []string
+}
+
+// NewUpdateSemanticTypeStep adds and removes semantic data values from an input
+// dataset.  An add of (1, 2), ("type a", "type b") would result in "type a" and "type b"
+// being added to index 1 and 2. A nil add or remove is treated as an empty
+// update, i.e. nothing to add or nothing to remove. The returned error is
+// always nil.
+//
+// NOTE(review): the Digest below is identical to the one used for the dataset
+// to dataframe and column remover primitives - confirm it is the right value.
+func NewUpdateSemanticTypeStep(resourceID string, add *ColumnUpdate, remove *ColumnUpdate) (*StepData, error) {
+	// guard against nil updates so that callers with only additions or only
+	// removals do not trigger a nil dereference
+	if add == nil {
+		add = &ColumnUpdate{}
+	}
+	if remove == nil {
+		remove = &ColumnUpdate{}
+	}
+
+	return NewStepDataWithHyperparameters(
+		&pipeline.Primitive{
+			Id:         "98c79128-555a-4a6b-85fb-d4f4064c94ab",
+			Version:    "0.2.0",
+			Name:       "Semantic type updater",
+			PythonPath: "d3m.primitives.datasets.UpdateSemanticTypes",
+			Digest:     "85b946aa6123354fe51a288c3be56aaca82e76d4071c1edc13be6f9e0e100144",
+		},
+		[]string{"produce"},
+		map[string]interface{}{
+			"resource_id":    resourceID,
+			"add_columns":    add.Indices,
+			"add_types":      add.SemanticTypes,
+			"remove_columns": remove.Indices,
+			"remove_types":   remove.SemanticTypes,
+		},
+	), nil
+}
+
+// NewDenormalizeStep denormalizes data that is contained in multiple resource files.
+func NewDenormalizeStep() *StepData {
+	return NewStepData(
+		&pipeline.Primitive{
+			Id:         "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e",
+			Version:    "0.2.0",
+			Name:       "Denormalize datasets",
+			PythonPath: "d3m.primitives.datasets.Denormalize",
+			Digest:     "c39e3436373aed1944edbbc9b1cf24af5c71919d73bf0bb545cba0b685812df1",
+		},
+		[]string{"produce"},
+	)
+}
+
+// NewRemoveColumnsStep removes columns from an input dataset resource. Columns
+// are specified by index.
+// The returned error is always nil.
+func NewRemoveColumnsStep(resourceID string, colIndices []int) (*StepData, error) {
+	remover := &pipeline.Primitive{
+		Id:         "2eeff053-395a-497d-88db-7374c27812e6",
+		Version:    "0.2.0",
+		Name:       "Column remover",
+		PythonPath: "d3m.primitives.datasets.RemoveColumns",
+		Digest:     "85b946aa6123354fe51a288c3be56aaca82e76d4071c1edc13be6f9e0e100144",
+	}
+	hyperparameters := map[string]interface{}{
+		"resource_id": resourceID,
+		"columns":     colIndices,
+	}
+	step := NewStepDataWithHyperparameters(remover, []string{"produce"}, hyperparameters)
+	return step, nil
+}
+
+// NewTermFilterStep creates a step for the term list dataset filter primitive.
+// The resource, column index, inclusive flag, term list and whole-match flag
+// are passed through as hyperparameters.
+func NewTermFilterStep(resourceID string, colindex int, inclusive bool, terms []string, matchWhole bool) *StepData {
+	termFilter := &pipeline.Primitive{
+		Id:         "622893c7-42fc-4561-a6f6-071fb85d610a",
+		Version:    "0.1.0",
+		Name:       "Term list dataset filter",
+		PythonPath: "d3m.primitives.datasets.TermFilter",
+		Digest:     "",
+	}
+	hyperparameters := map[string]interface{}{
+		"resource_id": resourceID,
+		"column":      colindex,
+		"inclusive":   inclusive,
+		"terms":       terms,
+		"match_whole": matchWhole,
+	}
+	return NewStepDataWithHyperparameters(termFilter, []string{"produce"}, hyperparameters)
+}
+
+// NewRegexFilterStep creates a step for the regex dataset filter primitive.
+// The resource, column index, inclusive flag and regular expression are passed
+// through as hyperparameters.
+func NewRegexFilterStep(resourceID string, colindex int, inclusive bool, regex string) *StepData {
+	regexFilter := &pipeline.Primitive{
+		Id:         "d1b4c4b7-63ba-4ee6-ab30-035157cccf22",
+		Version:    "0.1.0",
+		Name:       "Regex dataset filter",
+		PythonPath: "d3m.primitives.datasets.RegexFilter",
+		Digest:     "",
+	}
+	hyperparameters := map[string]interface{}{
+		"resource_id": resourceID,
+		"column":      colindex,
+		"inclusive":   inclusive,
+		"regex":       regex,
+	}
+	return NewStepDataWithHyperparameters(regexFilter, []string{"produce"}, hyperparameters)
+}
+
+// NewNumericRangeFilterStep creates a step for the numeric range filter primitive.
+// The resource, column index, inclusive flag, range bounds and strict flag are
+// passed through as hyperparameters.
+func NewNumericRangeFilterStep(resourceID string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData {
+	return NewStepDataWithHyperparameters(
+		&pipeline.Primitive{
+			Id:         "8b1c1140-8c21-4f41-aeca-662b7d35aa29",
+			Version:    "0.1.0",
+			Name:       "Numeric range filter",
+			PythonPath: "d3m.primitives.datasets.NumericRangeFilter",
+			Digest:     "",
+		},
+		[]string{"produce"},
+		map[string]interface{}{
+			"resource_id": resourceID,
+			"column":      colindex,
+			"inclusive":   inclusive,
+			"min":         min,
+			"max":         max,
+			"strict":      strict,
+		},
+	)
+}
diff --git a/primitive/compute/description/step_data.go b/primitive/compute/description/step_data.go
new file mode 100644
index 0000000..19ae39a
--- /dev/null
+++ b/primitive/compute/description/step_data.go
@@ -0,0 +1,267 @@
+package description
+
+import (
+	"reflect"
+
+	"github.com/pkg/errors"
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+)
+
+const (
+	// stepInputsKey is the argument name under which a step receives its input.
+	stepInputsKey = "inputs"
+	// pipelineInputsKey is the prefix used to reference the pipeline inputs.
+	pipelineInputsKey = "inputs"
+)
+
+// Step provides data for a pipeline description step and an operation
+// to create a protobuf PipelineDescriptionStep from that data.
+type Step interface {
+	BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error)
+	GetPrimitive() *pipeline.Primitive
+	GetArguments() map[string]string
+	UpdateArguments(string, string)
+	GetHyperparameters() map[string]interface{}
+	GetOutputMethods() []string
+}
+
+// StepData contains the minimum amount of data used to describe a pipeline step
+type StepData struct {
+	Primitive       *pipeline.Primitive
+	Arguments       map[string]string
+	Hyperparameters map[string]interface{}
+	OutputMethods   []string
+}
+
+// NewStepData Creates a pipeline step instance from the required field subset.
+func NewStepData(primitive *pipeline.Primitive, outputMethods []string) *StepData {
+	return NewStepDataWithHyperparameters(primitive, outputMethods, nil)
+}
+
+// NewStepDataWithHyperparameters creates a pipeline step instance from the required field subset.  Hyperparameters are
+// optional so nil is a valid value. Valid types for hyperparameter values are the signed integer
+// types, float32/float64, string and bool, as well as slices and string keyed maps of those types.
+func NewStepDataWithHyperparameters(primitive *pipeline.Primitive, outputMethods []string, hyperparameters map[string]interface{}) *StepData {
+	return &StepData{
+		Primitive:       primitive,
+		Hyperparameters: hyperparameters, // optional, nil is valid
+		Arguments:       map[string]string{},
+		OutputMethods:   outputMethods,
+	}
+}
+
+// GetPrimitive returns a primitive definition for a pipeline step.
+func (s *StepData) GetPrimitive() *pipeline.Primitive {
+	return s.Primitive
+}
+
+// GetArguments returns a map of arguments that will be passed to the methods
+// of the primitive step. The returned map is a copy, so changes made to it do
+// not affect the step; use UpdateArguments to modify the step's arguments.
+func (s *StepData) GetArguments() map[string]string {
+	arguments := make(map[string]string, len(s.Arguments))
+	for k, v := range s.Arguments {
+		arguments[k] = v
+	}
+	return arguments
+}
+
+// UpdateArguments updates the arguments map that will be passed to the methods
+// of primitive step. An existing value for key is overwritten.
+func (s *StepData) UpdateArguments(key string, value string) {
+	// a StepData built as a struct literal has no argument map yet, and writing
+	// to a nil map panics
+	if s.Arguments == nil {
+		s.Arguments = map[string]string{}
+	}
+	s.Arguments[key] = value
+}
+
+// GetHyperparameters returns a map of arguments that will be passed to the primitive methods
+// of the primitive step. The map may be nil when the step has no hyperparameters.
+func (s *StepData) GetHyperparameters() map[string]interface{} {
+	return s.Hyperparameters
+}
+
+// GetOutputMethods returns a list of methods that will be called to generate
+// primitive output.  These feed into downstream primitives.
+func (s *StepData) GetOutputMethods() []string {
+	return s.OutputMethods
+}
+
+// BuildDescriptionStep creates protobuf structures from a pipeline step
+// definition.
+// Arguments become container arguments, hyperparameters become raw values, and
+// an error is returned when a hyperparameter value has an unsupported type.
+func (s *StepData) BuildDescriptionStep() (*pipeline.PipelineDescriptionStep, error) {
+
+	// generate arguments entries
+	arguments := map[string]*pipeline.PrimitiveStepArgument{}
+	for k, v := range s.Arguments {
+		arguments[k] = &pipeline.PrimitiveStepArgument{
+			// only handle container args rights now - extend to others if required
+			Argument: &pipeline.PrimitiveStepArgument_Container{
+				Container: &pipeline.ContainerArgument{
+					Data: v,
+				},
+			},
+		}
+	}
+
+	// generate hyperparameter entries - accepted types are the signed integer types,
+	// floats, string and bool, as well as lists and string keyed maps of those types.
+	// The underlying protobuf structure allows for others that can be handled here as needed.
+	hyperparameters := map[string]*pipeline.PrimitiveStepHyperparameter{}
+	for k, v := range s.Hyperparameters {
+		rawValue, err := parseValue(v)
+		if err != nil {
+			return nil, errors.Errorf("compile failed: hyperparameter `%s` - %s", k, err.Error())
+		}
+
+		hyperparameters[k] = &pipeline.PrimitiveStepHyperparameter{
+			// only handle value args rights now - extend to others if required
+			Argument: &pipeline.PrimitiveStepHyperparameter_Value{
+				Value: &pipeline.ValueArgument{
+					Data: &pipeline.Value{
+						Value: &pipeline.Value_Raw{
+							Raw: rawValue,
+						},
+					},
+				},
+			},
+		}
+	}
+
+	// list of methods that will generate output - order matters because the steps are
+	// numbered
+	outputMethods := []*pipeline.StepOutput{}
+	for _, outputMethod := range s.OutputMethods {
+		outputMethods = append(outputMethods,
+			&pipeline.StepOutput{
+				Id: outputMethod,
+			})
+	}
+
+	// create the pipeline description structure
+	return &pipeline.PipelineDescriptionStep{
+		Step: &pipeline.PipelineDescriptionStep_Primitive{
+			Primitive: &pipeline.PrimitivePipelineDescriptionStep{
+				Primitive:   s.Primitive,
+				Arguments:   arguments,
+				Hyperparams: hyperparameters,
+				Outputs:     outputMethods,
+			},
+		},
+	}, nil
+}
+
+// parseElement converts a single list item or map value into a raw value.
+// container names the enclosing collection ("list" or "map") and is only used
+// in the error message for unsupported element types.
+func parseElement(refElement reflect.Value, container string) (*pipeline.ValueRaw, error) {
+	// Elements of collections declared with an interface element type (such as
+	// []interface{} or map[string]interface{}) report the Interface kind, so
+	// unwrap them to get at the kind of the value that is actually stored.
+	if refElement.Kind() == reflect.Interface {
+		refElement = refElement.Elem()
+	}
+
+	switch refElement.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.String, reflect.Bool, reflect.Float32, reflect.Float64:
+		return parseValue(refElement.Interface())
+	case reflect.Slice:
+		return parseList(refElement.Interface())
+	case reflect.Map:
+		return parseMap(refElement.Interface())
+	default:
+		return nil, errors.Errorf("unhandled %s arg type %s", container, refElement.Kind())
+	}
+}
+
+// parseList converts a slice into a raw list value. Each item may itself be a
+// value, a list or a map. An error is returned when v is not a slice or when
+// an item has an unsupported type.
+func parseList(v interface{}) (*pipeline.ValueRaw, error) {
+	// type switches don't work well with generic arrays/maps so we have to revert to using reflection
+	refValue := reflect.ValueOf(v)
+	if refValue.Kind() != reflect.Slice {
+		return nil, errors.Errorf("unexpected parameter %s", refValue.Kind())
+	}
+
+	// parse list contents as a list, map, or value
+	valueList := make([]*pipeline.ValueRaw, 0, refValue.Len())
+	for i := 0; i < refValue.Len(); i++ {
+		value, err := parseElement(refValue.Index(i), "list")
+		if err != nil {
+			return nil, err
+		}
+		valueList = append(valueList, value)
+	}
+
+	rawValue := &pipeline.ValueRaw{
+		Raw: &pipeline.ValueRaw_List{
+			List: &pipeline.ValueList{
+				Items: valueList,
+			},
+		},
+	}
+	return rawValue, nil
+}
+
+// parseMap converts a string keyed map into a raw dict value. Each map value
+// may itself be a value, a list or a map. An error is returned when vmap is
+// not a map, when a key is not a string, or when a map value has an
+// unsupported type.
+func parseMap(vmap interface{}) (*pipeline.ValueRaw, error) {
+	// type switches don't work well with generic arrays/maps so we have to revert to using reflection
+	refValue := reflect.ValueOf(vmap)
+	if refValue.Kind() != reflect.Map {
+		return nil, errors.Errorf("unexpected parameter %s", refValue.Kind())
+	}
+
+	// parse map contents as list, map or value
+	valueMap := map[string]*pipeline.ValueRaw{}
+	for _, key := range refValue.MapKeys() {
+		if key.Kind() != reflect.String {
+			// report the kind of the offending key, not of the map itself
+			return nil, errors.Errorf("non-string map key type %s", key.Kind())
+		}
+
+		value, err := parseElement(refValue.MapIndex(key), "map")
+		if err != nil {
+			return nil, err
+		}
+		valueMap[key.String()] = value
+	}
+
+	v := &pipeline.ValueRaw{
+		Raw: &pipeline.ValueRaw_Dict{
+			Dict: &pipeline.ValueDict{
+				Items: valueMap,
+			},
+		},
+	}
+	return v, nil
+}
+
+// parseValue converts a Go value into a raw protobuf value. Signed integers
+// are stored as int64 and floats as double; strings and bools are stored as
+// is; slices and maps are handed to parseList and parseMap. Any other kind,
+// including unsigned integers and nil, results in an error.
+func parseValue(v interface{}) (*pipeline.ValueRaw, error) {
+	refValue := reflect.ValueOf(v)
+	switch refValue.Kind() {
+	// parse a numeric, string or boolean value
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return &pipeline.ValueRaw{
+			Raw: &pipeline.ValueRaw_Int64{
+				Int64: refValue.Int(),
+			},
+		}, nil
+	case reflect.Float32, reflect.Float64:
+		return &pipeline.ValueRaw{
+			Raw: &pipeline.ValueRaw_Double{
+				Double: refValue.Float(),
+			},
+		}, nil
+	case reflect.String:
+		return &pipeline.ValueRaw{
+			Raw: &pipeline.ValueRaw_String_{
+				String_: refValue.String(),
+			},
+		}, nil
+	case reflect.Bool:
+		return &pipeline.ValueRaw{
+			Raw: &pipeline.ValueRaw_Bool{
+				Bool: refValue.Bool(),
+			},
+		}, nil
+	case reflect.Slice:
+		return parseList(v)
+	case reflect.Map:
+		return parseMap(v)
+	default:
+		return nil, errors.Errorf("unhandled value arg type %s", refValue.Kind())
+	}
+}
diff --git a/primitive/compute/execute_pipeline_request.go b/primitive/compute/execute_pipeline_request.go
new file mode 100644
index 0000000..a22ba54
--- /dev/null
+++ b/primitive/compute/execute_pipeline_request.go
@@ -0,0 +1,260 @@
+package compute
+
+import (
+	"context"
+	"fmt"
+	"path"
+	"sync"
+	"time"
+
+	"github.com/pkg/errors"
+	"github.com/unchartedsoftware/distil-ingest/pipeline"
+	"github.com/unchartedsoftware/plog"
+)
+
+// ExecPipelineStatus contains status / result information for a pipeline status
+// request.
+type ExecPipelineStatus struct { + Progress string + RequestID string + Error error + Timestamp time.Time + ResultURI string +} + +// ExecPipelineStatusListener defines a funtction type for handling pipeline +// execution result updates. +type ExecPipelineStatusListener func(status ExecPipelineStatus) + +// ExecPipelineRequest defines a request that will execute a fully specified pipline +// on a TA2 system. +type ExecPipelineRequest struct { + datasetURI string + pipelineDesc *pipeline.PipelineDescription + wg *sync.WaitGroup + statusChannel chan ExecPipelineStatus + finished chan error +} + +// NewExecPipelineRequest creates a new request that will run the supplied dataset through +// the pipeline description. +func NewExecPipelineRequest(datasetURI string, pipelineDesc *pipeline.PipelineDescription) *ExecPipelineRequest { + return &ExecPipelineRequest{ + datasetURI: fmt.Sprintf("file://%s", path.Join(datasetURI, D3MDataSchema)), + pipelineDesc: pipelineDesc, + wg: &sync.WaitGroup{}, + finished: make(chan error), + statusChannel: make(chan ExecPipelineStatus, 1), + } +} + +// Listen listens for new solution statuses and invokes the caller supplied function +// when a status update is received. The call will block until the request completes. 
+func (e *ExecPipelineRequest) Listen(listener ExecPipelineStatusListener) error { + go func() { + for { + listener(<-e.statusChannel) + } + }() + return <-e.finished +} + +// Dispatch dispatches a pipeline exeucute request for processing by TA2 +func (e *ExecPipelineRequest) Dispatch(client *Client) error { + requestID, err := client.StartSearch(context.Background(), &pipeline.SearchSolutionsRequest{ + Version: GetAPIVersion(), + UserAgent: client.UserAgent, + Template: e.pipelineDesc, + AllowedValueTypes: []pipeline.ValueType{ + pipeline.ValueType_CSV_URI, + }, + Inputs: []*pipeline.Value{ + { + Value: &pipeline.Value_DatasetUri{ + DatasetUri: e.datasetURI, + }, + }, + }, + }) + if err != nil { + return err + } + + // dispatch search request + go e.dispatchRequest(client, requestID) + + return nil +} + +func (e *ExecPipelineRequest) dispatchRequest(client *Client, requestID string) { + + // Update request status + e.notifyStatus(e.statusChannel, requestID, RequestPendingStatus) + + var firstSolution string + var fitCalled bool + // Search for solutions, this wont return until the produce finishes or it times out. + err := client.SearchSolutions(context.Background(), requestID, func(solution *pipeline.GetSearchSolutionsResultsResponse) { + // A complete pipeline specification should result in a single solution being generated. Consider it an + // error condition when that is not the case. 
+ if firstSolution == "" { + e.wg.Add(1) + firstSolution = solution.GetSolutionId() + } else if firstSolution != solution.GetSolutionId() { + log.Warnf("multiple solutions found for request %s, expected 1", requestID) + return + } + + // handle solution search update - status codes pertain to the search itself, and not a particular + // solution + if solution.GetProgress().GetState() == pipeline.ProgressState_ERRORED { + // search errored - most likely case is that the supplied pipeline had a problem in its specification + err := errors.Errorf("could not generate solution for request - %s", solution.GetProgress().GetStatus()) + e.notifyError(e.statusChannel, requestID, err) + e.wg.Done() + } else { + // search is actively running or has completed - safe to call fit at this point, but we should + // only do so once. A status update with no actual solution ID is valid in the API. + e.notifyStatus(e.statusChannel, requestID, RequestRunningStatus) + if solution.GetSolutionId() != "" && !fitCalled { + fitCalled = true + fittedSolutionID := e.dispatchFit(e.statusChannel, client, requestID, solution.GetSolutionId()) + if fittedSolutionID == "" { + e.wg.Done() + return + } + + // fit complete, safe to produce results + e.notifyStatus(e.statusChannel, requestID, RequestRunningStatus) + e.dispatchProduce(e.statusChannel, client, requestID, fittedSolutionID) + e.wg.Done() + } + + } + }) + + if err != nil { + e.notifyError(e.statusChannel, requestID, err) + } + + // wait until all are complete and the search has finished / timed out + e.wg.Wait() + + // end search + e.finished <- client.EndSearch(context.Background(), requestID) +} + +func (e *ExecPipelineRequest) dispatchFit(statusChan chan ExecPipelineStatus, client *Client, requestID string, solutionID string) string { + // run produce - this blocks until all responses are returned + responses, err := client.GenerateSolutionFit(context.Background(), solutionID, e.datasetURI) + if err != nil { + e.notifyError(statusChan, 
requestID, err) + return "" + } + + // find the completed response + var completed *pipeline.GetFitSolutionResultsResponse + for _, response := range responses { + if response.Progress.State == pipeline.ProgressState_COMPLETED { + completed = response + break + } + } + if completed == nil { + err := errors.Errorf("no completed response found") + e.notifyError(statusChan, requestID, err) + return "" + } + return completed.GetFittedSolutionId() +} + +func (e *ExecPipelineRequest) createProduceSolutionRequest(datsetURI string, solutionID string) *pipeline.ProduceSolutionRequest { + return &pipeline.ProduceSolutionRequest{ + FittedSolutionId: solutionID, + Inputs: []*pipeline.Value{ + { + Value: &pipeline.Value_DatasetUri{ + DatasetUri: e.datasetURI, + }, + }, + }, + ExposeOutputs: []string{defaultExposedOutputKey}, + ExposeValueTypes: []pipeline.ValueType{ + pipeline.ValueType_CSV_URI, + }, + } +} + +func (e *ExecPipelineRequest) dispatchProduce(statusChan chan ExecPipelineStatus, client *Client, requestID string, fittedSolutionID string) { + // generate predictions + produceRequest := e.createProduceSolutionRequest(e.datasetURI, fittedSolutionID) + + // run produce - this blocks until all responses are returned + responses, err := client.GeneratePredictions(context.Background(), produceRequest) + if err != nil { + e.notifyError(statusChan, requestID, err) + return + } + + // find the completed response + var completed *pipeline.GetProduceSolutionResultsResponse + for _, response := range responses { + if response.Progress.State == pipeline.ProgressState_COMPLETED { + completed = response + break + } + } + if completed == nil { + err := errors.Errorf("no completed response found") + e.notifyError(statusChan, requestID, err) + return + } + + // make sure the exposed output is what was asked for + output, ok := completed.ExposedOutputs[defaultExposedOutputKey] + if !ok { + err := errors.Errorf("output is missing from response") + e.notifyError(statusChan, requestID, 
err) + return + } + + var uri string + results := output.Value + switch res := results.(type) { + case *pipeline.Value_DatasetUri: + uri = res.DatasetUri + case *pipeline.Value_CsvUri: + uri = res.CsvUri + default: + err = errors.Errorf("unexpected result type '%v'", res) + e.notifyError(statusChan, requestID, err) + } + e.notifyResult(statusChan, requestID, uri) +} + +func (e *ExecPipelineRequest) notifyStatus(statusChan chan ExecPipelineStatus, requestID string, status string) { + // notify of update + statusChan <- ExecPipelineStatus{ + RequestID: requestID, + Progress: status, + Timestamp: time.Now(), + } +} + +func (e *ExecPipelineRequest) notifyError(statusChan chan ExecPipelineStatus, requestID string, err error) { + statusChan <- ExecPipelineStatus{ + RequestID: requestID, + Progress: RequestErroredStatus, + Error: err, + Timestamp: time.Now(), + } +} + +func (e *ExecPipelineRequest) notifyResult(statusChan chan ExecPipelineStatus, requestID string, resultURI string) { + statusChan <- ExecPipelineStatus{ + RequestID: requestID, + Progress: RequestCompletedStatus, + ResultURI: resultURI, + Timestamp: time.Now(), + } +} diff --git a/primitive/compute/persist.go b/primitive/compute/persist.go new file mode 100644 index 0000000..547cceb --- /dev/null +++ b/primitive/compute/persist.go @@ -0,0 +1,161 @@ +package compute + +import ( + "bytes" + "encoding/csv" + "fmt" + "io" + "math/rand" + "os" + "path" + "strings" + + "github.com/otiai10/copy" + "github.com/pkg/errors" + "github.com/unchartedsoftware/plog" + + "github.com/unchartedsoftware/distil-ingest/metadata" + "github.com/unchartedsoftware/distil-ingest/util" +) + +const ( + // D3MDataSchema provides the name of the D3M data schema file + D3MDataSchema = "datasetDoc.json" + + trainFilenamePrefix = "train" + testFilenamePrefix = "test" +) + +func updateSchemaReferenceFile(schema string, prevReferenceFile string, newReferenceFile string) string { + return strings.Replace(schema, fmt.Sprintf("\"resPath\": 
\"%s\"", prevReferenceFile), fmt.Sprintf("\"resPath\": \"%s\"", newReferenceFile), 1) +} + +func splitTrainTest(sourceFile string, trainFile string, testFile string, hasHeader bool) error { + // create the writers + outputTrain := &bytes.Buffer{} + writerTrain := csv.NewWriter(outputTrain) + outputTest := &bytes.Buffer{} + writerTest := csv.NewWriter(outputTest) + + // open the file + file, err := os.Open(sourceFile) + if err != nil { + return errors.Wrap(err, "failed to open source file") + } + reader := csv.NewReader(file) + + // write header to both outputs + if hasHeader { + header, err := reader.Read() + if err != nil { + return errors.Wrap(err, "unable to read header row") + } + err = writerTrain.Write(header) + if err != nil { + return errors.Wrap(err, "unable to write header to train output") + } + err = writerTest.Write(header) + if err != nil { + return errors.Wrap(err, "unable to write header to test output") + } + } + + // randomly assign rows to either train or test + for { + line, err := reader.Read() + if err == io.EOF { + break + } else if err != nil { + return errors.Wrap(err, "failed to read line from file") + } + if rand.Float64() < trainTestSplitThreshold { + err = writerTrain.Write(line) + if err != nil { + return errors.Wrap(err, "unable to write data to train output") + } + } else { + err = writerTest.Write(line) + if err != nil { + return errors.Wrap(err, "unable to write data to test output") + } + } + } + writerTrain.Flush() + writerTest.Flush() + + err = util.WriteFileWithDirs(trainFile, outputTrain.Bytes(), os.ModePerm) + if err != nil { + return errors.Wrap(err, "unable to output train data") + } + + err = util.WriteFileWithDirs(testFile, outputTest.Bytes(), os.ModePerm) + if err != nil { + return errors.Wrap(err, "unable to output test data") + } + + return nil +} + +// PersistOriginalData copies the original data and splits it into a train & +// test subset to be used as needed. 
+func PersistOriginalData(datasetName string, schemaFile string, sourceDataFolder string, tmpDataFolder string) (string, string, error) { + // The complete data is copied into separate train & test folders. + // The main data is then split randomly. + trainFolder := path.Join(tmpDataFolder, datasetName, trainFilenamePrefix) + testFolder := path.Join(tmpDataFolder, datasetName, testFilenamePrefix) + trainSchemaFile := path.Join(trainFolder, schemaFile) + testSchemaFile := path.Join(testFolder, schemaFile) + + // check if the data has already been split + log.Infof("checking folders `%s` & `%s` to see if the dataset has been previously split", trainFolder, testFolder) + if util.FileExists(trainSchemaFile) && util.FileExists(testSchemaFile) { + log.Infof("dataset '%s' already split", datasetName) + return trainSchemaFile, testSchemaFile, nil + } + + if util.DirExists(trainFolder) { + err := os.RemoveAll(trainFolder) + if err != nil { + return "", "", errors.Wrap(err, "unable to remove train folder from previous split attempt") + } + } + + if util.DirExists(testFolder) { + err := os.RemoveAll(testFolder) + if err != nil { + return "", "", errors.Wrap(err, "unable to remove test folder from previous split attempt") + } + } + + // copy the data over + err := copy.Copy(sourceDataFolder, trainFolder) + if err != nil { + return "", "", errors.Wrap(err, "unable to copy dataset folder to train") + } + + err = copy.Copy(sourceDataFolder, testFolder) + if err != nil { + return "", "", errors.Wrap(err, "unable to copy dataset folder to test") + } + + // read the dataset document + schemaFilename := path.Join(sourceDataFolder, schemaFile) + meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFilename) + if err != nil { + return "", "", err + } + + // determine where the d3m index would be + mainDR := meta.GetMainDataResource() + + // split the source data into train & test + dataPath := path.Join(sourceDataFolder, mainDR.ResPath) + trainDataFile := path.Join(trainFolder, 
mainDR.ResPath) + testDataFile := path.Join(testFolder, mainDR.ResPath) + err = splitTrainTest(dataPath, trainDataFile, testDataFile, true) + if err != nil { + return "", "", err + } + + return trainSchemaFile, testSchemaFile, nil +} diff --git a/primitive/compute/pull.go b/primitive/compute/pull.go new file mode 100644 index 0000000..f1ccfe5 --- /dev/null +++ b/primitive/compute/pull.go @@ -0,0 +1,47 @@ +package compute + +import ( + "io" + "time" + + "github.com/pkg/errors" +) + +type pullFunc func() error + +func pullFromAPI(maxPulls int, timeout time.Duration, pull pullFunc) error { + + recvChan := make(chan error) + + count := 0 + for { + + // pull + go func() { + err := pull() + recvChan <- err + }() + + // set timeout timer + timer := time.NewTimer(timeout) + + select { + case err := <-recvChan: + timer.Stop() + if err == io.EOF { + return nil + } else if err != nil { + return errors.Wrap(err, "rpc error") + } + count++ + if count > maxPulls { + return nil + } + + case <-timer.C: + // timeout + return errors.Errorf("solution request has timed out") + } + + } +} diff --git a/primitive/compute/result/complex_field.peg b/primitive/compute/result/complex_field.peg new file mode 100644 index 0000000..807709a --- /dev/null +++ b/primitive/compute/result/complex_field.peg @@ -0,0 +1,46 @@ +package result + +# Parser for an array field exported from a Python PANDAS dataframe via its to_csv() method. +# The exported file is RFC 4180 compatible, and is loaded using the CSV loader from the golang +# standard lib. The loader handles most of the escaping. +# +# "[10, 20, 30, \"forty & fifty\"]" +# +# parses to the Go type: +# +# []interface{"10", "20", "30", "forty & fifty"} +# + +# generated go struct - accessible via `p` in attached code blocks +type ComplexField Peg { + arrayElements +} + +ComplexField <- array !. + +# non-terminals +array <- ws* obracket { p.pushArray() } array_contents cbracket { p.popArray() } / + ws* oparen { p.pushArray() } array_contents comma? 
ws* cparen { p.popArray() } +array_contents <- ws* (item ws* (comma ws* item ws* )*)? +item <- array / string / { p.addElement(buffer[begin:end]) } +string <- dquote_string / squote_string +dquote_string <- escdquote <(textdata / squote / lf / cr / obracket / cbracket / oparen / cparen / comma)*> escdquote { p.addElement(buffer[begin:end]) } +squote_string <- squote <(escsquote / escdquote / textdata / lf / cr / obracket / cbracket / oparen / cparen)*> squote { p.addElement(buffer[begin:end]) } +value <- negative? number+ (decimal_point number+)? + +# terminals +ws <- ' ' +comma <- ',' +lf <- '\n' +cr <- '\r' +escdquote <- '\"' +escsquote <- '\\\'' +squote <- '\'' +obracket <- '[' +cbracket <- ']' +oparen <- '(' +cparen <- ')' +number <- [a-zA-Z0-9] +negative <- '-' +decimal_point <- '.' +textdata <- [a-zA-Z0-9 !#$&%*+\-./:;<->?\\^_`{|}~] diff --git a/primitive/compute/result/complex_field.peg.go b/primitive/compute/result/complex_field.peg.go new file mode 100644 index 0000000..97320a5 --- /dev/null +++ b/primitive/compute/result/complex_field.peg.go @@ -0,0 +1,1256 @@ +// Code generated by peg. DO NOT EDIT. +package result + +//go:generate peg -inline ./api/compute/result/complex_field.peg + +import ( + "fmt" + "math" + "sort" + "strconv" +) + +const endSymbol rune = 1114112 + +/* The rule types inferred from the grammar are below. 
*/ +type pegRule uint8 + +const ( + ruleUnknown pegRule = iota + ruleComplexField + rulearray + rulearray_contents + ruleitem + rulestring + ruledquote_string + rulesquote_string + rulevalue + rulews + rulecomma + rulelf + rulecr + ruleescdquote + ruleescsquote + rulesquote + ruleobracket + rulecbracket + ruleoparen + rulecparen + rulenumber + rulenegative + ruledecimal_point + ruletextdata + ruleAction0 + ruleAction1 + ruleAction2 + ruleAction3 + rulePegText + ruleAction4 + ruleAction5 + ruleAction6 +) + +var rul3s = [...]string{ + "Unknown", + "ComplexField", + "array", + "array_contents", + "item", + "string", + "dquote_string", + "squote_string", + "value", + "ws", + "comma", + "lf", + "cr", + "escdquote", + "escsquote", + "squote", + "obracket", + "cbracket", + "oparen", + "cparen", + "number", + "negative", + "decimal_point", + "textdata", + "Action0", + "Action1", + "Action2", + "Action3", + "PegText", + "Action4", + "Action5", + "Action6", +} + +type token32 struct { + pegRule + begin, end uint32 +} + +func (t *token32) String() string { + return fmt.Sprintf("\x1B[34m%v\x1B[m %v %v", rul3s[t.pegRule], t.begin, t.end) +} + +type node32 struct { + token32 + up, next *node32 +} + +func (node *node32) print(pretty bool, buffer string) { + var print func(node *node32, depth int) + print = func(node *node32, depth int) { + for node != nil { + for c := 0; c < depth; c++ { + fmt.Printf(" ") + } + rule := rul3s[node.pegRule] + quote := strconv.Quote(string(([]rune(buffer)[node.begin:node.end]))) + if !pretty { + fmt.Printf("%v %v\n", rule, quote) + } else { + fmt.Printf("\x1B[34m%v\x1B[m %v\n", rule, quote) + } + if node.up != nil { + print(node.up, depth+1) + } + node = node.next + } + } + print(node, 0) +} + +func (node *node32) Print(buffer string) { + node.print(false, buffer) +} + +func (node *node32) PrettyPrint(buffer string) { + node.print(true, buffer) +} + +type tokens32 struct { + tree []token32 +} + +func (t *tokens32) Trim(length uint32) { + t.tree = 
t.tree[:length] +} + +func (t *tokens32) Print() { + for _, token := range t.tree { + fmt.Println(token.String()) + } +} + +func (t *tokens32) AST() *node32 { + type element struct { + node *node32 + down *element + } + tokens := t.Tokens() + var stack *element + for _, token := range tokens { + if token.begin == token.end { + continue + } + node := &node32{token32: token} + for stack != nil && stack.node.begin >= token.begin && stack.node.end <= token.end { + stack.node.next = node.up + node.up = stack.node + stack = stack.down + } + stack = &element{node: node, down: stack} + } + if stack != nil { + return stack.node + } + return nil +} + +func (t *tokens32) PrintSyntaxTree(buffer string) { + t.AST().Print(buffer) +} + +func (t *tokens32) PrettyPrintSyntaxTree(buffer string) { + t.AST().PrettyPrint(buffer) +} + +func (t *tokens32) Add(rule pegRule, begin, end, index uint32) { + if tree := t.tree; int(index) >= len(tree) { + expanded := make([]token32, 2*len(tree)) + copy(expanded, tree) + t.tree = expanded + } + t.tree[index] = token32{ + pegRule: rule, + begin: begin, + end: end, + } +} + +func (t *tokens32) Tokens() []token32 { + return t.tree +} + +type ComplexField struct { + arrayElements + + Buffer string + buffer []rune + rules [32]func() bool + parse func(rule ...int) error + reset func() + Pretty bool + tokens32 +} + +func (p *ComplexField) Parse(rule ...int) error { + return p.parse(rule...) 
+} + +func (p *ComplexField) Reset() { + p.reset() +} + +type textPosition struct { + line, symbol int +} + +type textPositionMap map[int]textPosition + +func translatePositions(buffer []rune, positions []int) textPositionMap { + length, translations, j, line, symbol := len(positions), make(textPositionMap, len(positions)), 0, 1, 0 + sort.Ints(positions) + +search: + for i, c := range buffer { + if c == '\n' { + line, symbol = line+1, 0 + } else { + symbol++ + } + if i == positions[j] { + translations[positions[j]] = textPosition{line, symbol} + for j++; j < length; j++ { + if i != positions[j] { + continue search + } + } + break search + } + } + + return translations +} + +type parseError struct { + p *ComplexField + max token32 +} + +func (e *parseError) Error() string { + tokens, error := []token32{e.max}, "\n" + positions, p := make([]int, 2*len(tokens)), 0 + for _, token := range tokens { + positions[p], p = int(token.begin), p+1 + positions[p], p = int(token.end), p+1 + } + translations := translatePositions(e.p.buffer, positions) + format := "parse error near %v (line %v symbol %v - line %v symbol %v):\n%v\n" + if e.p.Pretty { + format = "parse error near \x1B[34m%v\x1B[m (line %v symbol %v - line %v symbol %v):\n%v\n" + } + for _, token := range tokens { + begin, end := int(token.begin), int(token.end) + error += fmt.Sprintf(format, + rul3s[token.pegRule], + translations[begin].line, translations[begin].symbol, + translations[end].line, translations[end].symbol, + strconv.Quote(string(e.p.buffer[begin:end]))) + } + + return error +} + +func (p *ComplexField) PrintSyntaxTree() { + if p.Pretty { + p.tokens32.PrettyPrintSyntaxTree(p.Buffer) + } else { + p.tokens32.PrintSyntaxTree(p.Buffer) + } +} + +func (p *ComplexField) Execute() { + buffer, _buffer, text, begin, end := p.Buffer, p.buffer, "", 0, 0 + for _, token := range p.Tokens() { + switch token.pegRule { + + case rulePegText: + begin, end = int(token.begin), int(token.end) + text = 
string(_buffer[begin:end]) + + case ruleAction0: + p.pushArray() + case ruleAction1: + p.popArray() + case ruleAction2: + p.pushArray() + case ruleAction3: + p.popArray() + case ruleAction4: + p.addElement(buffer[begin:end]) + case ruleAction5: + p.addElement(buffer[begin:end]) + case ruleAction6: + p.addElement(buffer[begin:end]) + + } + } + _, _, _, _, _ = buffer, _buffer, text, begin, end +} + +func (p *ComplexField) Init() { + var ( + max token32 + position, tokenIndex uint32 + buffer []rune + ) + p.reset = func() { + max = token32{} + position, tokenIndex = 0, 0 + + p.buffer = []rune(p.Buffer) + if len(p.buffer) == 0 || p.buffer[len(p.buffer)-1] != endSymbol { + p.buffer = append(p.buffer, endSymbol) + } + buffer = p.buffer + } + p.reset() + + _rules := p.rules + tree := tokens32{tree: make([]token32, math.MaxInt16)} + p.parse = func(rule ...int) error { + r := 1 + if len(rule) > 0 { + r = rule[0] + } + matches := p.rules[r]() + p.tokens32 = tree + if matches { + p.Trim(tokenIndex) + return nil + } + return &parseError{p, max} + } + + add := func(rule pegRule, begin uint32) { + tree.Add(rule, begin, position, tokenIndex) + tokenIndex++ + if begin != position && position > max.end { + max = token32{rule, begin, position} + } + } + + matchDot := func() bool { + if buffer[position] != endSymbol { + position++ + return true + } + return false + } + + /*matchChar := func(c byte) bool { + if buffer[position] == c { + position++ + return true + } + return false + }*/ + + /*matchRange := func(lower byte, upper byte) bool { + if c := buffer[position]; c >= lower && c <= upper { + position++ + return true + } + return false + }*/ + + _rules = [...]func() bool{ + nil, + /* 0 ComplexField <- <(array !.)> */ + func() bool { + position0, tokenIndex0 := position, tokenIndex + { + position1 := position + if !_rules[rulearray]() { + goto l0 + } + { + position2, tokenIndex2 := position, tokenIndex + if !matchDot() { + goto l2 + } + goto l0 + l2: + position, tokenIndex = 
position2, tokenIndex2 + } + add(ruleComplexField, position1) + } + return true + l0: + position, tokenIndex = position0, tokenIndex0 + return false + }, + /* 1 array <- <((ws* obracket Action0 array_contents cbracket Action1) / (ws* oparen Action2 array_contents comma? ws* cparen Action3))> */ + func() bool { + position3, tokenIndex3 := position, tokenIndex + { + position4 := position + { + position5, tokenIndex5 := position, tokenIndex + l7: + { + position8, tokenIndex8 := position, tokenIndex + if !_rules[rulews]() { + goto l8 + } + goto l7 + l8: + position, tokenIndex = position8, tokenIndex8 + } + if !_rules[ruleobracket]() { + goto l6 + } + { + add(ruleAction0, position) + } + if !_rules[rulearray_contents]() { + goto l6 + } + if !_rules[rulecbracket]() { + goto l6 + } + { + add(ruleAction1, position) + } + goto l5 + l6: + position, tokenIndex = position5, tokenIndex5 + l11: + { + position12, tokenIndex12 := position, tokenIndex + if !_rules[rulews]() { + goto l12 + } + goto l11 + l12: + position, tokenIndex = position12, tokenIndex12 + } + if !_rules[ruleoparen]() { + goto l3 + } + { + add(ruleAction2, position) + } + if !_rules[rulearray_contents]() { + goto l3 + } + { + position14, tokenIndex14 := position, tokenIndex + if !_rules[rulecomma]() { + goto l14 + } + goto l15 + l14: + position, tokenIndex = position14, tokenIndex14 + } + l15: + l16: + { + position17, tokenIndex17 := position, tokenIndex + if !_rules[rulews]() { + goto l17 + } + goto l16 + l17: + position, tokenIndex = position17, tokenIndex17 + } + if !_rules[rulecparen]() { + goto l3 + } + { + add(ruleAction3, position) + } + } + l5: + add(rulearray, position4) + } + return true + l3: + position, tokenIndex = position3, tokenIndex3 + return false + }, + /* 2 array_contents <- <(ws* (item ws* (comma ws* item ws*)*)?)> */ + func() bool { + { + position20 := position + l21: + { + position22, tokenIndex22 := position, tokenIndex + if !_rules[rulews]() { + goto l22 + } + goto l21 + l22: + position, 
tokenIndex = position22, tokenIndex22 + } + { + position23, tokenIndex23 := position, tokenIndex + if !_rules[ruleitem]() { + goto l23 + } + l25: + { + position26, tokenIndex26 := position, tokenIndex + if !_rules[rulews]() { + goto l26 + } + goto l25 + l26: + position, tokenIndex = position26, tokenIndex26 + } + l27: + { + position28, tokenIndex28 := position, tokenIndex + if !_rules[rulecomma]() { + goto l28 + } + l29: + { + position30, tokenIndex30 := position, tokenIndex + if !_rules[rulews]() { + goto l30 + } + goto l29 + l30: + position, tokenIndex = position30, tokenIndex30 + } + if !_rules[ruleitem]() { + goto l28 + } + l31: + { + position32, tokenIndex32 := position, tokenIndex + if !_rules[rulews]() { + goto l32 + } + goto l31 + l32: + position, tokenIndex = position32, tokenIndex32 + } + goto l27 + l28: + position, tokenIndex = position28, tokenIndex28 + } + goto l24 + l23: + position, tokenIndex = position23, tokenIndex23 + } + l24: + add(rulearray_contents, position20) + } + return true + }, + /* 3 item <- <(array / string / ( Action4))> */ + func() bool { + position33, tokenIndex33 := position, tokenIndex + { + position34 := position + { + position35, tokenIndex35 := position, tokenIndex + if !_rules[rulearray]() { + goto l36 + } + goto l35 + l36: + position, tokenIndex = position35, tokenIndex35 + { + position38 := position + { + position39, tokenIndex39 := position, tokenIndex + { + position41 := position + if !_rules[ruleescdquote]() { + goto l40 + } + { + position42 := position + l43: + { + position44, tokenIndex44 := position, tokenIndex + { + position45, tokenIndex45 := position, tokenIndex + if !_rules[ruletextdata]() { + goto l46 + } + goto l45 + l46: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulesquote]() { + goto l47 + } + goto l45 + l47: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulelf]() { + goto l48 + } + goto l45 + l48: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulecr]() { + 
goto l49 + } + goto l45 + l49: + position, tokenIndex = position45, tokenIndex45 + if !_rules[ruleobracket]() { + goto l50 + } + goto l45 + l50: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulecbracket]() { + goto l51 + } + goto l45 + l51: + position, tokenIndex = position45, tokenIndex45 + if !_rules[ruleoparen]() { + goto l52 + } + goto l45 + l52: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulecparen]() { + goto l53 + } + goto l45 + l53: + position, tokenIndex = position45, tokenIndex45 + if !_rules[rulecomma]() { + goto l44 + } + } + l45: + goto l43 + l44: + position, tokenIndex = position44, tokenIndex44 + } + add(rulePegText, position42) + } + if !_rules[ruleescdquote]() { + goto l40 + } + { + add(ruleAction5, position) + } + add(ruledquote_string, position41) + } + goto l39 + l40: + position, tokenIndex = position39, tokenIndex39 + { + position55 := position + if !_rules[rulesquote]() { + goto l37 + } + { + position56 := position + l57: + { + position58, tokenIndex58 := position, tokenIndex + { + position59, tokenIndex59 := position, tokenIndex + { + position61 := position + if buffer[position] != rune('\\') { + goto l60 + } + position++ + if buffer[position] != rune('\'') { + goto l60 + } + position++ + add(ruleescsquote, position61) + } + goto l59 + l60: + position, tokenIndex = position59, tokenIndex59 + if !_rules[ruleescdquote]() { + goto l62 + } + goto l59 + l62: + position, tokenIndex = position59, tokenIndex59 + if !_rules[ruletextdata]() { + goto l63 + } + goto l59 + l63: + position, tokenIndex = position59, tokenIndex59 + if !_rules[rulelf]() { + goto l64 + } + goto l59 + l64: + position, tokenIndex = position59, tokenIndex59 + if !_rules[rulecr]() { + goto l65 + } + goto l59 + l65: + position, tokenIndex = position59, tokenIndex59 + if !_rules[ruleobracket]() { + goto l66 + } + goto l59 + l66: + position, tokenIndex = position59, tokenIndex59 + if !_rules[rulecbracket]() { + goto l67 + } + goto l59 + l67: + 
position, tokenIndex = position59, tokenIndex59 + if !_rules[ruleoparen]() { + goto l68 + } + goto l59 + l68: + position, tokenIndex = position59, tokenIndex59 + if !_rules[rulecparen]() { + goto l58 + } + } + l59: + goto l57 + l58: + position, tokenIndex = position58, tokenIndex58 + } + add(rulePegText, position56) + } + if !_rules[rulesquote]() { + goto l37 + } + { + add(ruleAction6, position) + } + add(rulesquote_string, position55) + } + } + l39: + add(rulestring, position38) + } + goto l35 + l37: + position, tokenIndex = position35, tokenIndex35 + { + position70 := position + { + position71 := position + { + position72, tokenIndex72 := position, tokenIndex + { + position74 := position + if buffer[position] != rune('-') { + goto l72 + } + position++ + add(rulenegative, position74) + } + goto l73 + l72: + position, tokenIndex = position72, tokenIndex72 + } + l73: + if !_rules[rulenumber]() { + goto l33 + } + l75: + { + position76, tokenIndex76 := position, tokenIndex + if !_rules[rulenumber]() { + goto l76 + } + goto l75 + l76: + position, tokenIndex = position76, tokenIndex76 + } + { + position77, tokenIndex77 := position, tokenIndex + { + position79 := position + if buffer[position] != rune('.') { + goto l77 + } + position++ + add(ruledecimal_point, position79) + } + if !_rules[rulenumber]() { + goto l77 + } + l80: + { + position81, tokenIndex81 := position, tokenIndex + if !_rules[rulenumber]() { + goto l81 + } + goto l80 + l81: + position, tokenIndex = position81, tokenIndex81 + } + goto l78 + l77: + position, tokenIndex = position77, tokenIndex77 + } + l78: + add(rulevalue, position71) + } + add(rulePegText, position70) + } + { + add(ruleAction4, position) + } + } + l35: + add(ruleitem, position34) + } + return true + l33: + position, tokenIndex = position33, tokenIndex33 + return false + }, + /* 4 string <- <(dquote_string / squote_string)> */ + nil, + /* 5 dquote_string <- <(escdquote <(textdata / squote / lf / cr / obracket / cbracket / oparen / cparen / 
comma)*> escdquote Action5)> */ + nil, + /* 6 squote_string <- <(squote <(escsquote / escdquote / textdata / lf / cr / obracket / cbracket / oparen / cparen)*> squote Action6)> */ + nil, + /* 7 value <- <(negative? number+ (decimal_point number+)?)> */ + nil, + /* 8 ws <- <' '> */ + func() bool { + position87, tokenIndex87 := position, tokenIndex + { + position88 := position + if buffer[position] != rune(' ') { + goto l87 + } + position++ + add(rulews, position88) + } + return true + l87: + position, tokenIndex = position87, tokenIndex87 + return false + }, + /* 9 comma <- <','> */ + func() bool { + position89, tokenIndex89 := position, tokenIndex + { + position90 := position + if buffer[position] != rune(',') { + goto l89 + } + position++ + add(rulecomma, position90) + } + return true + l89: + position, tokenIndex = position89, tokenIndex89 + return false + }, + /* 10 lf <- <'\n'> */ + func() bool { + position91, tokenIndex91 := position, tokenIndex + { + position92 := position + if buffer[position] != rune('\n') { + goto l91 + } + position++ + add(rulelf, position92) + } + return true + l91: + position, tokenIndex = position91, tokenIndex91 + return false + }, + /* 11 cr <- <'\r'> */ + func() bool { + position93, tokenIndex93 := position, tokenIndex + { + position94 := position + if buffer[position] != rune('\r') { + goto l93 + } + position++ + add(rulecr, position94) + } + return true + l93: + position, tokenIndex = position93, tokenIndex93 + return false + }, + /* 12 escdquote <- <'"'> */ + func() bool { + position95, tokenIndex95 := position, tokenIndex + { + position96 := position + if buffer[position] != rune('"') { + goto l95 + } + position++ + add(ruleescdquote, position96) + } + return true + l95: + position, tokenIndex = position95, tokenIndex95 + return false + }, + /* 13 escsquote <- <('\\' '\'')> */ + nil, + /* 14 squote <- <'\''> */ + func() bool { + position98, tokenIndex98 := position, tokenIndex + { + position99 := position + if buffer[position] 
!= rune('\'') { + goto l98 + } + position++ + add(rulesquote, position99) + } + return true + l98: + position, tokenIndex = position98, tokenIndex98 + return false + }, + /* 15 obracket <- <'['> */ + func() bool { + position100, tokenIndex100 := position, tokenIndex + { + position101 := position + if buffer[position] != rune('[') { + goto l100 + } + position++ + add(ruleobracket, position101) + } + return true + l100: + position, tokenIndex = position100, tokenIndex100 + return false + }, + /* 16 cbracket <- <']'> */ + func() bool { + position102, tokenIndex102 := position, tokenIndex + { + position103 := position + if buffer[position] != rune(']') { + goto l102 + } + position++ + add(rulecbracket, position103) + } + return true + l102: + position, tokenIndex = position102, tokenIndex102 + return false + }, + /* 17 oparen <- <'('> */ + func() bool { + position104, tokenIndex104 := position, tokenIndex + { + position105 := position + if buffer[position] != rune('(') { + goto l104 + } + position++ + add(ruleoparen, position105) + } + return true + l104: + position, tokenIndex = position104, tokenIndex104 + return false + }, + /* 18 cparen <- <')'> */ + func() bool { + position106, tokenIndex106 := position, tokenIndex + { + position107 := position + if buffer[position] != rune(')') { + goto l106 + } + position++ + add(rulecparen, position107) + } + return true + l106: + position, tokenIndex = position106, tokenIndex106 + return false + }, + /* 19 number <- <([a-z] / [A-Z] / [0-9])> */ + func() bool { + position108, tokenIndex108 := position, tokenIndex + { + position109 := position + { + position110, tokenIndex110 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l111 + } + position++ + goto l110 + l111: + position, tokenIndex = position110, tokenIndex110 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l112 + } + position++ + goto l110 + l112: + position, tokenIndex = position110, tokenIndex110 + if c := 
buffer[position]; c < rune('0') || c > rune('9') { + goto l108 + } + position++ + } + l110: + add(rulenumber, position109) + } + return true + l108: + position, tokenIndex = position108, tokenIndex108 + return false + }, + /* 20 negative <- <'-'> */ + nil, + /* 21 decimal_point <- <'.'> */ + nil, + /* 22 textdata <- <([a-z] / [A-Z] / [0-9] / ' ' / '!' / '#' / '$' / '&' / '%' / '*' / '+' / '-' / '.' / '/' / ':' / ';' / [<->] / '?' / '\\' / '^' / '_' / '`' / '{' / '|' / '}' / '~')> */ + func() bool { + position115, tokenIndex115 := position, tokenIndex + { + position116 := position + { + position117, tokenIndex117 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { + goto l118 + } + position++ + goto l117 + l118: + position, tokenIndex = position117, tokenIndex117 + if c := buffer[position]; c < rune('A') || c > rune('Z') { + goto l119 + } + position++ + goto l117 + l119: + position, tokenIndex = position117, tokenIndex117 + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l120 + } + position++ + goto l117 + l120: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune(' ') { + goto l121 + } + position++ + goto l117 + l121: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('!') { + goto l122 + } + position++ + goto l117 + l122: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('#') { + goto l123 + } + position++ + goto l117 + l123: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('$') { + goto l124 + } + position++ + goto l117 + l124: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('&') { + goto l125 + } + position++ + goto l117 + l125: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('%') { + goto l126 + } + position++ + goto l117 + l126: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('*') { + 
goto l127 + } + position++ + goto l117 + l127: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('+') { + goto l128 + } + position++ + goto l117 + l128: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('-') { + goto l129 + } + position++ + goto l117 + l129: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('.') { + goto l130 + } + position++ + goto l117 + l130: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('/') { + goto l131 + } + position++ + goto l117 + l131: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune(':') { + goto l132 + } + position++ + goto l117 + l132: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune(';') { + goto l133 + } + position++ + goto l117 + l133: + position, tokenIndex = position117, tokenIndex117 + if c := buffer[position]; c < rune('<') || c > rune('>') { + goto l134 + } + position++ + goto l117 + l134: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('?') { + goto l135 + } + position++ + goto l117 + l135: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('\\') { + goto l136 + } + position++ + goto l117 + l136: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('^') { + goto l137 + } + position++ + goto l117 + l137: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('_') { + goto l138 + } + position++ + goto l117 + l138: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('`') { + goto l139 + } + position++ + goto l117 + l139: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('{') { + goto l140 + } + position++ + goto l117 + l140: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('|') { + goto l141 + } + position++ + goto l117 + 
l141: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('}') { + goto l142 + } + position++ + goto l117 + l142: + position, tokenIndex = position117, tokenIndex117 + if buffer[position] != rune('~') { + goto l115 + } + position++ + } + l117: + add(ruletextdata, position116) + } + return true + l115: + position, tokenIndex = position115, tokenIndex115 + return false + }, + /* 24 Action0 <- <{ p.pushArray() }> */ + nil, + /* 25 Action1 <- <{ p.popArray() }> */ + nil, + /* 26 Action2 <- <{ p.pushArray() }> */ + nil, + /* 27 Action3 <- <{ p.popArray() }> */ + nil, + nil, + /* 29 Action4 <- <{ p.addElement(buffer[begin:end]) }> */ + nil, + /* 30 Action5 <- <{ p.addElement(buffer[begin:end]) }> */ + nil, + /* 31 Action6 <- <{ p.addElement(buffer[begin:end]) }> */ + nil, + } + p.rules = _rules +} diff --git a/primitive/compute/result/complex_field_test.go b/primitive/compute/result/complex_field_test.go new file mode 100644 index 0000000..26bcd16 --- /dev/null +++ b/primitive/compute/result/complex_field_test.go @@ -0,0 +1,117 @@ +package result + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParserSingleQuoted(t *testing.T) { + field := &ComplexField{Buffer: " ['c ar' , '\\'plane', 'b* oat']"} // single quote can be escaped in python + field.Init() + + err := field.Parse() + assert.NoError(t, err) + + field.Execute() + assert.Equal(t, []interface{}{"c ar", "'plane", "b* oat"}, field.arrayElements.elements) +} + +func TestParserDoubleQuoted(t *testing.T) { + field := &ComplexField{Buffer: "[\"&car\" , \"\\plane\", \"boat's\"]"} + field.Init() + + err := field.Parse() + assert.NoError(t, err) + + field.Execute() + assert.Equal(t, []interface{}{"&car", "\\plane", "boat's"}, field.arrayElements.elements) +} + +func TestParserValues(t *testing.T) { + field := &ComplexField{Buffer: "[10, 20, 30, \"forty &*\"]"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + 
field.Execute() + assert.Equal(t, []interface{}{"10", "20", "30", "forty &*"}, field.arrayElements.elements) +} + +func TestParserFail(t *testing.T) { + field := &ComplexField{Buffer: "[&*&, \"car\" , \"plane\", \"boat's\"]"} + field.Init() + + err := field.Parse() + assert.Error(t, err) +} + +func TestParserNested(t *testing.T) { + field := &ComplexField{Buffer: "[[10, 20, 30, [alpha, bravo]], [40, 50, 60]]"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + field.Execute() + + assert.Equal(t, []interface{}{"alpha", "bravo"}, field.arrayElements.elements[0].([]interface{})[3].([]interface{})) + assert.Equal(t, []interface{}{"40", "50", "60"}, field.arrayElements.elements[1].([]interface{})) +} + +func TestParserTuple(t *testing.T) { + field := &ComplexField{Buffer: "(10, 20, 30,)"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + field.Execute() + + assert.Equal(t, []interface{}{"10", "20", "30"}, field.arrayElements.elements) +} + +func TestParserSingleTuple(t *testing.T) { + field := &ComplexField{Buffer: "(10, )"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + field.Execute() + + assert.Equal(t, []interface{}{"10"}, field.arrayElements.elements) +} + +func TestParserNestedTuple(t *testing.T) { + field := &ComplexField{Buffer: "((10, 20, 30, (alpha, bravo)), (40, 50, 60))"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + field.Execute() + + assert.Equal(t, []interface{}{"alpha", "bravo"}, field.arrayElements.elements[0].([]interface{})[3].([]interface{})) + assert.Equal(t, []interface{}{"40", "50", "60"}, field.arrayElements.elements[1].([]interface{})) +} + +func TestParserNestedMixed(t *testing.T) { + field := &ComplexField{Buffer: "([10, 20, 30, (alpha, bravo)], [40, 50, 60])"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.NoError(t, err) + + 
field.Execute() + + assert.Equal(t, []interface{}{"alpha", "bravo"}, field.arrayElements.elements[0].([]interface{})[3].([]interface{})) + assert.Equal(t, []interface{}{"40", "50", "60"}, field.arrayElements.elements[1].([]interface{})) +} diff --git a/primitive/compute/result/result_csv_parser.go b/primitive/compute/result/result_csv_parser.go new file mode 100644 index 0000000..bd8a625 --- /dev/null +++ b/primitive/compute/result/result_csv_parser.go @@ -0,0 +1,90 @@ +package result + +import ( + "encoding/csv" + "io" + "os" + "strings" + + "github.com/pkg/errors" +) + +// ParseResultCSV parses a result CSV that is compliant with RFC 4180, with +// additional logic added to extract nested arrays generated by PANDAS to_csv() calls. +func ParseResultCSV(path string) ([][]interface{}, error) { + csvFile, err := os.Open(path) + if err != nil { + return nil, errors.Wrapf(err, "error opening result file '%s'", path) + } + + csvReader := csv.NewReader(csvFile) + results := [][]interface{}{} + for { + line, err := csvReader.Read() + if err == io.EOF { + break + } else if err != nil { + return nil, errors.Wrapf(err, "error parsing result file - '%s'", line) + } + + record := []interface{}{} + for _, elem := range line { + // parse value into float, int, string, or array + record = append(record, parseVal(elem)) + } + results = append(results, record) + } + return results, nil +} + +func parseVal(val string) interface{} { + // check to see if we can parse the value as an array - if not we leave it as a string + arrayVal, err := parseArray(val) + if err == nil { + return arrayVal + } + return val +} + +func parseArray(val string) ([]interface{}, error) { + field := &ComplexField{ + Buffer: val, + } + field.Init() + + err := field.Parse() + if err != nil { + return nil, err + } + + field.Execute() + return field.arrayElements.elements, nil +} + +// Structure to interact with peg parser +type arrayElements struct { + elements []interface{} + stack [][]interface{} +} + +func 
(a *arrayElements) lastIdx() int { + return len(a.stack) - 1 +} + +// Called by peg parse +func (a *arrayElements) addElement(element string) { + // there is no single quote escape in Go so we need to pull it out of any element we process + element = strings.Replace(element, "\\'", "'", -1) + a.stack[a.lastIdx()] = append(a.stack[a.lastIdx()], element) +} + +func (a *arrayElements) pushArray() { + a.stack = append(a.stack, []interface{}{}) +} + +func (a *arrayElements) popArray() { + a.elements, a.stack = a.stack[a.lastIdx()], a.stack[:a.lastIdx()] + if len(a.stack) != 0 { + a.stack[a.lastIdx()] = append(a.stack[a.lastIdx()], a.elements) + } +} diff --git a/primitive/compute/result/result_csv_parser_test.go b/primitive/compute/result/result_csv_parser_test.go new file mode 100644 index 0000000..2ef2f43 --- /dev/null +++ b/primitive/compute/result/result_csv_parser_test.go @@ -0,0 +1,25 @@ +package result + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCSVResultParser(t *testing.T) { + result, err := ParseResultCSV("./testdata/test.csv") + assert.NoError(t, err) + assert.NotEmpty(t, result) + + fmt.Printf("%v", result) + + assert.Equal(t, []interface{}{"idx", "col a", "col b"}, result[0]) + assert.Equal(t, []interface{}{"0", []interface{}{"alpha", "bravo"}, "foxtrot"}, result[1]) + assert.Equal(t, []interface{}{"1", []interface{}{"charlie", "delta's oscar"}, "hotel"}, result[2]) + assert.Equal(t, []interface{}{"2", []interface{}{"a", "[", "b"}, []interface{}{"c", "\"", "e"}}, result[3]) + assert.Equal(t, []interface{}{"3", []interface{}{"a", "['\"", "b"}, []interface{}{"c", "\"", "e"}}, result[4]) + assert.Equal(t, []interface{}{"4", []interface{}{"-10.001", "20.1"}, []interface{}{"30", "40"}}, result[5]) + assert.Equal(t, []interface{}{"5", []interface{}{"int"}, []interface{}{"0.989599347114563"}}, result[6]) + assert.Equal(t, []interface{}{"7", []interface{}{"int", "categorical"}, []interface{}{"0.9885959029197693", "1"}}, 
result[8]) +} diff --git a/primitive/compute/result/testdata/test.csv b/primitive/compute/result/testdata/test.csv new file mode 100644 index 0000000..ed06fca --- /dev/null +++ b/primitive/compute/result/testdata/test.csv @@ -0,0 +1,11 @@ +idx,col a,col b +0,"['alpha', 'bravo']",foxtrot +1,"('charlie', ""delta's oscar"")",hotel +2,"['a', '[', 'b']","['c', '""', 'e']" +3,"['a', '[\'""', 'b']","['c', '""', 'e']" +4,"[-10.001,20.1]","(30,40,)" +5,"('int',)",[0.989599347114563] +6,"('text',)",[0.9966805577278137] +7,"('int', 'categorical')","[0.9885959029197693, 1]" +8,"('int',)",[0.9898993372917175] +9,"('int',)",[0.9903160929679871] diff --git a/primitive/compute/solution_request.go b/primitive/compute/solution_request.go new file mode 100644 index 0000000..5f3a55c --- /dev/null +++ b/primitive/compute/solution_request.go @@ -0,0 +1,386 @@ +package compute + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "sync" + "time" + + "github.com/pkg/errors" + uuid "github.com/satori/go.uuid" + "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/plog" +) + +const ( + defaultResourceID = "0" + defaultExposedOutputKey = "outputs.0" + trainTestSplitThreshold = 0.9 + // SolutionPendingStatus represents that the solution request has been acknowledged but not yet sent to the API + SolutionPendingStatus = "SOLUTION_PENDING" + // SolutionRunningStatus represents that the solution request has been sent to the API. + SolutionRunningStatus = "SOLUTION_RUNNING" + // SolutionErroredStatus represents that the solution request has terminated with an error. + SolutionErroredStatus = "SOLUTION_ERRORED" + // SolutionCompletedStatus represents that the solution request has completed successfully. 
+ SolutionCompletedStatus = "SOLUTION_COMPLETED" + // RequestPendingStatus represents that the solution request has been acknowledged but not yet sent to the API + RequestPendingStatus = "REQUEST_PENDING" + // RequestRunningStatus represents that the solution request has been sent to the API. + RequestRunningStatus = "REQUEST_RUNNING" + // RequestErroredStatus represents that the solution request has terminated with an error. + RequestErroredStatus = "REQUEST_ERRORED" + // RequestCompletedStatus represents that the solution request has completed successfully. + RequestCompletedStatus = "REQUEST_COMPLETED" +) + +var ( + // folder for dataset data exchanged with TA2 + datasetDir string + // folder containing the input dataset + inputDir string +) + +// SetDatasetDir sets the output data dir +func SetDatasetDir(dir string) { + datasetDir = dir +} + +// SetInputDir sets the input data dir +func SetInputDir(dir string) { + inputDir = dir +} + +func newStatusChannel() chan SolutionStatus { + // NOTE: WE BUFFER THE CHANNEL TO A SIZE OF 1 HERE SO THAT THE INITIAL + // PERSIST DOES NOT DEADLOCK + return make(chan SolutionStatus, 1) +} + +// SolutionRequest represents a solution search request. +type SolutionRequest struct { + Dataset string `json:"dataset"` + Index string `json:"index"` + TargetFeature string `json:"target"` + Task string `json:"task"` + SubTask string `json:"subTask"` + MaxSolutions int32 `json:"maxSolutions"` + MaxTime int64 `json:"maxTime"` + Metrics []string `json:"metrics"` + mu *sync.Mutex + wg *sync.WaitGroup + requestChannel chan SolutionStatus + solutionChannels []chan SolutionStatus + listener SolutionStatusListener + finished chan error +} + +// NewSolutionRequest instantiates a new SolutionRequest. 
+func NewSolutionRequest(data []byte) (*SolutionRequest, error) { + req := &SolutionRequest{ + mu: &sync.Mutex{}, + wg: &sync.WaitGroup{}, + finished: make(chan error), + requestChannel: newStatusChannel(), + } + err := json.Unmarshal(data, &req) + if err != nil { + return nil, err + } + return req, nil +} + +// SolutionStatus represents a solution status. +type SolutionStatus struct { + Progress string `json:"progress"` + RequestID string `json:"requestId"` + SolutionID string `json:"solutionId"` + ResultID string `json:"resultId"` + Error error `json:"error"` + Timestamp time.Time `json:"timestamp"` +} + +// SolutionStatusListener executes on a new solution status. +type SolutionStatusListener func(status SolutionStatus) + +func (s *SolutionRequest) addSolution(c chan SolutionStatus) { + s.wg.Add(1) + s.mu.Lock() + s.solutionChannels = append(s.solutionChannels, c) + if s.listener != nil { + go s.listenOnStatusChannel(c) + } + s.mu.Unlock() +} + +func (s *SolutionRequest) completeSolution() { + s.wg.Done() +} + +func (s *SolutionRequest) waitOnSolutions() { + s.wg.Wait() +} + +func (s *SolutionRequest) listenOnStatusChannel(statusChannel chan SolutionStatus) { + for { + // read status from channel + status := <-statusChannel + // execute callback + s.listener(status) + } +} + +// Listen listens on the solution request for new solution statuses. 
+func (s *SolutionRequest) Listen(listener SolutionStatusListener) error { + s.listener = listener + s.mu.Lock() + // listen on main request channel + go s.listenOnStatusChannel(s.requestChannel) + // listen on individual solution channels + for _, c := range s.solutionChannels { + go s.listenOnStatusChannel(c) + } + s.mu.Unlock() + return <-s.finished +} + +func (s *SolutionRequest) createSearchSolutionsRequest(preprocessing *pipeline.PipelineDescription, + datasetURI string, userAgent string) (*pipeline.SearchSolutionsRequest, error) { + return createSearchSolutionsRequest(preprocessing, datasetURI, userAgent, s.TargetFeature, s.Dataset, s.Metrics, s.Task, s.SubTask, s.MaxTime) +} + +func createSearchSolutionsRequest(preprocessing *pipeline.PipelineDescription, + datasetURI string, userAgent string, targetFeature string, dataset string, metrics []string, task string, subTask string, maxTime int64) (*pipeline.SearchSolutionsRequest, error) { + + return &pipeline.SearchSolutionsRequest{ + Problem: &pipeline.ProblemDescription{ + Problem: &pipeline.Problem{ + TaskType: convertTaskTypeFromTA3ToTA2(task), + TaskSubtype: convertTaskSubTypeFromTA3ToTA2(subTask), + PerformanceMetrics: convertMetricsFromTA3ToTA2(metrics), + }, + Inputs: []*pipeline.ProblemInput{ + { + DatasetId: convertDatasetTA3ToTA2(dataset), + Targets: convertTargetFeaturesTA3ToTA2(targetFeature, -1), + }, + }, + }, + + // Our agent/version info + UserAgent: userAgent, + Version: GetAPIVersion(), + + // Requested max time for solution search - not guaranteed to be honoured + TimeBound: float64(maxTime), + + // we accept dataset and csv uris as return types + AllowedValueTypes: []pipeline.ValueType{ + pipeline.ValueType_DATASET_URI, + pipeline.ValueType_CSV_URI, + }, + + // URI of the input dataset + Inputs: []*pipeline.Value{ + { + Value: &pipeline.Value_DatasetUri{ + DatasetUri: datasetURI, + }, + }, + }, + + Template: preprocessing, + }, nil +} + +// createPreprocessingPipeline creates pipeline to 
enforce user feature selection and typing +func (s *SolutionRequest) createPreprocessingPipeline(targetVariable string) (*pipeline.PipelineDescription, error) { + uuid := uuid.NewV4() + name := fmt.Sprintf("preprocessing-%s-%s", s.Dataset, uuid.String()) + desc := fmt.Sprintf("Preprocessing pipeline capturing user feature selection and type information. Dataset: `%s` ID: `%s`", s.Dataset, uuid.String()) + + preprocessingPipeline, err := description.CreateUserDatasetPipeline(name, desc, targetVariable) + if err != nil { + return nil, err + } + + return preprocessingPipeline, nil +} + +func (s *SolutionRequest) createProduceSolutionRequest(datasetURI string, fittedSolutionID string) *pipeline.ProduceSolutionRequest { + return &pipeline.ProduceSolutionRequest{ + FittedSolutionId: fittedSolutionID, + Inputs: []*pipeline.Value{ + { + Value: &pipeline.Value_DatasetUri{ + DatasetUri: datasetURI, + }, + }, + }, + ExposeOutputs: []string{defaultExposedOutputKey}, + ExposeValueTypes: []pipeline.ValueType{ + pipeline.ValueType_CSV_URI, + }, + } +} + +func (s *SolutionRequest) dispatchSolution(statusChan chan SolutionStatus, client *Client, searchID string, solutionID string, dataset string, datasetURITrain string, datasetURITest string) error { + + // score solution + solutionScoreResponses, err := client.GenerateSolutionScores(context.Background(), solutionID, datasetURITest, s.Metrics) + if err != nil { + return err + } + + // persist the scores + for _, response := range solutionScoreResponses { + // only persist scores from COMPLETED responses + if response.Progress.State == pipeline.ProgressState_COMPLETED { + log.Infof("scoring complete") + } + } + + // fit solution + var fitResults []*pipeline.GetFitSolutionResultsResponse + fitResults, err = client.GenerateSolutionFit(context.Background(), solutionID, datasetURITrain) + if err != nil { + return err + } + + // find the completed result and get the fitted solution ID out + var fittedSolutionID string + for _, result := 
range fitResults { + if result.GetFittedSolutionId() != "" { + fittedSolutionID = result.GetFittedSolutionId() + break + } + } + if fittedSolutionID == "" { + return errors.Errorf("no fitted solution ID for solution `%s`", solutionID) + } + + // generate predictions + produceSolutionRequest := s.createProduceSolutionRequest(datasetURITest, fittedSolutionID) + + // generate predictions + predictionResponses, err := client.GeneratePredictions(context.Background(), produceSolutionRequest) + if err != nil { + return err + } + + for _, response := range predictionResponses { + + if response.Progress.State != pipeline.ProgressState_COMPLETED { + // only persist completed responses + continue + } + } + + return nil +} + +func (s *SolutionRequest) dispatchRequest(client *Client, searchID string, dataset string, datasetURITrain string, datasetURITest string) error { + + // search for solutions, this wont return until the search finishes or it times out + err := client.SearchSolutions(context.Background(), searchID, func(solution *pipeline.GetSearchSolutionsResultsResponse) { + // create a new status channel for the solution + c := newStatusChannel() + // add the solution to the request + s.addSolution(c) + // dispatch it + s.dispatchSolution(c, client, searchID, solution.SolutionId, dataset, datasetURITrain, datasetURITest) + // once done, mark as complete + s.completeSolution() + }) + + // update request status + if err != nil { + return err + } + + // wait until all are complete and the search has finished / timed out + s.waitOnSolutions() + + // end search + s.finished <- client.EndSearch(context.Background(), searchID) + + return nil +} + +// PersistAndDispatch persists the solution request and dispatches it. 
+func (s *SolutionRequest) PersistAndDispatch(client *Client) error { + + // persist the datasets and get URI + datasetPathTrain, datasetPathTest, err := PersistOriginalData(s.Dataset, D3MDataSchema, inputDir, datasetDir) + if err != nil { + return err + } + + // make sure the path is absolute and contains the URI prefix + datasetPathTrain, err = filepath.Abs(datasetPathTrain) + if err != nil { + return err + } + datasetPathTrain = fmt.Sprintf("file://%s", datasetPathTrain) + datasetPathTest, err = filepath.Abs(datasetPathTest) + if err != nil { + return err + } + datasetPathTest = fmt.Sprintf("file://%s", datasetPathTest) + + // generate the pre-processing pipeline to enforce feature selection and semantic type changes + var preprocessing *pipeline.PipelineDescription + if !client.SkipPreprocessing { + preprocessing, err = s.createPreprocessingPipeline(s.TargetFeature) + if err != nil { + return err + } + } + + // create search solutions request + searchRequest, err := s.createSearchSolutionsRequest(preprocessing, datasetPathTrain, client.UserAgent) + if err != nil { + return err + } + + // start a solution search + requestID, err := client.StartSearch(context.Background(), searchRequest) + if err != nil { + return err + } + + // dispatch search request + go s.dispatchRequest(client, requestID, s.Dataset, datasetPathTrain, datasetPathTest) + + return nil +} + +// CreateSearchSolutionRequest creates a search solution request, including +// the pipeline steps required to process the data. +func CreateSearchSolutionRequest( + target string, sourceURI string, dataset string, + userAgent string, skipPreprocessing bool) (*pipeline.SearchSolutionsRequest, error) { + uuid := uuid.NewV4() + name := fmt.Sprintf("preprocessing-%s-%s", dataset, uuid.String()) + desc := fmt.Sprintf("Preprocessing pipeline capturing user feature selection and type information. 
Dataset: `%s` ID: `%s`", dataset, uuid.String()) + + var err error + var preprocessingPipeline *pipeline.PipelineDescription + if !skipPreprocessing { + preprocessingPipeline, err = description.CreateUserDatasetPipeline(name, desc, target) + if err != nil { + return nil, errors.Wrap(err, "unable to create preprocessing pipeline") + } + } + + // create search solutions request + searchRequest, err := createSearchSolutionsRequest(preprocessingPipeline, sourceURI, userAgent, target, dataset, nil, "", "", 600) + if err != nil { + return nil, errors.Wrap(err, "unable to create search solution request") + } + + return searchRequest, nil +} diff --git a/primitive/compute/stop_solution_request.go b/primitive/compute/stop_solution_request.go new file mode 100644 index 0000000..fdc8a34 --- /dev/null +++ b/primitive/compute/stop_solution_request.go @@ -0,0 +1,26 @@ +package compute + +import ( + "context" + "encoding/json" +) + +// StopSolutionSearchRequest represents a request to stop any pending solution searches. +type StopSolutionSearchRequest struct { + RequestID string `json:"requestId"` +} + +// NewStopSolutionSearchRequest instantiates a new StopSolutionSearchRequest. +func NewStopSolutionSearchRequest(data []byte) (*StopSolutionSearchRequest, error) { + req := &StopSolutionSearchRequest{} + err := json.Unmarshal(data, &req) + if err != nil { + return nil, err + } + return req, nil +} + +// Dispatch dispatches the stop search request. 
+func (s *StopSolutionSearchRequest) Dispatch(client *Client) error { + return client.StopSearch(context.Background(), s.RequestID) +} diff --git a/primitive/compute/ta3ta2.go b/primitive/compute/ta3ta2.go new file mode 100644 index 0000000..f27c7ca --- /dev/null +++ b/primitive/compute/ta3ta2.go @@ -0,0 +1,245 @@ +package compute + +import ( + "bytes" + "compress/gzip" + "io/ioutil" + + "github.com/golang/protobuf/proto" + protobuf "github.com/golang/protobuf/protoc-gen-go/descriptor" + "github.com/unchartedsoftware/plog" + + "github.com/unchartedsoftware/distil-ingest/pipeline" +) + +const ( + unknownAPIVersion = "unknown" +) + +var ( + // cached ta3ta2 API version + apiVersion string + problemMetricMap = map[string]string{ + "accuracy": "ACCURACY", + "precision": "PRECISION", + "recall": "RECALL", + "f1": "F1", + "f1Micro": "F1_MICRO", + "f1Macro": "F1_MACRO", + "rocAuc": "ROC_AUC", + "rocAucMicro": "ROC_AUC_MICRO", + "rocAucMacro": "ROC_AUC_MACRO", + "meanSquaredError": "MEAN_SQUARED_ERROR", + "rootMeanSquaredError": "ROOT_MEAN_SQUARED_ERROR", + "rootMeanSquaredErrorAvg": "ROOT_MEAN_SQUARED_ERROR_AVG", + "meanAbsoluteError": "MEAN_ABSOLUTE_ERROR", + "rSquared": "R_SQUARED", + "normalizedMutualInformation": "NORMALIZED_MUTUAL_INFORMATION", + "jaccardSimilarityScore": "JACCARD_SIMILARITY_SCORE", + "precisionAtTopK": "PRECISION_AT_TOP_K", + "objectDetectionAP": "OBJECT_DETECTION_AVERAGE_PRECISION", + } + problemTaskMap = map[string]string{ + "classification": "CLASSIFICATION", + "regression": "REGRESSION", + "clustering": "CLUSTERING", + "linkPrediction": "LINK_PREDICTION", + "vertexNomination": "VERTEX_NOMINATION", + "communityDetection": "COMMUNITY_DETECTION", + "graphClustering": "GRAPH_CLUSTERING", + "graphMatching": "GRAPH_MATCHING", + "timeSeriesForecasting": "TIME_SERIES_FORECASTING", + "collaborativeFiltering": "COLLABORATIVE_FILTERING", + "objectDetection": "OBJECT_DETECTION", + } + problemTaskSubMap = map[string]string{ + "none": "NONE", + "binary": 
"BINARY", + "multiClass": "MULTICLASS", + "multiLabel": "MULTILABEL", + "univariate": "UNIVARIATE", + "multivariate": "MULTIVARIATE", + "overlapping": "OVERLAPPING", + "nonOverlapping": "NONOVERLAPPING", + } + metricScoreMultiplier = map[string]float64{ + "ACCURACY": 1, + "PRECISION": 1, + "RECALL": 1, + "F1": 1, + "F1_MICRO": 1, + "F1_MACRO": 1, + "ROC_AUC": 1, + "ROC_AUC_MICRO": 1, + "ROC_AUC_MACRO": 1, + "MEAN_SQUARED_ERROR": -1, + "ROOT_MEAN_SQUARED_ERROR": -1, + "ROOT_MEAN_SQUARED_ERROR_AVG": -1, + "MEAN_ABSOLUTE_ERROR": -1, + "R_SQUARED": 1, + "NORMALIZED_MUTUAL_INFORMATION": 1, + "JACCARD_SIMILARITY_SCORE": 1, + "PRECISION_AT_TOP_K": 1, + "OBJECT_DETECTION_AVERAGE_PRECISION": 1, + } + metricLabel = map[string]string{ + "ACCURACY": "Accuracy", + "PRECISION": "Precision", + "RECALL": "Recall", + "F1": "F1", + "F1_MICRO": "F1 Micro", + "F1_MACRO": "F1 Macro", + "ROC_AUC": "ROC AUC", + "ROC_AUC_MICRO": "ROC AUC Micro", + "ROC_AUC_MACRO": "ROC AUC Macro", + "MEAN_SQUARED_ERROR": "MSE", + "ROOT_MEAN_SQUARED_ERROR": "RMSE", + "ROOT_MEAN_SQUARED_ERROR_AVG": "RMSE Avg", + "MEAN_ABSOLUTE_ERROR": "MAE", + "R_SQUARED": "R Squared", + "NORMALIZED_MUTUAL_INFORMATION": "Normalized MI", + "JACCARD_SIMILARITY_SCORE": "Jaccard Similarity", + "PRECISION_AT_TOP_K": "Precision Top K", + "OBJECT_DETECTION_AVERAGE_PRECISION": "Avg Precision", + } +) + +// ConvertProblemMetricToTA2 converts a problem schema metric to a TA2 metric. +func ConvertProblemMetricToTA2(metric string) string { + return problemMetricMap[metric] +} + +// ConvertProblemTaskToTA2 converts a problem schema task to a TA2 task. +func ConvertProblemTaskToTA2(metric string) string { + return problemTaskMap[metric] +} + +// ConvertProblemTaskSubToTA2 converts a problem schema task sub type to a TA2 task sub type. +func ConvertProblemTaskSubToTA2(metric string) string { + return problemTaskSubMap[metric] +} + +// GetMetricScoreMultiplier returns a weight to determine whether a higher or +// lower score is `better`. 
+func GetMetricScoreMultiplier(metric string) float64 { + return metricScoreMultiplier[metric] +} + +// GetMetricLabel returns a label string for a metric. +func GetMetricLabel(metric string) string { + return metricLabel[metric] +} + +func convertMetricsFromTA3ToTA2(metrics []string) []*pipeline.ProblemPerformanceMetric { + var res []*pipeline.ProblemPerformanceMetric + for _, metric := range metrics { + ta2Metric := ConvertProblemMetricToTA2(metric) + var metricSet pipeline.PerformanceMetric + if ta2Metric == "" { + log.Warnf("unrecognized metric ('%s'), defaulting to undefined", metric) + metricSet = pipeline.PerformanceMetric_METRIC_UNDEFINED + } else { + metricAdjusted, ok := pipeline.PerformanceMetric_value[ta2Metric] + if !ok { + log.Warnf("undefined metric found ('%s'), defaulting to undefined", ta2Metric) + metricSet = pipeline.PerformanceMetric_METRIC_UNDEFINED + } else { + metricSet = pipeline.PerformanceMetric(metricAdjusted) + } + } + res = append(res, &pipeline.ProblemPerformanceMetric{ + Metric: metricSet, + }) + } + return res +} + +func convertTaskTypeFromTA3ToTA2(taskType string) pipeline.TaskType { + ta2Task := ConvertProblemTaskToTA2(taskType) + if ta2Task == "" { + log.Warnf("unrecognized task type ('%s'), defaulting to undefined", taskType) + return pipeline.TaskType_TASK_TYPE_UNDEFINED + } + task, ok := pipeline.TaskType_value[ta2Task] + if !ok { + log.Warnf("undefined task type found ('%s'), defaulting to undefined", ta2Task) + return pipeline.TaskType_TASK_TYPE_UNDEFINED + } + return pipeline.TaskType(task) +} + +func convertTaskSubTypeFromTA3ToTA2(taskSubType string) pipeline.TaskSubtype { + ta2TaskSub := ConvertProblemTaskSubToTA2(taskSubType) + if ta2TaskSub == "" { + log.Warnf("unrecognized task sub type ('%s'), defaulting to undefined", taskSubType) + return pipeline.TaskSubtype_TASK_SUBTYPE_UNDEFINED + } + task, ok := pipeline.TaskSubtype_value[ta2TaskSub] + if !ok { + log.Warnf("undefined task sub type found ('%s'), defaulting to 
undefined", ta2TaskSub) + return pipeline.TaskSubtype_TASK_SUBTYPE_UNDEFINED + } + return pipeline.TaskSubtype(task) +} + +func convertTargetFeaturesTA3ToTA2(target string, columnIndex int) []*pipeline.ProblemTarget { + return []*pipeline.ProblemTarget{ + { + ColumnName: target, + ResourceId: defaultResourceID, + TargetIndex: 0, + ColumnIndex: int32(columnIndex), + }, + } +} + +func convertDatasetTA3ToTA2(dataset string) string { + return dataset +} + +// GetAPIVersion retrieves the ta3-ta2 API version embedded in the pipeline_core.proto file. This is +// a non-trivial operation, so the value is cached for quick access. +func GetAPIVersion() string { + if apiVersion != "" { + return apiVersion + } + + // Get the raw file descriptor bytes + fileDesc := proto.FileDescriptor(pipeline.E_ProtocolVersion.Filename) + if fileDesc == nil { + log.Errorf("failed to find file descriptor for %v", pipeline.E_ProtocolVersion.Filename) + return unknownAPIVersion + } + + // Open a gzip reader and decompress + r, err := gzip.NewReader(bytes.NewReader(fileDesc)) + if err != nil { + log.Errorf("failed to open gzip reader: %v", err) + return unknownAPIVersion + } + defer r.Close() + + b, err := ioutil.ReadAll(r) + if err != nil { + log.Errorf("failed to decompress descriptor: %v", err) + return unknownAPIVersion + } + + // Unmarshall the bytes from the proto format + fd := &protobuf.FileDescriptorProto{} + if err := proto.Unmarshal(b, fd); err != nil { + log.Errorf("malformed FileDescriptorProto: %v", err) + return unknownAPIVersion + } + + // Fetch the extension from the FileDescriptorOptions message + ex, err := proto.GetExtension(fd.GetOptions(), pipeline.E_ProtocolVersion) + if err != nil { + log.Errorf("failed to fetch extension: %v", err) + return unknownAPIVersion + } + + apiVersion = *ex.(*string) + + return apiVersion +} diff --git a/primitive/feature.go b/primitive/feature.go new file mode 100644 index 0000000..fe4286d --- /dev/null +++ b/primitive/feature.go @@ -0,0 +1,91 @@ 
+package primitive + +import ( + "bytes" + "encoding/csv" + "os" + "path" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" + "github.com/unchartedsoftware/distil-ingest/util" +) + +// FeaturizePrimitive will featurize the dataset fields using a primitive. +func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, + rootDataPath string, outputSchemaPath string, outputDataPath string, hasHeader bool) error { + // create required folders for outputPath + util.CreateContainingDirs(outputDataPath) + util.CreateContainingDirs(outputSchemaPath) + + // load metadata from original schema + meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFile) + if err != nil { + return errors.Wrap(err, "unable to load original schema file") + } + mainDR := meta.GetMainDataResource() + + // add feature variables + features, err := getClusterVariables(meta, "_feature_") + if err != nil { + return errors.Wrap(err, "unable to get feature variables") + } + + d3mIndexField := getD3MIndexField(mainDR) + + // open the input file + dataPath := path.Join(rootDataPath, mainDR.ResPath) + lines, err := s.readCSVFile(dataPath, hasHeader) + if err != nil { + return errors.Wrap(err, "error reading raw data") + } + + // add the cluster data to the raw data + for _, f := range features { + mainDR.Variables = append(mainDR.Variables, f.Variable) + + lines, err = s.appendFeature(dataset, d3mIndexField, hasHeader, f, lines) + if err != nil { + return errors.Wrap(err, "error appending feature data") + } + } + + // initialize csv writer + output := &bytes.Buffer{} + writer := csv.NewWriter(output) + + // output the header + header := make([]string, len(mainDR.Variables)) + for _, v := range mainDR.Variables { + header[v.Index] = v.Name + } + err = writer.Write(header) + if err != nil { + return errors.Wrap(err, "error storing feature header") + } + + for _, line := range lines { + err = writer.Write(line) + if err != nil { + return errors.Wrap(err, 
"error storing feature output") + } + } + + // output the data with the new feature + writer.Flush() + err = util.WriteFileWithDirs(outputDataPath, output.Bytes(), os.ModePerm) + if err != nil { + return errors.Wrap(err, "error writing feature output") + } + + relativePath := getRelativePath(rootDataPath, outputDataPath) + mainDR.ResPath = relativePath + + // write the new schema to file + err = meta.WriteSchema(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to store feature schema") + } + + return nil +} diff --git a/primitive/pipeline.go b/primitive/pipeline.go new file mode 100644 index 0000000..9632e63 --- /dev/null +++ b/primitive/pipeline.go @@ -0,0 +1,256 @@ +package primitive + +import ( + "context" + "encoding/csv" + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" + "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" +) + +const ( + denormFieldName = "filename" +) + +// FeatureRequest captures the properties of a request to a primitive. 
+type FeatureRequest struct { + SourceVariableName string + FeatureVariableName string + Variable *metadata.Variable + Step *pipeline.PipelineDescription +} + +type IngestStep struct { + client *compute.Client +} + +func NewIngestStep(client *compute.Client) *IngestStep { + return &IngestStep{ + client: client, + } +} + +func (s *IngestStep) submitPrimitive(dataset string, step *pipeline.PipelineDescription) (string, error) { + + res, err := s.client.ExecutePipeline(context.Background(), dataset, step) + if err != nil { + return "", errors.Wrap(err, "unable to dispatch mocked pipeline") + } + resultURI := strings.Replace(res.ResultURI, "file://", "", -1) + return resultURI, nil +} + +func (s *IngestStep) readCSVFile(filename string, hasHeader bool) ([][]string, error) { + // open the file + csvFile, err := os.Open(filename) + if err != nil { + return nil, errors.Wrap(err, "failed to open data file") + } + defer csvFile.Close() + reader := csv.NewReader(csvFile) + + lines := make([][]string, 0) + + // skip the header as needed + if hasHeader { + _, err = reader.Read() + if err != nil { + return nil, errors.Wrap(err, "failed to read header from file") + } + } + + // read the raw data + for { + line, err := reader.Read() + if err == io.EOF { + break + } else if err != nil { + return nil, errors.Wrap(err, "failed to read line from file") + } + + lines = append(lines, line) + } + + return lines, nil +} + +func (s *IngestStep) appendFeature(dataset string, d3mIndexField int, hasHeader bool, feature *FeatureRequest, lines [][]string) ([][]string, error) { + datasetURI, err := s.submitPrimitive(dataset, feature.Step) + if err != nil { + return nil, errors.Wrap(err, "unable to run pipeline primitive") + } + + // parse primitive response (new field contains output) + res, err := result.ParseResultCSV(datasetURI) + if err != nil { + return nil, errors.Wrap(err, "unable to parse pipeline primitive result") + } + + // find the field with the feature output + labelIndex := 1 + 
for i, f := range res[0] { + if f == feature.FeatureVariableName { + labelIndex = i + } + } + + // build the lookup for the new field + features := make(map[string]string) + for i, v := range res { + // skip header + if i > 0 { + d3mIndex := v[0].(string) + labels := v[labelIndex].(string) + features[d3mIndex] = labels + } + } + + // add the new feature to the raw data + for i, line := range lines { + if i > 0 || !hasHeader { + d3mIndex := line[d3mIndexField] + feature := features[d3mIndex] + line = append(line, feature) + } + } + + return lines, nil +} + +func getFeatureVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequest, error) { + mainDR := meta.GetMainDataResource() + features := make([]*FeatureRequest, 0) + for _, v := range mainDR.Variables { + if v.RefersTo != nil && v.RefersTo["resID"] != nil { + // get the refered DR + resID := v.RefersTo["resID"].(string) + + res := getDataResource(meta, resID) + + // check if needs to be featurized + if res.CanBeFeaturized() { + // create the new resource to hold the featured output + indexName := fmt.Sprintf("%s%s", prefix, v.Name) + + // add the feature variable + v := metadata.NewVariable(len(mainDR.Variables), indexName, "label", v.Name, "string", "string", "", "", []string{"attribute"}, metadata.VarRoleMetadata, nil, mainDR.Variables, false) + + // create the required pipeline + step, err := description.CreateCrocPipeline("leather", "", []string{v.Name}, []string{indexName}) + if err != nil { + return nil, errors.Wrap(err, "unable to create step pipeline") + } + + features = append(features, &FeatureRequest{ + SourceVariableName: denormFieldName, + FeatureVariableName: indexName, + Variable: v, + Step: step, + }) + } + } + } + + return features, nil +} + +func getClusterVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequest, error) { + mainDR := meta.GetMainDataResource() + features := make([]*FeatureRequest, 0) + for _, v := range mainDR.Variables { + if v.RefersTo != nil && 
v.RefersTo["resID"] != nil { + // get the refered DR + resID := v.RefersTo["resID"].(string) + + res := getDataResource(meta, resID) + + // check if needs to be featurized + if res.CanBeFeaturized() || res.ResType == "timeseries" { + // create the new resource to hold the featured output + indexName := fmt.Sprintf("%s%s", prefix, v.Name) + + // add the feature variable + v := metadata.NewVariable(len(mainDR.Variables), indexName, "group", v.Name, "string", "string", "", "", []string{"attribute"}, metadata.VarRoleMetadata, nil, mainDR.Variables, false) + + // create the required pipeline + var step *pipeline.PipelineDescription + var err error + if res.CanBeFeaturized() { + step, err = description.CreateUnicornPipeline("horned", "", []string{v.Name}, []string{indexName}) + } else { + step, err = description.CreateSlothPipeline("leaf", "", []string{v.Name}, []string{indexName}) + } + if err != nil { + return nil, errors.Wrap(err, "unable to create step pipeline") + } + + features = append(features, &FeatureRequest{ + SourceVariableName: denormFieldName, + FeatureVariableName: indexName, + Variable: v, + Step: step, + }) + } + } + } + + return features, nil +} + +func getD3MIndexField(dr *metadata.DataResource) int { + d3mIndexField := -1 + for _, v := range dr.Variables { + if v.Name == metadata.D3MIndexName { + d3mIndexField = v.Index + } + } + + return d3mIndexField +} + +func toStringArray(in []interface{}) []string { + strArr := make([]string, 0) + for _, v := range in { + strArr = append(strArr, v.(string)) + } + return strArr +} + +func toFloat64Array(in []interface{}) ([]float64, error) { + strArr := make([]float64, 0) + for _, v := range in { + strFloat, err := strconv.ParseFloat(v.(string), 64) + if err != nil { + return nil, errors.Wrap(err, "failed to convert interface array to float array") + } + strArr = append(strArr, strFloat) + } + return strArr, nil +} + +func getDataResource(meta *metadata.Metadata, resID string) *metadata.DataResource { + // main 
data resource has d3m index variable + for _, dr := range meta.DataResources { + if dr.ResID == resID { + return dr + } + } + + return nil +} + +func getRelativePath(rootPath string, filePath string) string { + relativePath := strings.TrimPrefix(filePath, rootPath) + relativePath = strings.TrimPrefix(relativePath, "/") + + return relativePath +} diff --git a/primitive/rank.go b/primitive/rank.go new file mode 100644 index 0000000..2dbf497 --- /dev/null +++ b/primitive/rank.go @@ -0,0 +1,68 @@ +package primitive + +import ( + "encoding/json" + "os" + "strconv" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/rest" + + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" + "github.com/unchartedsoftware/distil-ingest/util" +) + +// RankPrimitive will rank the dataset using a primitive. +func (s *IngestStep) RankPrimitive(dataset string, outputPath string) error { + // create & submit the solution request + pip, err := description.CreatePCAFeaturesPipeline("harry", "") + if err != nil { + return errors.Wrap(err, "unable to create PCA pipeline") + } + + datasetURI, err := s.submitPrimitive(dataset, pip) + if err != nil { + return errors.Wrap(err, "unable to run PCA pipeline") + } + + // parse primitive response (col index,importance) + res, err := result.ParseResultCSV(datasetURI) + if err != nil { + return errors.Wrap(err, "unable to parse PCA pipeline result") + } + + ranks := make([]float64, len(res)-1) + for i, v := range res { + if i > 0 { + colIndex, err := strconv.ParseInt(v[0].(string), 10, 64) + if err != nil { + return errors.Wrap(err, "unable to parse PCA col index") + } + vInt, err := strconv.ParseFloat(v[1].(string), 64) + if err != nil { + return errors.Wrap(err, "unable to parse PCA rank value") + } + ranks[colIndex] = vInt + } + } + + importance := &rest.ImportanceResult{ + Path: datasetURI, + Features: ranks, + } + + // output the 
classification in the expected JSON format + bytes, err := json.MarshalIndent(importance, "", " ") + if err != nil { + return errors.Wrap(err, "unable to serialize ranking result") + } + + // write to file + err = util.WriteFileWithDirs(outputPath, bytes, os.ModePerm) + if err != nil { + return errors.Wrap(err, "unable to store ranking result") + } + + return nil +} diff --git a/primitive/summarize.go b/primitive/summarize.go new file mode 100644 index 0000000..6097939 --- /dev/null +++ b/primitive/summarize.go @@ -0,0 +1,62 @@ +package primitive + +import ( + "encoding/json" + "os" + "strings" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" + "github.com/unchartedsoftware/distil-ingest/rest" + "github.com/unchartedsoftware/distil-ingest/util" +) + +// SummarizePrimitive will summarize the dataset using a primitive. +func (s *IngestStep) SummarizePrimitive(dataset string, outputPath string) error { + // create & submit the solution request + pip, err := description.CreateDukePipeline("wellington", "") + if err != nil { + return errors.Wrap(err, "unable to create Duke pipeline") + } + + datasetURI, err := s.submitPrimitive(dataset, pip) + if err != nil { + return errors.Wrap(err, "unable to run Duke pipeline") + } + + // parse primitive response (token,probability) + res, err := result.ParseResultCSV(datasetURI) + if err != nil { + return errors.Wrap(err, "unable to parse Duke pipeline result") + } + + tokens := make([]string, len(res)-1) + for i, v := range res { + // skip the header + if i > 0 { + token, ok := v[0].(string) + if !ok { + return errors.Wrap(err, "unable to parse Duke token") + } + tokens[i-1] = token + } + } + + sum := &rest.SummaryResult{ + Summary: strings.Join(tokens, ", "), + } + + // output the classification in the expected JSON format + bytes, err := json.MarshalIndent(sum, "", " ") + if err != nil { + return 
errors.Wrap(err, "unable to serialize summary result") + } + // write to file + err = util.WriteFileWithDirs(outputPath, bytes, os.ModePerm) + if err != nil { + return errors.Wrap(err, "unable to store summary result") + } + + return nil +} diff --git a/util/file.go b/util/file.go new file mode 100644 index 0000000..ebae4e8 --- /dev/null +++ b/util/file.go @@ -0,0 +1,58 @@ +package util + +import ( + "io/ioutil" + "os" + "path/filepath" + + "github.com/pkg/errors" +) + +// CreateContainingDirs creates all directories on the supplied path. +func CreateContainingDirs(filePath string) error { + dirToCreate := filepath.Dir(filePath) + if dirToCreate != "/" && dirToCreate != "." { + err := os.MkdirAll(dirToCreate, 0777) + if err != nil { + return errors.Wrap(err, "unable to create containing directory") + } + } + + return nil +} + +// WriteFileWithDirs writes the file and creates any missing directories along +// the way. +func WriteFileWithDirs(filename string, data []byte, perm os.FileMode) error { + + dir, _ := filepath.Split(filename) + + // make all dirs up to the destination + err := os.MkdirAll(dir, os.ModePerm) + if err != nil { + return err + } + + // write the file + return ioutil.WriteFile(filename, data, perm) +} + +// DirExists checks to see if a directory exists. +func DirExists(path string) bool { + if _, err := os.Stat(path); os.IsNotExist(err) { + return false + } + return true +} + +// FileExists checks to see if a file exists. +func FileExists(filename string) bool { + _, err := os.Stat(filename) + if err == nil { + return true + } + if os.IsNotExist(err) { + return false + } + return true +} From 7949e257a8397478927c9c38f7c8bbaf8239b26f Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 5 Oct 2018 13:04:21 -0400 Subject: [PATCH 02/23] Updated dependencies for primitive invocation. 
--- Gopkg.lock | 196 ++++++++++++++++++++++++++++++++++++++++++++++------- Gopkg.toml | 12 ++++ 2 files changed, 183 insertions(+), 25 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 49c4d29..659a70d 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -1,11 +1,6 @@ # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. -[[projects]] - name = "github.com/Sirupsen/logrus" - packages = ["."] - revision = "a283a10442df8dc09befd873fab202bf8a253d6a" - [[projects]] name = "github.com/aws/aws-sdk-go" packages = [ @@ -19,13 +14,21 @@ "aws/credentials/ec2rolecreds", "aws/credentials/endpointcreds", "aws/credentials/stscreds", + "aws/csm", "aws/defaults", "aws/ec2metadata", "aws/endpoints", "aws/request", "aws/session", "aws/signer/v4", + "internal/s3err", + "internal/sdkio", + "internal/sdkrand", + "internal/sdkuri", + "internal/shareddefaults", "private/protocol", + "private/protocol/eventstream", + "private/protocol/eventstream/eventstreamapi", "private/protocol/query", "private/protocol/query/queryutil", "private/protocol/rest", @@ -34,19 +37,20 @@ "service/s3", "service/sts" ] - revision = "7111a70b8bea15081468f11bc1450ac2fec2406b" - version = "v1.8.3" + revision = "3c7ce80a3b6d9b952022a6c5e5dd758486ff4de1" + version = "v1.15.48" [[projects]] name = "github.com/davecgh/go-spew" packages = ["spew"] - revision = "04cdfd42973bb9c8589fd6a731800cf222fde1a9" + revision = "8991bc29aa16c548c550c7ff78260e27b9ab7c73" + version = "v1.1.1" [[projects]] name = "github.com/go-ini/ini" packages = ["."] - revision = "e7fea39b01aea8d5671f6858f0532f56e8bff3a5" - version = "v1.27.0" + revision = "7b294651033cd7d9e7f0d9ffa1b75ed1e198e737" + version = "v1.38.3" [[projects]] name = "github.com/go-pg/pg" @@ -58,28 +62,73 @@ "orm", "types" ] - revision = "6b379280e4b806f329f36da1710cabe134991509" - version = "v6.4.24" + revision = "514bed76d8f579d6ff8d40294fa77e476a5c1b3f" + version = "v6.15.0" + +[[projects]] + name = "github.com/golang/protobuf" + packages 
= [ + "proto", + "protoc-gen-go/descriptor", + "ptypes", + "ptypes/any", + "ptypes/duration", + "ptypes/timestamp" + ] + revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5" + version = "v1.2.0" [[projects]] name = "github.com/jeffail/gabs" packages = ["."] - revision = "9cef256b595a9e616eb6aec1da446529b7705613" + revision = "7a0fed31069aba77993a518cc2f37b28ee7aa883" + version = "v1.1.0" [[projects]] + branch = "master" name = "github.com/jinzhu/inflection" packages = ["."] - revision = "1c35d901db3da928c72a72d8458480cc9ade058f" + revision = "04140366298a54a039076d798123ffa108fff46c" [[projects]] name = "github.com/jmespath/go-jmespath" packages = ["."] - revision = "bd40a432e4c76585ef6b72d3fd96fb9b6dc7b68d" + revision = "0b12d6b5" + +[[projects]] + branch = "master" + name = "github.com/mailru/easyjson" + packages = [ + ".", + "buffer", + "jlexer", + "jwriter" + ] + revision = "60711f1a8329503b04e1c88535f419d0bb440bff" + +[[projects]] + name = "github.com/mattn/go-colorable" + packages = ["."] + revision = "167de6bfdfba052fa6b2d3664c8f5272e23c9072" + version = "v0.0.9" [[projects]] + name = "github.com/mattn/go-isatty" + packages = ["."] + revision = "6ca4dbf54d38eea1a992b3c722a76a5d1c4cb25c" + version = "v0.0.4" + +[[projects]] + branch = "master" name = "github.com/mgutz/ansi" packages = ["."] - revision = "c286dcecd19ff979eeb73ea444e479b903f2cfcb" + revision = "9520e82c474b0a04dd04f8a40959027271bab992" + +[[projects]] + branch = "master" + name = "github.com/otiai10/copy" + packages = ["."] + revision = "7e9a647135a142c2669943d4a4d29be015ce9392" [[projects]] name = "github.com/pkg/errors" @@ -90,12 +139,20 @@ [[projects]] name = "github.com/pmezard/go-difflib" packages = ["difflib"] - revision = "d8ed2627bdf02c080bf22230dbb337003b7aba2d" + revision = "792786c7400a136282c1664665ae0a8db921c6c2" + version = "v1.0.0" + +[[projects]] + name = "github.com/satori/go.uuid" + packages = ["."] + revision = "f58768cc1a7a7e77a3bd49e98cdd21419399b6a3" + version = "v1.2.0" 
[[projects]] name = "github.com/stretchr/testify" packages = ["assert"] - revision = "890a5c3458b43e6104ff5da8dfa139d013d77544" + revision = "f35b8ab0b5a2cef36673838d662e249dd9c94686" + version = "v1.2.2" [[projects]] branch = "master" @@ -113,22 +170,111 @@ "threshold", "util" ] - revision = "f967d93bead1da1df483356f0819caa2ecfb1bc8" + revision = "2fed02d559eb90bb6c0cb7f8212c25a474f911e3" [[projects]] + branch = "master" name = "github.com/unchartedsoftware/plog" packages = ["."] - revision = "24c9313ea3ff2a86868f9961420e5b72fb83a019" + revision = "34d2bbd3c0a9458feb0752af2b578a23eb8ddc4f" [[projects]] name = "github.com/urfave/cli" packages = ["."] - revision = "f017f86fccc5a039a98f23311f34fdf78b014f78" + revision = "cfb38830724cc34fedffe9a2a29fb54fa9169cd1" + version = "v1.20.0" + +[[projects]] + branch = "master" + name = "github.com/vmihailenco/sasl" + packages = ["."] + revision = "2f13c189728a02f8cc31b3b9cc06047b383c21cc" + +[[projects]] + branch = "master" + name = "golang.org/x/crypto" + packages = ["pbkdf2"] + revision = "e3636079e1a4c1f337f212cc5cd2aca108f6c900" [[projects]] + branch = "master" + name = "golang.org/x/net" + packages = [ + "context", + "http/httpguts", + "http2", + "http2/hpack", + "idna", + "internal/timeseries", + "trace" + ] + revision = "146acd28ed5894421fb5aac80ca93bc1b1f46f87" + +[[projects]] + branch = "master" name = "golang.org/x/sys" packages = ["unix"] - revision = "a646d33e2ee3172a661fc09bca23bb4889a41bc8" + revision = "4497e2df6f9e69048a54498c7affbbec3294ad47" + +[[projects]] + name = "golang.org/x/text" + packages = [ + "collate", + "collate/build", + "internal/colltab", + "internal/gen", + "internal/tag", + "internal/triegen", + "internal/ucd", + "language", + "secure/bidirule", + "transform", + "unicode/bidi", + "unicode/cldr", + "unicode/norm", + "unicode/rangetable" + ] + revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" + version = "v0.3.0" + +[[projects]] + branch = "master" + name = "google.golang.org/genproto" + 
packages = ["googleapis/rpc/status"] + revision = "af9cb2a35e7f169ec875002c1829c9b315cddc04" + +[[projects]] + name = "google.golang.org/grpc" + packages = [ + ".", + "balancer", + "balancer/base", + "balancer/roundrobin", + "codes", + "connectivity", + "credentials", + "encoding", + "encoding/proto", + "grpclog", + "internal", + "internal/backoff", + "internal/channelz", + "internal/envconfig", + "internal/grpcrand", + "internal/transport", + "keepalive", + "metadata", + "naming", + "peer", + "resolver", + "resolver/dns", + "resolver/passthrough", + "stats", + "status", + "tap" + ] + revision = "8dea3dc473e90c8179e519d91302d0597c0ca1d1" + version = "v1.15.0" [[projects]] name = "gopkg.in/olivere/elastic.v5" @@ -137,12 +283,12 @@ "config", "uritemplates" ] - revision = "edbef41beaacc2ee95e61af8faff04e67c01e268" - version = "v5.0.45" + revision = "fc3063a8c0686f64e94f4b2c17eb140c06eb6793" + version = "v5.0.76" [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "57866312a139e2614a09a940b81dcf5215f00292d4853fc888ff0ad5ebccfd5a" + inputs-digest = "d6f33f77642f451ba56738d07082864113ea3012f7fd770696f38fdd20e2e674" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index d8e6260..d954c70 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -29,10 +29,22 @@ name = "github.com/aws/aws-sdk-go" version = "1.8.3" +[[constraint]] + name = "github.com/golang/protobuf" + version = "1.1.0" + [[constraint]] name = "github.com/go-pg/pg" version = "6.4.24" +[[constraint]] + branch = "master" + name = "github.com/unchartedsoftware/plog" + +[[constraint]] + name = "google.golang.org/grpc" + version = "1.11.3" + [[constraint]] name = "github.com/pkg/errors" version = "0.8.0" From d2a26adb416ca0e8109a75a124021e2e260a7e6f Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Thu, 11 Oct 2018 08:56:37 -0400 Subject: [PATCH 03/23] Fixes to clustering with primitive. 
--- cmd/distil-cluster/main.go | 23 +++++------------------ primitive/cluster.go | 2 +- primitive/pipeline.go | 2 +- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/cmd/distil-cluster/main.go b/cmd/distil-cluster/main.go index 98f2290..1dda02b 100644 --- a/cmd/distil-cluster/main.go +++ b/cmd/distil-cluster/main.go @@ -12,7 +12,6 @@ import ( "github.com/unchartedsoftware/distil-ingest/primitive" "github.com/unchartedsoftware/distil-ingest/primitive/compute" - "github.com/unchartedsoftware/distil-ingest/util" ) func splitAndTrim(arg string) []string { @@ -72,11 +71,6 @@ func main() { Value: "", Usage: "The path to use as output for the clustered schema document", }, - cli.StringFlag{ - Name: "output-data", - Value: "", - Usage: "The path to use as output for the clustered data", - }, cli.BoolFlag{ Name: "has-header", Usage: "Whether or not the CSV file has a header row", @@ -94,9 +88,8 @@ func main() { datasetPath := c.String("dataset") //mediaPath := c.String("media-path") outputSchema := c.String("output-schema") - //outputData := c.String("output-data") schemaPath := c.String("schema") - outputFilePath := c.String("output") + outputData := c.String("output") hasHeader := c.Bool("has-header") // initialize client @@ -109,25 +102,19 @@ func main() { step := primitive.NewIngestStep(client) // create feature folder - clusterPath := path.Join(outputFilePath, "clusters") - if util.DirExists(clusterPath) { - // delete existing data to overwrite with latest - os.RemoveAll(clusterPath) - log.Infof("Deleted data at %s", clusterPath) - } - if err := os.MkdirAll(clusterPath, 0777); err != nil && !os.IsExist(err) { + if err := os.MkdirAll(path.Dir(outputData), 0777); err != nil && !os.IsExist(err) { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - os.Remove(path.Join(outputFilePath, "clusterDatasetDoc.json")) + os.Remove(outputData) // create featurizer - err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, outputSchema, 
outputFilePath, hasHeader) + err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputData, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Clustered data written to %s", outputFilePath) + log.Infof("Clustered data written to %s", outputData) return nil } diff --git a/primitive/cluster.go b/primitive/cluster.go index 4a683eb..9418a8a 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -19,7 +19,7 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, util.CreateContainingDirs(outputSchemaPath) // load metadata from original schema - meta, err := metadata.LoadMetadataFromOriginalSchema(outputSchemaPath) + meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFile) if err != nil { return errors.Wrap(err, "unable to load original schema file") } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index 9632e63..e63b4ea 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -145,7 +145,7 @@ func getFeatureVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequ v := metadata.NewVariable(len(mainDR.Variables), indexName, "label", v.Name, "string", "string", "", "", []string{"attribute"}, metadata.VarRoleMetadata, nil, mainDR.Variables, false) // create the required pipeline - step, err := description.CreateCrocPipeline("leather", "", []string{v.Name}, []string{indexName}) + step, err := description.CreateCrocPipeline("leather", "", []string{denormFieldName}, []string{indexName}) if err != nil { return nil, errors.Wrap(err, "unable to create step pipeline") } From b2529ae760a50e84884f093e495af8e01d98bf75 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 12 Oct 2018 11:56:09 -0400 Subject: [PATCH 04/23] Updated parameters for primitive ingest. Added missing fields in metadata required by primitive ingest. 
--- cmd/distil-cluster/main.go | 2 +- cmd/distil-featurize/main.go | 11 ++--------- cmd/distil-merge/main.go | 32 +++----------------------------- metadata/metadata.go | 14 +++++++++----- metadata/table.go | 13 +++++++++++++ primitive/cluster.go | 2 +- primitive/pipeline.go | 6 ++++-- 7 files changed, 33 insertions(+), 47 deletions(-) diff --git a/cmd/distil-cluster/main.go b/cmd/distil-cluster/main.go index 1dda02b..ebc291e 100644 --- a/cmd/distil-cluster/main.go +++ b/cmd/distil-cluster/main.go @@ -101,7 +101,7 @@ func main() { } step := primitive.NewIngestStep(client) - // create feature folder + // create cluster folder if err := os.MkdirAll(path.Dir(outputData), 0777); err != nil && !os.IsExist(err) { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) diff --git a/cmd/distil-featurize/main.go b/cmd/distil-featurize/main.go index 94dadb6..a94ac0b 100644 --- a/cmd/distil-featurize/main.go +++ b/cmd/distil-featurize/main.go @@ -12,7 +12,6 @@ import ( "github.com/unchartedsoftware/distil-ingest/primitive" "github.com/unchartedsoftware/distil-ingest/primitive/compute" - "github.com/unchartedsoftware/distil-ingest/util" ) func splitAndTrim(arg string) []string { @@ -115,17 +114,11 @@ func main() { step := primitive.NewIngestStep(client) // create feature folder - featurePath := path.Join(outputFilePath, "features") - if util.DirExists(featurePath) { - // delete existing data to overwrite with latest - os.RemoveAll(featurePath) - log.Infof("Deleted data at %s", featurePath) - } - if err := os.MkdirAll(featurePath, 0777); err != nil && !os.IsExist(err) { + if err := os.MkdirAll(path.Dir(outputFilePath), 0777); err != nil && !os.IsExist(err) { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - os.Remove(path.Join(outputFilePath, "featureDatasetDoc.json")) + os.Remove(outputFilePath) // create featurizer err = step.FeaturizePrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputFilePath, hasHeader) diff --git 
a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index b1873b3..aa0bd69 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -15,7 +15,7 @@ import ( "github.com/unchartedsoftware/distil-ingest/merge" "github.com/unchartedsoftware/distil-ingest/metadata" - "github.com/unchartedsoftware/distil-ingest/s3" + "github.com/unchartedsoftware/distil-ingest/util" ) const ( @@ -47,11 +47,6 @@ func main() { Value: "", Usage: "The raw dat a file path", }, - cli.StringFlag{ - Name: "output-bucket", - Value: "", - Usage: "The merged output AWS S3 bucket", - }, cli.StringFlag{ Name: "output-key", Value: "", @@ -114,8 +109,6 @@ func main() { } schemaPath := filepath.Clean(c.String("schema")) rawDataPath := filepath.Clean(c.String("raw-data")) - outputBucket := c.String("output-bucket") - outputKey := c.String("output-key") outputPathHeader := filepath.Clean(c.String("output-path-header")) outputSchemaPath := filepath.Clean(c.String("output-schema-path")) hasHeader := c.Bool("has-header") @@ -134,24 +127,8 @@ func main() { return cli.NewExitError(errors.Cause(err), 2) } - // get AWS S3 client - client, err := s3.NewClient() - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 3) - } - - // write merged output to AWS S3 - if outputBucket != "" { - err = s3.WriteToBucket(client, outputBucket, outputKey, output) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 4) - } - } - // write copy to disk - err = ioutil.WriteFile(outputPath, output, 0644) + err = util.WriteFileWithDirs(outputPath, output, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 5) @@ -166,9 +143,6 @@ func main() { // log success / failure log.Infof("Merged data successfully written to %s", outputPath) - if outputBucket != "" { - log.Infof("Merged data successfully written to %s/%s", outputBucket, outputKey) - } // get header for the merged data headers, err := meta.GenerateHeaders() @@ 
-188,7 +162,7 @@ func main() { } // write to file to submit the file - err = ioutil.WriteFile(outputPathHeader, data, 0644) + err = util.WriteFileWithDirs(outputPathHeader, data, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 2) diff --git a/metadata/metadata.go b/metadata/metadata.go index 786f50c..94eee87 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -50,6 +50,8 @@ const ( provenanceSimon = "d3m.primitives.distil.simon" provenanceSchema = "schema" + + schemaVersion = "3.1.1" ) var ( @@ -89,6 +91,7 @@ type DataResource struct { ResPath string `json:"resPath"` IsCollection bool `json:"isCollection"` Variables []*Variable `json:"columns,omitempty"` + ResFormat []string `json:"resFormat"` } // SuggestedType represents a classified variable type. @@ -876,11 +879,12 @@ func (m *Metadata) WriteMergedSchema(path string, mergedDataResource *DataResour // create output format output := map[string]interface{}{ "about": map[string]interface{}{ - "datasetID": m.ID, - "datasetName": m.Name, - "description": m.Description, - "rawData": m.Raw, - "mergedSchema": "true", + "datasetID": m.ID, + "datasetName": m.Name, + "description": m.Description, + "datasetSchemaVersion": schemaVersion, + "rawData": m.Raw, + "mergedSchema": "true", }, "dataResources": []*DataResource{mergedDataResource}, } diff --git a/metadata/table.go b/metadata/table.go index 7c1cec6..9f266e0 100644 --- a/metadata/table.go +++ b/metadata/table.go @@ -28,10 +28,23 @@ func (r *Table) Parse(res *gabs.Container) (*DataResource, error) { } resPath := res.Path("resPath").Data().(string) + var resFormats []string + if res.Path("resFormat").Data() != nil { + formatsRaw, err := res.Path("resFormat").Children() + if err != nil { + return nil, errors.Wrap(err, "unable to parse resource format") + } + resFormats = make([]string, len(formatsRaw)) + for i, r := range formatsRaw { + resFormats[i] = r.Data().(string) + } + } + dr := &DataResource{ ResID: resID, 
ResPath: resPath, ResType: resTypeTable, + ResFormat: resFormats, IsCollection: false, Variables: make([]*Variable, 0), } diff --git a/primitive/cluster.go b/primitive/cluster.go index 9418a8a..e5a62df 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -26,7 +26,7 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, mainDR := meta.GetMainDataResource() // add feature variables - features, err := getFeatureVariables(meta, "_cluster_") + features, err := getClusterVariables(meta, "_cluster_") if err != nil { return errors.Wrap(err, "unable to get cluster variables") } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index e63b4ea..538673b 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -15,6 +15,7 @@ import ( "github.com/unchartedsoftware/distil-ingest/primitive/compute" "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" + "github.com/unchartedsoftware/plog" ) const ( @@ -88,6 +89,7 @@ func (s *IngestStep) appendFeature(dataset string, d3mIndexField int, hasHeader if err != nil { return nil, errors.Wrap(err, "unable to run pipeline primitive") } + log.Infof("parsing primitive result from '%s'", datasetURI) // parse primitive response (new field contains output) res, err := result.ParseResultCSV(datasetURI) @@ -185,9 +187,9 @@ func getClusterVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequ var step *pipeline.PipelineDescription var err error if res.CanBeFeaturized() { - step, err = description.CreateUnicornPipeline("horned", "", []string{v.Name}, []string{indexName}) + step, err = description.CreateUnicornPipeline("horned", "", []string{denormFieldName}, []string{indexName}) } else { - step, err = description.CreateSlothPipeline("leaf", "", []string{v.Name}, []string{indexName}) + step, err = description.CreateSlothPipeline("leaf", "", []string{denormFieldName}, []string{indexName}) } if err != 
nil { return nil, errors.Wrap(err, "unable to create step pipeline") From 6753be7019a543167ebed80ae35b57bad205d21d Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 12 Oct 2018 14:56:18 -0400 Subject: [PATCH 05/23] More missing schema values and additional updates to merging to work with primitives. --- cmd/distil-merge/main.go | 28 ++++++++++++++-------------- merge/merge.go | 1 + metadata/metadata.go | 2 ++ metadata/table.go | 2 ++ 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index aa0bd69..d00c3e5 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -87,13 +87,14 @@ func main() { } outputPath := filepath.Clean(c.String("output-path")) + outputPathHeader := filepath.Clean(c.String("output-path-header")) outputPathRelative := filepath.Clean(c.String("output-path-relative")) dataPath := filepath.Clean(c.String("data")) // If no schema provided, assume it is a raw data file. if c.String("schema") == "" { - log.Infof("Schema file not specified so assuming raw dataset being merged, copying from %s to %s", dataPath, outputPath) - err := mergeRawData(dataPath, outputPath) + log.Infof("Schema file not specified so assuming raw dataset being merged, copying from %s to %s", dataPath, outputPathHeader) + err := mergeRawData(dataPath, outputPathHeader) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 1) @@ -109,7 +110,6 @@ func main() { } schemaPath := filepath.Clean(c.String("schema")) rawDataPath := filepath.Clean(c.String("raw-data")) - outputPathHeader := filepath.Clean(c.String("output-path-header")) outputSchemaPath := filepath.Clean(c.String("output-schema-path")) hasHeader := c.Bool("has-header") @@ -128,21 +128,14 @@ func main() { } // write copy to disk - err = util.WriteFileWithDirs(outputPath, output, 0644) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 5) - } - - // write merged metadata out to 
disk - err = meta.WriteMergedSchema(outputSchemaPath, mergedDR) + err = util.WriteFileWithDirs(outputPathHeader, output, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 5) } // log success / failure - log.Infof("Merged data successfully written to %s", outputPath) + log.Infof("Merged data successfully written to %s", outputPathHeader) // get header for the merged data headers, err := meta.GenerateHeaders() @@ -155,19 +148,26 @@ func main() { header := headers[0] // add the header to the raw data - data, err := getMergedData(header, outputPath, hasHeader) + data, err := getMergedData(header, outputPathHeader, hasHeader) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 2) } // write to file to submit the file - err = util.WriteFileWithDirs(outputPathHeader, data, 0644) + err = util.WriteFileWithDirs(outputPath, data, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 2) } + // write merged metadata out to disk + err = meta.WriteMergedSchema(outputSchemaPath, mergedDR) + if err != nil { + log.Errorf("%+v", err) + return cli.NewExitError(errors.Cause(err), 5) + } + // log success / failure log.Infof("Merged data with header successfully written to %s", outputPathHeader) diff --git a/merge/merge.go b/merge/merge.go index bfa2d10..180a370 100644 --- a/merge/merge.go +++ b/merge/merge.go @@ -158,6 +158,7 @@ func InjectFileLinks(meta *metadata.Metadata, merged []byte, rawDataPath string, if variable.Name == d3mIndexName { mergedDataResource.Variables = dr.Variables mergedDataResource.ResType = dr.ResType + mergedDataResource.ResFormat = dr.ResFormat } } else if r == "key" { keyColumns = append(keyColumns, variable) diff --git a/metadata/metadata.go b/metadata/metadata.go index 94eee87..0dfc205 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -52,6 +52,7 @@ const ( provenanceSchema = "schema" schemaVersion = "3.1.1" + license = "Unknown" ) var ( @@ 
-883,6 +884,7 @@ func (m *Metadata) WriteMergedSchema(path string, mergedDataResource *DataResour "datasetName": m.Name, "description": m.Description, "datasetSchemaVersion": schemaVersion, + "license": license, "rawData": m.Raw, "mergedSchema": "true", }, diff --git a/metadata/table.go b/metadata/table.go index 9f266e0..4918e18 100644 --- a/metadata/table.go +++ b/metadata/table.go @@ -38,6 +38,8 @@ func (r *Table) Parse(res *gabs.Container) (*DataResource, error) { for i, r := range formatsRaw { resFormats[i] = r.Data().(string) } + } else { + resFormats = make([]string, 0) } dr := &DataResource{ From 1241f79abc316d0c0949acf53eb38740bffdbd55 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 15 Oct 2018 11:52:28 -0400 Subject: [PATCH 06/23] More merge step fixes to provide a valid schema output. --- cmd/distil-merge/main.go | 17 +++++------------ cmd/distil-rank/main.go | 2 +- merge/merge.go | 33 ++++++++++++++++++++------------ metadata/metadata.go | 41 ++++++++++++++++++++++++---------------- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index d00c3e5..f8dc50a 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -128,34 +128,27 @@ func main() { } // write copy to disk - err = util.WriteFileWithDirs(outputPathHeader, output, 0644) + err = util.WriteFileWithDirs(outputPath, output, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 5) } // log success / failure - log.Infof("Merged data successfully written to %s", outputPathHeader) + log.Infof("Merged data successfully written to %s", outputPath) // get header for the merged data - headers, err := meta.GenerateHeaders() - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - - // merged data only has 1 header - header := headers[0] + header := mergedDR.GenerateHeader() // add the header to the raw data - data, err := getMergedData(header, 
outputPathHeader, hasHeader) + data, err := getMergedData(header, outputPath, hasHeader) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 2) } // write to file to submit the file - err = util.WriteFileWithDirs(outputPath, data, 0644) + err = util.WriteFileWithDirs(outputPathHeader, data, 0644) if err != nil { log.Errorf("%+v", err) return cli.NewExitError(errors.Cause(err), 2) diff --git a/cmd/distil-rank/main.go b/cmd/distil-rank/main.go index 7b1fcac..26be42f 100644 --- a/cmd/distil-rank/main.go +++ b/cmd/distil-rank/main.go @@ -92,7 +92,7 @@ func main() { if c.String("endpoint") == "" { return cli.NewExitError("missing commandline flag `--endpoint`", 1) } - if c.String("ranking-output") == "" { + if c.String("output") == "" { return cli.NewExitError("missing commandline flag `--ranking-output`", 1) } diff --git a/merge/merge.go b/merge/merge.go index 180a370..10d22bd 100644 --- a/merge/merge.go +++ b/merge/merge.go @@ -174,18 +174,22 @@ func InjectFileLinks(meta *metadata.Metadata, merged []byte, rawDataPath string, // reverse the reference to point from the key to the index obj, ok := variable.RefersTo["resObject"].(map[string]interface{}) if !ok { - return nil, nil, errors.Errorf("failed to parse reference for %s", variable.Name) - } - - // Some datasets point to a resource rather than column - // Ignore those references - name, ok := obj["columnName"].(string) - if ok { - references[name] = map[string]interface{}{ - "resID": dr.ResID, - "resObject": map[string]interface{}{ - "columnName": variable.Name, - }, + // check if it is a string which does not refer to another resource + _, ok := variable.RefersTo["resObject"].(string) + if !ok { + return nil, nil, errors.Errorf("failed to parse reference for %s", variable.Name) + } + } else { + // Some datasets point to a resource rather than column + // Ignore those references + name, ok := obj["columnName"].(string) + if ok { + references[name] = map[string]interface{}{ + "resID": 
dr.ResID, + "resObject": map[string]interface{}{ + "columnName": variable.Name, + }, + } } } } @@ -216,6 +220,11 @@ func InjectFileLinks(meta *metadata.Metadata, merged []byte, rawDataPath string, } } + // adjust variable indices + for i, v := range mergedDataResource.Variables { + v.Index = i + } + // create reader reader := csv.NewReader(bytes.NewBuffer(merged)) diff --git a/metadata/metadata.go b/metadata/metadata.go index 0dfc205..d7df107 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -12,7 +12,6 @@ import ( "path/filepath" "regexp" "sort" - "strings" "github.com/jeffail/gabs" "github.com/pkg/errors" @@ -116,6 +115,7 @@ type Metadata struct { NumRows int64 NumBytes int64 SchemaSource string + Redacted bool } // NormalizeVariableName normalizes a variable name. @@ -198,7 +198,7 @@ func LoadMetadataFromOriginalSchema(schemaPath string) (*Metadata, error) { if err != nil { return nil, err } - err = meta.loadDescription() + err = meta.loadAbout() if err != nil { return nil, err } @@ -226,7 +226,7 @@ func LoadMetadataFromMergedSchema(schemaPath string) (*Metadata, error) { if err != nil { return nil, err } - err = meta.loadDescription() + err = meta.loadAbout() if err != nil { return nil, err } @@ -285,7 +285,7 @@ func LoadMetadataFromClassification(schemaPath string, classificationPath string if err != nil { return nil, err } - err = meta.loadDescription() + err = meta.loadAbout() if err != nil { return nil, err } @@ -446,19 +446,25 @@ func (m *Metadata) GenerateHeaders() ([][]string, error) { headers := make([][]string, len(m.DataResources)) for index, dr := range m.DataResources { - header := make([]string, len(dr.Variables)) - - // iterate over the fields - for hIndex, field := range dr.Variables { - header[hIndex] = strings.Replace(field.Name, "_", "", -1) - } - + header := dr.GenerateHeader() headers[index] = header } return headers, nil } +// GenerateHeaders generates csv headers for the data resource. 
+func (dr *DataResource) GenerateHeader() []string { + header := make([]string, len(dr.Variables)) + + // iterate over the fields + for hIndex, field := range dr.Variables { + header[hIndex] = field.Name + } + + return header +} + // LoadSummaryFromDescription loads a summary from the description. func (m *Metadata) LoadSummaryFromDescription(summaryFile string) error { // request summary @@ -566,11 +572,12 @@ func (m *Metadata) loadName() error { return nil } -func (m *Metadata) loadDescription() error { - // load from property +func (m *Metadata) loadAbout() error { if m.schema.Path("about.description").Data() != nil { m.Description = m.schema.Path("about.description").Data().(string) - return nil + } + if m.schema.Path("about.redacted").Data() != nil { + m.Redacted = m.schema.Path("about.redacted").Data().(bool) } return nil } @@ -644,19 +651,20 @@ func parseSchemaVariable(v *gabs.Container, existingVariables []*Variable, norma resObjectMap, err := refersToData.Path("resObject").ChildrenMap() if err != nil { // see if it is maybe a string and if it is, ignore - _, ok := refersToData.Path("resObject").Data().(string) + data, ok := refersToData.Path("resObject").Data().(string) if !ok { return nil, errors.Wrapf(err, "unable to parse resObject") } + refersTo["resObject"] = data } else { for k, v := range resObjectMap { resObject[k] = v.Data().(string) } + refersTo["resObject"] = resObject } } refersTo["resID"] = resID - refersTo["resObject"] = resObject } variable := NewVariable( varIndex, @@ -886,6 +894,7 @@ func (m *Metadata) WriteMergedSchema(path string, mergedDataResource *DataResour "datasetSchemaVersion": schemaVersion, "license": license, "rawData": m.Raw, + "redacted": m.Redacted, "mergedSchema": "true", }, "dataResources": []*DataResource{mergedDataResource}, From 27062884d52f99555f19a820f816cd84227e6251 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 15 Oct 2018 17:05:07 -0400 Subject: [PATCH 07/23] Fixed feature variable building. 
--- primitive/feature.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitive/feature.go b/primitive/feature.go index fe4286d..2e895c2 100644 --- a/primitive/feature.go +++ b/primitive/feature.go @@ -26,7 +26,7 @@ func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, mainDR := meta.GetMainDataResource() // add feature variables - features, err := getClusterVariables(meta, "_feature_") + features, err := getFeatureVariables(meta, "_feature_") if err != nil { return errors.Wrap(err, "unable to get feature variables") } From 940b844f7d1575edfc1a34c3cf1f723690c227c4 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Tue, 16 Oct 2018 16:49:53 -0400 Subject: [PATCH 08/23] Fixed clustering and feature header handling. --- primitive/cluster.go | 2 +- primitive/compute/client.go | 2 +- primitive/feature.go | 2 +- primitive/pipeline.go | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/primitive/cluster.go b/primitive/cluster.go index e5a62df..7c0f7d7 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -44,7 +44,7 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, for _, f := range features { mainDR.Variables = append(mainDR.Variables, f.Variable) - lines, err = s.appendFeature(dataset, d3mIndexField, hasHeader, f, lines) + lines, err = s.appendFeature(dataset, d3mIndexField, false, f, lines) if err != nil { return errors.Wrap(err, "error appending clustered data") } diff --git a/primitive/compute/client.go b/primitive/compute/client.go index 4fc30d3..bba3d5d 100644 --- a/primitive/compute/client.go +++ b/primitive/compute/client.go @@ -387,7 +387,7 @@ func (c *Client) ExecutePipeline(ctx context.Context, datasetURI string, pipelin out := new(pipeline.PipelineExecuteResponse) err := c.runner.Invoke(ctx, "/Executor/ExecutePipeline", in, out) if err != nil { - return nil, err + return nil, errors.Wrap(err, "unable to invoke pipeline execution") } return out, nil } diff --git 
a/primitive/feature.go b/primitive/feature.go index 2e895c2..91e73d7 100644 --- a/primitive/feature.go +++ b/primitive/feature.go @@ -44,7 +44,7 @@ func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, for _, f := range features { mainDR.Variables = append(mainDR.Variables, f.Variable) - lines, err = s.appendFeature(dataset, d3mIndexField, hasHeader, f, lines) + lines, err = s.appendFeature(dataset, d3mIndexField, false, f, lines) if err != nil { return errors.Wrap(err, "error appending feature data") } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index 538673b..c7150aa 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -122,6 +122,7 @@ func (s *IngestStep) appendFeature(dataset string, d3mIndexField int, hasHeader d3mIndex := line[d3mIndexField] feature := features[d3mIndex] line = append(line, feature) + lines[i] = line } } From 253e5ed63e5bf03b32d05fef7f3c724e8f0c6a1d Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 22 Oct 2018 10:15:06 -0400 Subject: [PATCH 09/23] Initial addition of denormalize primitive support. --- .../compute/description/preprocessing.go | 13 ++++++++ primitive/merge.go | 32 +++++++++++++++++++ primitive/summarize.go | 2 +- 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 primitive/merge.go diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go index a37d5b3..ebcb117 100644 --- a/primitive/compute/description/preprocessing.go +++ b/primitive/compute/description/preprocessing.go @@ -125,3 +125,16 @@ func CreatePCAFeaturesPipeline(name string, description string) (*pipeline.Pipel } return pipeline, nil } + +// CreateDenormalizePipeline creates a pipeline to run the denormalize primitive on an input dataset. +func CreateDenormalizePipeline(name string, description string) (*pipeline.PipelineDescription, error) { + // insantiate the pipeline + pipeline, err := NewBuilder(name, description). 
+ Add(NewDenormalizeStep()). + Compile() + + if err != nil { + return nil, err + } + return pipeline, nil +} diff --git a/primitive/merge.go b/primitive/merge.go new file mode 100644 index 0000000..248b9b3 --- /dev/null +++ b/primitive/merge.go @@ -0,0 +1,32 @@ +package primitive + +import ( + "github.com/pkg/errors" + "github.com/unchartedsoftware/plog" + + "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" + "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" +) + +// RankPrimitive will rank the dataset using a primitive. +func (s *IngestStep) MergePrimitive(dataset string, outputPath string) error { + // create & submit the solution request + pip, err := description.CreateDenormalizePipeline("3NF", "") + if err != nil { + return errors.Wrap(err, "unable to create denormalize pipeline") + } + + datasetURI, err := s.submitPrimitive(dataset, pip) + if err != nil { + return errors.Wrap(err, "unable to run PCA pipeline") + } + + // parse primitive response (col index,importance) + log.Infof("MERGING: %v", datasetURI) + _, err = result.ParseResultCSV(datasetURI) + if err != nil { + return errors.Wrap(err, "unable to parse PCA pipeline result") + } + + return nil +} diff --git a/primitive/summarize.go b/primitive/summarize.go index 6097939..617582a 100644 --- a/primitive/summarize.go +++ b/primitive/summarize.go @@ -35,7 +35,7 @@ func (s *IngestStep) SummarizePrimitive(dataset string, outputPath string) error for i, v := range res { // skip the header if i > 0 { - token, ok := v[0].(string) + token, ok := v[1].(string) if !ok { return errors.Wrap(err, "unable to parse Duke token") } From 07823f1f02b06551e33865526fd5b0973b38020e Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 22 Oct 2018 11:13:18 -0400 Subject: [PATCH 10/23] Updated merge command to use primitive. 
--- cmd/distil-merge/main.go | 197 +++++---------------------------------- 1 file changed, 23 insertions(+), 174 deletions(-) diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index f8dc50a..2976edb 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -1,10 +1,6 @@ package main import ( - "bytes" - "encoding/csv" - "io" - "io/ioutil" "os" "path/filepath" "runtime" @@ -13,9 +9,8 @@ import ( "github.com/unchartedsoftware/plog" "github.com/urfave/cli" - "github.com/unchartedsoftware/distil-ingest/merge" - "github.com/unchartedsoftware/distil-ingest/metadata" - "github.com/unchartedsoftware/distil-ingest/util" + "github.com/unchartedsoftware/distil-ingest/primitive" + "github.com/unchartedsoftware/distil-ingest/primitive/compute" ) const ( @@ -38,9 +33,9 @@ func main() { Usage: "The dataset schema file path", }, cli.StringFlag{ - Name: "data", + Name: "dataset", Value: "", - Usage: "The data file path", + Usage: "The dataet path", }, cli.StringFlag{ Name: "raw-data", @@ -48,12 +43,7 @@ func main() { Usage: "The raw dat a file path", }, cli.StringFlag{ - Name: "output-key", - Value: "", - Usage: "The merged output AWS S3 key", - }, - cli.StringFlag{ - Name: "output-path", + Name: "output", Value: "", Usage: "The merged output path", }, @@ -79,180 +69,39 @@ func main() { } app.Action = func(c *cli.Context) error { - if c.String("data") == "" { - return cli.NewExitError("missing commandline flag `--data`", 1) - } - if c.String("output-path") == "" { - return cli.NewExitError("missing commandline flag `--output-path`", 1) - } - - outputPath := filepath.Clean(c.String("output-path")) - outputPathHeader := filepath.Clean(c.String("output-path-header")) - outputPathRelative := filepath.Clean(c.String("output-path-relative")) - dataPath := filepath.Clean(c.String("data")) - - // If no schema provided, assume it is a raw data file. 
- if c.String("schema") == "" { - log.Infof("Schema file not specified so assuming raw dataset being merged, copying from %s to %s", dataPath, outputPathHeader) - err := mergeRawData(dataPath, outputPathHeader) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 1) - } - - log.Infof("Successfully merged raw dataset") - - return nil - } - - if c.String("output-schema-path") == "" { - return cli.NewExitError("missing commandline flag `--output-schema-path`", 1) - } - schemaPath := filepath.Clean(c.String("schema")) - rawDataPath := filepath.Clean(c.String("raw-data")) - outputSchemaPath := filepath.Clean(c.String("output-schema-path")) - hasHeader := c.Bool("has-header") - - // load the metadata from schema - meta, err := metadata.LoadMetadataFromOriginalSchema(schemaPath) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 1) + if c.String("dataset") == "" { + return cli.NewExitError("missing commandline flag `--dataset`", 1) } - - // merge file links in dataset - mergedDR, output, err := merge.InjectFileLinksFromFile(meta, dataPath, rawDataPath, outputPathRelative, hasHeader) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 2) + if c.String("endpoint") == "" { + return cli.NewExitError("missing commandline flag `--endpoint`", 1) } - - // write copy to disk - err = util.WriteFileWithDirs(outputPath, output, 0644) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 5) + if c.String("output") == "" { + return cli.NewExitError("missing commandline flag `--ranking-output`", 1) } - // log success / failure - log.Infof("Merged data successfully written to %s", outputPath) - - // get header for the merged data - header := mergedDR.GenerateHeader() + output := filepath.Clean(c.String("output")) + endpoint := filepath.Clean(c.String("endpoint")) + dataset := filepath.Clean(c.String("dataset")) - // add the header to the raw data - 
data, err := getMergedData(header, outputPath, hasHeader) + // initialize client + log.Infof("Using pipeline runner interface at `%s` ", endpoint) + client, err := compute.NewRunner(endpoint, true, "distil-ingest", 60, 10, true) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } + step := primitive.NewIngestStep(client) - // write to file to submit the file - err = util.WriteFileWithDirs(outputPathHeader, data, 0644) + // merge the dataset into a single file + err = step.MergePrimitive(dataset, output) if err != nil { - log.Errorf("%+v", err) + log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - - // write merged metadata out to disk - err = meta.WriteMergedSchema(outputSchemaPath, mergedDR) - if err != nil { - log.Errorf("%+v", err) - return cli.NewExitError(errors.Cause(err), 5) - } - - // log success / failure - log.Infof("Merged data with header successfully written to %s", outputPathHeader) + log.Infof("Merged data written to %s", output) return nil } // run app app.Run(os.Args) } - -func getMergedData(header []string, datasetPath string, hasHeader bool) ([]byte, error) { - // Copy source to destination. 
- file, err := os.Open(datasetPath) - if err != nil { - return nil, errors.Wrap(err, "failed to open source file") - } - - reader := csv.NewReader(file) - - // output writer - output := &bytes.Buffer{} - writer := csv.NewWriter(output) - if header != nil && len(header) > 0 { - err := writer.Write(header) - if err != nil { - return nil, errors.Wrap(err, "failed to write header to file") - } - } - - count := 0 - for { - line, err := reader.Read() - if err == io.EOF { - break - } else if err != nil { - return nil, errors.Wrap(err, "failed to read line from file") - } - if count > 0 || !hasHeader { - err := writer.Write(line) - if err != nil { - return nil, errors.Wrap(err, "failed to write line to file") - } - } - count++ - } - // flush writer - writer.Flush() - - // close left - err = file.Close() - if err != nil { - return nil, errors.Wrap(err, "failed to close input file") - } - return output.Bytes(), nil -} - -func mergeRawData(dataPath string, outputPath string) error { - // Copy source to destination. 
- file, err := os.Open(dataPath) - if err != nil { - return errors.Wrap(err, "failed to open data file") - } - - reader := csv.NewReader(file) - - // output writer - output := &bytes.Buffer{} - writer := csv.NewWriter(output) - for { - line, err := reader.Read() - if err == io.EOF { - break - } else if err != nil { - return errors.Wrap(err, "failed to read line from file") - } - // write the csv line back out - err = writer.Write(line) - if err != nil { - return errors.Wrap(err, "failed to write line to file") - } - } - // flush writer - writer.Flush() - - err = ioutil.WriteFile(outputPath, output.Bytes(), 0644) - if err != nil { - return errors.Wrap(err, "failed to close output file") - } - - // close left - err = file.Close() - if err != nil { - return errors.Wrap(err, "failed to close input file") - } - return nil -} From 8a1c1942ed21a1c4572b24c3e107b972301ee6b1 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Tue, 23 Oct 2018 08:56:01 -0400 Subject: [PATCH 11/23] Merge step updated to output schema built from output header and input schema. 
--- cmd/distil-merge/main.go | 19 +++-- metadata/metadata.go | 18 +++- .../compute/description/preprocessing.go | 1 + primitive/feature.go | 2 +- primitive/merge.go | 83 +++++++++++++++++-- 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index 2976edb..79086b0 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -37,6 +37,11 @@ func main() { Value: "", Usage: "The dataet path", }, + cli.StringFlag{ + Name: "endpoint", + Value: "", + Usage: "The pipeline runner endpoint", + }, cli.StringFlag{ Name: "raw-data", Value: "", @@ -75,11 +80,15 @@ func main() { if c.String("endpoint") == "" { return cli.NewExitError("missing commandline flag `--endpoint`", 1) } - if c.String("output") == "" { - return cli.NewExitError("missing commandline flag `--ranking-output`", 1) + if c.String("output-data") == "" { + return cli.NewExitError("missing commandline flag `--output-data`", 1) + } + if c.String("output-schema") == "" { + return cli.NewExitError("missing commandline flag `--output-schema`", 1) } - output := filepath.Clean(c.String("output")) + outputDataPath := filepath.Clean(c.String("output-data")) + outputSchemaPath := filepath.Clean(c.String("output-schema")) endpoint := filepath.Clean(c.String("endpoint")) dataset := filepath.Clean(c.String("dataset")) @@ -93,12 +102,12 @@ func main() { step := primitive.NewIngestStep(client) // merge the dataset into a single file - err = step.MergePrimitive(dataset, output) + err = step.MergePrimitive(dataset, outputSchemaPath, outputDataPath) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Merged data written to %s", output) + log.Infof("Merged data written to %s", outputSchemaPath) return nil } diff --git a/metadata/metadata.go b/metadata/metadata.go index d7df107..e98402b 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -118,6 +118,12 @@ type Metadata struct { Redacted bool } +func 
NewMetadata() *Metadata { + return &Metadata{ + DataResources: make([]*DataResource, 0), + } +} + // NormalizeVariableName normalizes a variable name. func NormalizeVariableName(name string) string { nameNormalized := nameRegex.ReplaceAllString(name, "_") @@ -916,10 +922,14 @@ func (m *Metadata) WriteSchema(path string) error { output := map[string]interface{}{ "about": map[string]interface{}{ - "datasetID": m.ID, - "datasetName": m.Name, - "description": m.Description, - "rawData": m.Raw, + "datasetID": m.ID, + "datasetName": m.Name, + "description": m.Description, + "datasetSchemaVersion": schemaVersion, + "license": license, + "rawData": m.Raw, + "redacted": m.Redacted, + "mergedSchema": "false", }, "dataResources": dataResources, } diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go index ebcb117..25191ce 100644 --- a/primitive/compute/description/preprocessing.go +++ b/primitive/compute/description/preprocessing.go @@ -131,6 +131,7 @@ func CreateDenormalizePipeline(name string, description string) (*pipeline.Pipel // insantiate the pipeline pipeline, err := NewBuilder(name, description). Add(NewDenormalizeStep()). + Add(NewDatasetToDataframeStep()). 
Compile() if err != nil { diff --git a/primitive/feature.go b/primitive/feature.go index 91e73d7..fa903d0 100644 --- a/primitive/feature.go +++ b/primitive/feature.go @@ -78,7 +78,7 @@ func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, return errors.Wrap(err, "error writing feature output") } - relativePath := getRelativePath(rootDataPath, outputDataPath) + relativePath := getRelativePath(path.Dir(outputSchemaPath), outputDataPath) mainDR.ResPath = relativePath // write the new schema to file diff --git a/primitive/merge.go b/primitive/merge.go index 248b9b3..0abb4ba 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -1,15 +1,21 @@ package primitive import ( + "bytes" + "encoding/csv" + "os" + "path" + "github.com/pkg/errors" - "github.com/unchartedsoftware/plog" + "github.com/unchartedsoftware/distil-ingest/metadata" "github.com/unchartedsoftware/distil-ingest/primitive/compute/description" "github.com/unchartedsoftware/distil-ingest/primitive/compute/result" + "github.com/unchartedsoftware/distil-ingest/util" ) // RankPrimitive will rank the dataset using a primitive. 
-func (s *IngestStep) MergePrimitive(dataset string, outputPath string) error { +func (s *IngestStep) MergePrimitive(dataset string, outputSchemaPath string, outputDataPath string) error { // create & submit the solution request pip, err := description.CreateDenormalizePipeline("3NF", "") if err != nil { @@ -18,15 +24,78 @@ func (s *IngestStep) MergePrimitive(dataset string, outputPath string) error { datasetURI, err := s.submitPrimitive(dataset, pip) if err != nil { - return errors.Wrap(err, "unable to run PCA pipeline") + return errors.Wrap(err, "unable to run denormalize pipeline") + } + + // parse primitive response (raw data from the input dataset) + rawResults, err := result.ParseResultCSV(datasetURI) + if err != nil { + return errors.Wrap(err, "unable to parse denormalize result") } - // parse primitive response (col index,importance) - log.Infof("MERGING: %v", datasetURI) - _, err = result.ParseResultCSV(datasetURI) + // need to manually build the metadata and output it. + meta, err := metadata.LoadMetadataFromOriginalSchema(dataset) if err != nil { - return errors.Wrap(err, "unable to parse PCA pipeline result") + return errors.Wrap(err, "unable to load original metadata") + } + vars := s.mapFields(meta) + + outputMeta := metadata.NewMetadata() + header := rawResults[0] + for i, field := range header { + // the first column is a row idnex and should be discarded. 
+ if i > 0 { + fieldName, ok := field.(string) + if !ok { + return errors.Errorf("unable to cast field name") + } + + v := vars[fieldName] + v.Index = i - 1 + outputMeta.DataResources[0].Variables = append(outputMeta.DataResources[0].Variables, v) + } + } + + // initialize csv writer + output := &bytes.Buffer{} + writer := csv.NewWriter(output) + + // rewrite the output without the first column + for _, line := range rawResults { + lineString := make([]string, len(line)-1) + for i := 1; i < len(line); i++ { + lineString[i-1] = line[i].(string) + } + writer.Write(lineString) + } + + // output the data + writer.Flush() + err = util.WriteFileWithDirs(outputDataPath, output.Bytes(), os.ModePerm) + if err != nil { + return errors.Wrap(err, "error writing merged output") + } + + relativePath := getRelativePath(path.Dir(outputSchemaPath), outputDataPath) + outputMeta.DataResources[0].ResPath = relativePath + + // write the new schema to file + err = meta.WriteSchema(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to store merged schema") } return nil } + +func (s *IngestStep) mapFields(meta *metadata.Metadata) map[string]*metadata.Variable { + // cycle through each data resource, mapping field names to variables. + fields := make(map[string]*metadata.Variable) + for _, dr := range meta.DataResources { + for _, v := range dr.Variables { + fields[v.Name] = v + } + } + + return fields +} From 2b41fd6ef4bad36caf426e530af4079ee4c655f4 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Tue, 23 Oct 2018 14:47:44 -0400 Subject: [PATCH 12/23] Added copying to merge step. 
--- cmd/distil-merge/main.go | 16 ++++++---------- metadata/metadata.go | 14 +++++++++++++- primitive/merge.go | 32 ++++++++++++++++++++++++++++---- primitive/pipeline.go | 5 +++++ 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index 79086b0..840d8fa 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -35,7 +35,7 @@ func main() { cli.StringFlag{ Name: "dataset", Value: "", - Usage: "The dataet path", + Usage: "The dataet schema path", }, cli.StringFlag{ Name: "endpoint", @@ -50,7 +50,7 @@ func main() { cli.StringFlag{ Name: "output", Value: "", - Usage: "The merged output path", + Usage: "The merged output folder", }, cli.StringFlag{ Name: "output-path-relative", @@ -81,14 +81,10 @@ func main() { return cli.NewExitError("missing commandline flag `--endpoint`", 1) } if c.String("output-data") == "" { - return cli.NewExitError("missing commandline flag `--output-data`", 1) - } - if c.String("output-schema") == "" { - return cli.NewExitError("missing commandline flag `--output-schema`", 1) + return cli.NewExitError("missing commandline flag `--output`", 1) } - outputDataPath := filepath.Clean(c.String("output-data")) - outputSchemaPath := filepath.Clean(c.String("output-schema")) + outputFolderPath := filepath.Clean(c.String("output")) endpoint := filepath.Clean(c.String("endpoint")) dataset := filepath.Clean(c.String("dataset")) @@ -102,12 +98,12 @@ func main() { step := primitive.NewIngestStep(client) // merge the dataset into a single file - err = step.MergePrimitive(dataset, outputSchemaPath, outputDataPath) + err = step.MergePrimitive(dataset, outputFolderPath) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Merged data written to %s", outputSchemaPath) + log.Infof("Merged data written to %s", outputFolderPath) return nil } diff --git a/metadata/metadata.go b/metadata/metadata.go index e98402b..83084f1 100644 --- 
a/metadata/metadata.go +++ b/metadata/metadata.go @@ -118,12 +118,24 @@ type Metadata struct { Redacted bool } -func NewMetadata() *Metadata { +func NewMetadata(id string, name string, description string) *Metadata { return &Metadata{ + ID: id, + Name: name, + Description: description, DataResources: make([]*DataResource, 0), } } +func NewDataResource(id string, typ string, format []string) *DataResource { + return &DataResource{ + ResID: id, + ResType: typ, + ResFormat: format, + Variables: make([]*Variable, 0), + } +} + // NormalizeVariableName normalizes a variable name. func NormalizeVariableName(name string) string { nameNormalized := nameRegex.ReplaceAllString(name, "_") diff --git a/primitive/merge.go b/primitive/merge.go index 0abb4ba..3e6f4c6 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -6,6 +6,7 @@ import ( "os" "path" + "github.com/otiai10/copy" "github.com/pkg/errors" "github.com/unchartedsoftware/distil-ingest/metadata" @@ -15,14 +16,35 @@ import ( ) // RankPrimitive will rank the dataset using a primitive. 
-func (s *IngestStep) MergePrimitive(dataset string, outputSchemaPath string, outputDataPath string) error { +func (s *IngestStep) MergePrimitive(dataset string, outputFolder string) error { + outputSchemaPath := path.Join(outputFolder, D3MSchemaPathRelative) + outputDataPath := path.Join(outputFolder, D3MDataPathRelative) + sourceFolder := path.Dir(dataset) + + // copy the source folder to have all the linked files for merging + err := copy.Copy(sourceFolder, outputFolder) + if err != nil { + return errors.Wrap(err, "unable to copy source data") + } + + // delete the existing files that will be overwritten + err = os.Remove(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to delete existing schema file") + } + err = os.Remove(outputDataPath) + if err != nil { + return errors.Wrap(err, "unable to delete existing data file") + } + // create & submit the solution request pip, err := description.CreateDenormalizePipeline("3NF", "") if err != nil { return errors.Wrap(err, "unable to create denormalize pipeline") } - datasetURI, err := s.submitPrimitive(dataset, pip) + // pipeline execution assumes datasetDoc.json as schema file + datasetURI, err := s.submitPrimitive(sourceFolder, pip) if err != nil { return errors.Wrap(err, "unable to run denormalize pipeline") } @@ -39,8 +61,10 @@ func (s *IngestStep) MergePrimitive(dataset string, outputSchemaPath string, out return errors.Wrap(err, "unable to load original metadata") } vars := s.mapFields(meta) + mainDR := meta.GetMainDataResource() - outputMeta := metadata.NewMetadata() + outputMeta := metadata.NewMetadata(meta.ID, meta.Name, meta.Description) + outputMeta.DataResources = append(outputMeta.DataResources, metadata.NewDataResource("0", mainDR.ResType, mainDR.ResFormat)) header := rawResults[0] for i, field := range header { // the first column is a row idnex and should be discarded. 
@@ -80,7 +104,7 @@ func (s *IngestStep) MergePrimitive(dataset string, outputSchemaPath string, out outputMeta.DataResources[0].ResPath = relativePath // write the new schema to file - err = meta.WriteSchema(outputSchemaPath) + err = outputMeta.WriteSchema(outputSchemaPath) if err != nil { return errors.Wrap(err, "unable to store merged schema") } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index c7150aa..288b281 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -19,6 +19,11 @@ import ( ) const ( + // D3MSchemaPathRelative is the standard name of the schema document. + D3MSchemaPathRelative = "datasetDoc.json" + // D3MDataPathRelative is the standard name of the data file. + D3MDataPathRelative = "tables/learningData.csv" + denormFieldName = "filename" ) From c81c7b74d3db2b5861c71781381cd8232ef87a81 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Wed, 24 Oct 2018 08:57:47 -0400 Subject: [PATCH 13/23] Added resource file copying in clustering step. --- cmd/distil-cluster/main.go | 16 +++------------- cmd/distil-merge/main.go | 2 +- primitive/cluster.go | 25 +++++++++++++++++++++---- primitive/pipeline.go | 31 +++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/cmd/distil-cluster/main.go b/cmd/distil-cluster/main.go index ebc291e..55c0570 100644 --- a/cmd/distil-cluster/main.go +++ b/cmd/distil-cluster/main.go @@ -2,7 +2,6 @@ package main import ( "os" - "path" "runtime" "strings" @@ -86,10 +85,8 @@ func main() { endpoint := c.String("endpoint") datasetPath := c.String("dataset") - //mediaPath := c.String("media-path") - outputSchema := c.String("output-schema") schemaPath := c.String("schema") - outputData := c.String("output") + output := c.String("output") hasHeader := c.Bool("has-header") // initialize client @@ -101,20 +98,13 @@ func main() { } step := primitive.NewIngestStep(client) - // create cluster folder - if err := os.MkdirAll(path.Dir(outputData), 0777); err != nil && 
!os.IsExist(err) { - log.Errorf("%v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - os.Remove(outputData) - // create featurizer - err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputData, hasHeader) + err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, output, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Clustered data written to %s", outputData) + log.Infof("Clustered data written to %s", output) return nil } diff --git a/cmd/distil-merge/main.go b/cmd/distil-merge/main.go index 840d8fa..42ef935 100644 --- a/cmd/distil-merge/main.go +++ b/cmd/distil-merge/main.go @@ -80,7 +80,7 @@ func main() { if c.String("endpoint") == "" { return cli.NewExitError("missing commandline flag `--endpoint`", 1) } - if c.String("output-data") == "" { + if c.String("output") == "" { return cli.NewExitError("missing commandline flag `--output`", 1) } diff --git a/primitive/cluster.go b/primitive/cluster.go index 7c0f7d7..0ebdf8e 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -7,16 +7,33 @@ import ( "path" "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" "github.com/unchartedsoftware/distil-ingest/util" ) // ClusterPrimitive will cluster the dataset fields using a primitive. 
func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, - rootDataPath string, outputSchemaPath string, outputDataPath string, hasHeader bool) error { - // create required folders for outputPath - util.CreateContainingDirs(outputDataPath) - util.CreateContainingDirs(outputSchemaPath) + rootDataPath string, outputFolder string, hasHeader bool) error { + outputSchemaPath := path.Join(outputFolder, D3MSchemaPathRelative) + outputDataPath := path.Join(outputFolder, D3MDataPathRelative) + sourceFolder := path.Dir(dataset) + + // copy the source folder to have all the linked files for merging + err := copyResourceFiles(sourceFolder, outputFolder) + if err != nil { + return errors.Wrap(err, "unable to copy source data") + } + + // delete the existing files that will be overwritten + err = os.Remove(outputSchemaPath) + if err != nil { + return errors.Wrap(err, "unable to delete existing schema file") + } + err = os.Remove(outputDataPath) + if err != nil { + return errors.Wrap(err, "unable to delete existing data file") + } // load metadata from original schema meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFile) diff --git a/primitive/pipeline.go b/primitive/pipeline.go index 288b281..34e5a2f 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -5,11 +5,14 @@ import ( "encoding/csv" "fmt" "io" + "io/ioutil" "os" "strconv" "strings" + "github.com/otiai10/copy" "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" "github.com/unchartedsoftware/distil-ingest/pipeline" "github.com/unchartedsoftware/distil-ingest/primitive/compute" @@ -262,3 +265,31 @@ func getRelativePath(rootPath string, filePath string) string { return relativePath } + +func copyResourceFiles(sourceFolder string, destinationFolder string) error { + // if source contains destination, then go folder by folder to avoid + // recursion problem + + if strings.HasPrefix(destinationFolder, sourceFolder) { + // copy every subfolder that isn't the 
destination folder + files, err := ioutil.ReadDir(sourceFolder) + if err != nil { + return errors.Wrapf(err, "unable to read source data '%s'", sourceFolder) + } + for _, f := range files { + if f.Name() != destinationFolder { + err = copyResourceFiles(f.Name(), destinationFolder) + if err != nil { + return err + } + } + } + } else { + err := copy.Copy(sourceFolder, destinationFolder) + if err != nil { + return errors.Wrap(err, "unable to copy source data") + } + } + + return nil +} From 4c62276adb4b0aa36ea594f3e7549b5dfd646df2 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Wed, 24 Oct 2018 11:40:22 -0400 Subject: [PATCH 14/23] Adding source folder copying for featurize step. --- cmd/distil-cluster/main.go | 4 +++- cmd/distil-featurize/main.go | 27 +++++---------------------- primitive/cluster.go | 6 ++++-- primitive/feature.go | 16 ++++++++++++++-- primitive/pipeline.go | 6 ++++-- 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/cmd/distil-cluster/main.go b/cmd/distil-cluster/main.go index 55c0570..5955de1 100644 --- a/cmd/distil-cluster/main.go +++ b/cmd/distil-cluster/main.go @@ -2,6 +2,7 @@ package main import ( "os" + "path" "runtime" "strings" @@ -88,6 +89,7 @@ func main() { schemaPath := c.String("schema") output := c.String("output") hasHeader := c.Bool("has-header") + rootDataPath := path.Dir(datasetPath) // initialize client log.Infof("Using pipeline runner interface at `%s` ", endpoint) @@ -99,7 +101,7 @@ func main() { step := primitive.NewIngestStep(client) // create featurizer - err = step.ClusterPrimitive(schemaPath, datasetPath, datasetPath, output, hasHeader) + err = step.ClusterPrimitive(schemaPath, datasetPath, rootDataPath, output, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) diff --git a/cmd/distil-featurize/main.go b/cmd/distil-featurize/main.go index a94ac0b..c9f4f9d 100644 --- a/cmd/distil-featurize/main.go +++ b/cmd/distil-featurize/main.go @@ -56,23 +56,13 @@ func main() 
{ Value: "csv", Usage: "The dataset file type", }, - cli.StringFlag{ - Name: "output", - Value: "", - Usage: "The featurize output file path", - }, cli.StringFlag{ Name: "media-path", Value: "", Usage: "The path to the folder containing the media subfolder that is accessible for featurization", }, cli.StringFlag{ - Name: "output-schema", - Value: "", - Usage: "The path to use as output for the featurized schema document", - }, - cli.StringFlag{ - Name: "output-data", + Name: "output", Value: "", Usage: "The path to use as output for the featurized data", }, @@ -97,11 +87,11 @@ func main() { endpoint := c.String("endpoint") datasetPath := c.String("dataset") //mediaPath := c.String("media-path") - outputSchema := c.String("output-schema") //outputData := c.String("output-data") schemaPath := c.String("schema") - outputFilePath := c.String("output") + outputPath := c.String("output") hasHeader := c.Bool("has-header") + rootDataPath := path.Dir(datasetPath) //threshold := c.Float64("threshold") // initialize client @@ -113,20 +103,13 @@ func main() { } step := primitive.NewIngestStep(client) - // create feature folder - if err := os.MkdirAll(path.Dir(outputFilePath), 0777); err != nil && !os.IsExist(err) { - log.Errorf("%v", err) - return cli.NewExitError(errors.Cause(err), 2) - } - os.Remove(outputFilePath) - // create featurizer - err = step.FeaturizePrimitive(schemaPath, datasetPath, datasetPath, outputSchema, outputFilePath, hasHeader) + err = step.FeaturizePrimitive(schemaPath, datasetPath, rootDataPath, outputPath, hasHeader) if err != nil { log.Errorf("%v", err) return cli.NewExitError(errors.Cause(err), 2) } - log.Infof("Featurized data written to %s", outputFilePath) + log.Infof("Featurized data written to %s", outputPath) return nil } diff --git a/primitive/cluster.go b/primitive/cluster.go index 0ebdf8e..4c3a345 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -6,6 +6,7 @@ import ( "os" "path" + "github.com/otiai10/copy" 
"github.com/pkg/errors" "github.com/unchartedsoftware/distil-ingest/metadata" @@ -20,7 +21,8 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, sourceFolder := path.Dir(dataset) // copy the source folder to have all the linked files for merging - err := copyResourceFiles(sourceFolder, outputFolder) + os.MkdirAll(outputFolder, os.ModePerm) + err := copy.Copy(sourceFolder, outputFolder) if err != nil { return errors.Wrap(err, "unable to copy source data") } @@ -61,7 +63,7 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, for _, f := range features { mainDR.Variables = append(mainDR.Variables, f.Variable) - lines, err = s.appendFeature(dataset, d3mIndexField, false, f, lines) + lines, err = s.appendFeature(sourceFolder, d3mIndexField, false, f, lines) if err != nil { return errors.Wrap(err, "error appending clustered data") } diff --git a/primitive/feature.go b/primitive/feature.go index fa903d0..fcde4e9 100644 --- a/primitive/feature.go +++ b/primitive/feature.go @@ -6,14 +6,26 @@ import ( "os" "path" + "github.com/otiai10/copy" "github.com/pkg/errors" + "github.com/unchartedsoftware/distil-ingest/metadata" "github.com/unchartedsoftware/distil-ingest/util" ) // FeaturizePrimitive will featurize the dataset fields using a primitive. 
func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, - rootDataPath string, outputSchemaPath string, outputDataPath string, hasHeader bool) error { + rootDataPath string, outputFolder string, hasHeader bool) error { + outputSchemaPath := path.Join(outputFolder, D3MSchemaPathRelative) + outputDataPath := path.Join(outputFolder, D3MDataPathRelative) + sourceFolder := path.Dir(dataset) + + // copy the source folder to have all the linked files for merging + os.MkdirAll(outputFolder, os.ModePerm) + err := copy.Copy(sourceFolder, outputFolder) + if err != nil { + return errors.Wrap(err, "unable to copy source data") + } // create required folders for outputPath util.CreateContainingDirs(outputDataPath) util.CreateContainingDirs(outputSchemaPath) @@ -44,7 +56,7 @@ func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, for _, f := range features { mainDR.Variables = append(mainDR.Variables, f.Variable) - lines, err = s.appendFeature(dataset, d3mIndexField, false, f, lines) + lines, err = s.appendFeature(sourceFolder, d3mIndexField, false, f, lines) if err != nil { return errors.Wrap(err, "error appending feature data") } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index 34e5a2f..da5aef6 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -7,6 +7,7 @@ import ( "io" "io/ioutil" "os" + "path" "strconv" "strings" @@ -277,8 +278,9 @@ func copyResourceFiles(sourceFolder string, destinationFolder string) error { return errors.Wrapf(err, "unable to read source data '%s'", sourceFolder) } for _, f := range files { - if f.Name() != destinationFolder { - err = copyResourceFiles(f.Name(), destinationFolder) + name := path.Join(sourceFolder, f.Name()) + if name != destinationFolder { + err = copyResourceFiles(name, destinationFolder) if err != nil { return err } From a98b32f52737b56146081e4581c83b824f1184f9 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Thu, 25 Oct 2018 08:12:19 -0400 Subject: [PATCH 15/23] 
Fixed copying issues on primitive invocation. --- primitive/cluster.go | 12 +++--------- primitive/feature.go | 7 +++---- primitive/merge.go | 10 ++-------- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/primitive/cluster.go b/primitive/cluster.go index 4c3a345..5f3b0e5 100644 --- a/primitive/cluster.go +++ b/primitive/cluster.go @@ -28,14 +28,8 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, } // delete the existing files that will be overwritten - err = os.Remove(outputSchemaPath) - if err != nil { - return errors.Wrap(err, "unable to delete existing schema file") - } - err = os.Remove(outputDataPath) - if err != nil { - return errors.Wrap(err, "unable to delete existing data file") - } + os.Remove(outputSchemaPath) + os.Remove(outputDataPath) // load metadata from original schema meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFile) @@ -98,7 +92,7 @@ func (s *IngestStep) ClusterPrimitive(schemaFile string, dataset string, return errors.Wrap(err, "error writing clustered output") } - relativePath := getRelativePath(rootDataPath, outputDataPath) + relativePath := getRelativePath(path.Dir(outputSchemaPath), outputDataPath) mainDR.ResPath = relativePath // write the new schema to file diff --git a/primitive/feature.go b/primitive/feature.go index fcde4e9..ef4f8d6 100644 --- a/primitive/feature.go +++ b/primitive/feature.go @@ -21,15 +21,14 @@ func (s *IngestStep) FeaturizePrimitive(schemaFile string, dataset string, sourceFolder := path.Dir(dataset) // copy the source folder to have all the linked files for merging - os.MkdirAll(outputFolder, os.ModePerm) err := copy.Copy(sourceFolder, outputFolder) if err != nil { return errors.Wrap(err, "unable to copy source data") } - // create required folders for outputPath - util.CreateContainingDirs(outputDataPath) - util.CreateContainingDirs(outputSchemaPath) + // delete the existing files that will be overwritten + os.Remove(outputSchemaPath) + 
os.Remove(outputDataPath) // load metadata from original schema meta, err := metadata.LoadMetadataFromOriginalSchema(schemaFile) if err != nil { diff --git a/primitive/merge.go b/primitive/merge.go index 3e6f4c6..f3e7c97 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -28,14 +28,8 @@ func (s *IngestStep) MergePrimitive(dataset string, outputFolder string) error { } // delete the existing files that will be overwritten - err = os.Remove(outputSchemaPath) - if err != nil { - return errors.Wrap(err, "unable to delete existing schema file") - } - err = os.Remove(outputDataPath) - if err != nil { - return errors.Wrap(err, "unable to delete existing data file") - } + os.Remove(outputSchemaPath) + os.Remove(outputDataPath) // create & submit the solution request pip, err := description.CreateDenormalizePipeline("3NF", "") From 893c1111e31f1e6e02ad59a0dcd2144d70a456a7 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Thu, 25 Oct 2018 11:36:51 -0400 Subject: [PATCH 16/23] Added denorm field renaming handling in merge step. --- metadata/media.go | 16 ++++++++++++++++ metadata/metadata.go | 15 +++++++++++++++ primitive/merge.go | 17 ++++++++++++++++- 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/metadata/media.go b/metadata/media.go index 2d82a13..1c5fd38 100644 --- a/metadata/media.go +++ b/metadata/media.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/jeffail/gabs" + "github.com/pkg/errors" ) // Media is a data resource that is backed by media files. 
@@ -30,11 +31,26 @@ func (r *Media) Parse(res *gabs.Container) (*DataResource, error) { } resPath := res.Path("resPath").Data().(string) + var resFormats []string + if res.Path("resFormat").Data() != nil { + formatsRaw, err := res.Path("resFormat").Children() + if err != nil { + return nil, errors.Wrap(err, "unable to parse resource format") + } + resFormats = make([]string, len(formatsRaw)) + for i, r := range formatsRaw { + resFormats[i] = r.Data().(string) + } + } else { + resFormats = make([]string, 0) + } + dr := &DataResource{ ResID: resID, ResPath: resPath, ResType: r.Type, IsCollection: true, + ResFormat: resFormats, } return dr, nil diff --git a/metadata/metadata.go b/metadata/metadata.go index 83084f1..d32094c 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -954,6 +954,21 @@ func (m *Metadata) WriteSchema(path string) error { return ioutil.WriteFile(path, bytes, 0644) } +// IsMediaReference returns true if a variable is a reference to a media resource. +func (v *Variable) IsMediaReference() bool { + // if refers to has a res object of string, assume media reference` + mediaReference := false + if v.RefersTo != nil { + if v.RefersTo["resObject"] != nil { + _, ok := v.RefersTo["resObject"].(string) + if ok { + mediaReference = true + } + } + } + return mediaReference +} + // IngestMetadata adds a document consisting of the metadata to the // provided index. 
func IngestMetadata(client *elastic.Client, index string, datasetPrefix string, meta *Metadata) error { diff --git a/primitive/merge.go b/primitive/merge.go index f3e7c97..e2da39e 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -54,8 +54,12 @@ func (s *IngestStep) MergePrimitive(dataset string, outputFolder string) error { if err != nil { return errors.Wrap(err, "unable to load original metadata") } - vars := s.mapFields(meta) mainDR := meta.GetMainDataResource() + vars := s.mapFields(meta) + varsDenorm := s.mapDenormFields(mainDR) + for k, v := range varsDenorm { + vars[k] = v + } outputMeta := metadata.NewMetadata(meta.ID, meta.Name, meta.Description) outputMeta.DataResources = append(outputMeta.DataResources, metadata.NewDataResource("0", mainDR.ResType, mainDR.ResFormat)) @@ -117,3 +121,14 @@ func (s *IngestStep) mapFields(meta *metadata.Metadata) map[string]*metadata.Var return fields } + +func (s *IngestStep) mapDenormFields(mainDR *metadata.DataResource) map[string]*metadata.Variable { + fields := make(map[string]*metadata.Variable) + for _, field := range mainDR.Variables { + if field.IsMediaReference() { + // DENORM PRIMITIVE RENAMES REFERENCE FIELDS TO `filename` + fields[denormFieldName] = field + } + } + return fields +} From 4d1e004c68f23202eda6090d441715f822c08607 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 26 Oct 2018 11:31:12 -0400 Subject: [PATCH 17/23] Updated parsing and pipelines to latest version. 
--- primitive/compute/description/builder.go | 2 +- primitive/compute/description/builder_test.go | 211 ++++++++++++++++++ .../description/inference_step_data.go | 2 +- .../compute/description/preprocessing.go | 175 +++++++++++++-- .../compute/description/preprocessing_test.go | 163 ++++++++++++++ .../compute/description/primitive_steps.go | 38 +++- primitive/compute/description/step_data.go | 2 +- primitive/merge.go | 8 + 8 files changed, 574 insertions(+), 27 deletions(-) create mode 100644 primitive/compute/description/builder_test.go create mode 100644 primitive/compute/description/preprocessing_test.go diff --git a/primitive/compute/description/builder.go b/primitive/compute/description/builder.go index 1fae4a9..11ed0fe 100644 --- a/primitive/compute/description/builder.go +++ b/primitive/compute/description/builder.go @@ -4,7 +4,7 @@ import ( "fmt" "github.com/pkg/errors" - "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil/api/pipeline" ) type builder struct { diff --git a/primitive/compute/description/builder_test.go b/primitive/compute/description/builder_test.go new file mode 100644 index 0000000..cac7c7d --- /dev/null +++ b/primitive/compute/description/builder_test.go @@ -0,0 +1,211 @@ +package description + +import ( + fmt "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/unchartedsoftware/distil/api/pipeline" +) + +func createLabels(counter int64) []string { + return []string{fmt.Sprintf("alpha-%d", counter), fmt.Sprintf("bravo-%d", counter)} +} + +func createTestStep(step int64) *StepData { + labels := createLabels(step) + return NewStepDataWithHyperparameters( + &pipeline.Primitive{ + Id: fmt.Sprintf("0000-primtive-%d", step), + Version: "1.0.0", + Name: fmt.Sprintf("primitive-%d", step), + PythonPath: fmt.Sprintf("d3m.primitives.distil.primitive.%d", step), + }, + []string{"produce"}, + map[string]interface{}{ + "testString": fmt.Sprintf("hyperparam-%d", step), + "testBool": step%2 == 
0, + "testInt": step, + "testFloat": float64(step) + 0.5, + "testStringArray": labels, + "testBoolArray": []bool{step%2 == 0, step%2 != 0}, + "testIntArray": []int64{step, step + 1}, + "testFloatArray": []float64{float64(step) + 0.5, float64(step) + 1.5}, + "testIntMap": map[string]int64{labels[0]: int64(step), labels[1]: int64(step + 1)}, + "testFloatMap": map[string]float64{labels[0]: float64(step) + 0.5, labels[1]: float64(step) + 1.5}, + "testNestedIntArray": [][]int64{{step, step + 1}, {step + 2, step + 3}}, + "testNestedIntMap": map[string][]int64{labels[0]: {step, step + 1}, labels[1]: {step + 2, step + 3}}, + }, + ) +} + +func ConvertToStringArray(list *pipeline.ValueList) []string { + arr := []string{} + for _, v := range list.Items { + arr = append(arr, v.GetString_()) + } + return arr +} + +func ConvertToBoolArray(list *pipeline.ValueList) []bool { + arr := []bool{} + for _, v := range list.Items { + arr = append(arr, v.GetBool()) + } + return arr +} + +func ConvertToIntArray(list *pipeline.ValueList) []int64 { + arr := []int64{} + for _, v := range list.Items { + arr = append(arr, v.GetInt64()) + } + return arr +} + +func ConvertToFloatArray(list *pipeline.ValueList) []float64 { + arr := []float64{} + for _, v := range list.Items { + arr = append(arr, v.GetDouble()) + } + return arr +} + +func ConvertToIntMap(dict *pipeline.ValueDict) map[string]int64 { + mp := map[string]int64{} + for k, v := range dict.Items { + mp[k] = v.GetInt64() + } + return mp +} + +func ConvertToFloatMap(dict *pipeline.ValueDict) map[string]float64 { + mp := map[string]float64{} + for k, v := range dict.Items { + mp[k] = v.GetDouble() + } + return mp +} + +func ConvertToNestedIntArray(list *pipeline.ValueList) [][]int64 { + arr := [][]int64{} + for _, v := range list.Items { + inner := []int64{} + for _, w := range v.GetList().Items { + inner = append(inner, w.GetInt64()) + } + arr = append(arr, inner) + } + return arr +} + +func ConvertToNestedIntMap(dict *pipeline.ValueDict) 
map[string][]int64 { + mp := map[string][]int64{} + for k, v := range dict.Items { + inner := []int64{} + for _, w := range v.GetList().Items { + inner = append(inner, w.GetInt64()) + } + mp[k] = inner + } + return mp +} + +func testStep(t *testing.T, index int64, step *StepData, steps []*pipeline.PipelineDescriptionStep) { + labels := createLabels(index) + + assert.Equal(t, "produce", steps[index].GetPrimitive().GetOutputs()[0].GetId()) + + assert.Equal(t, fmt.Sprintf("hyperparam-%d", index), + steps[index].GetPrimitive().GetHyperparams()["testString"].GetValue().GetData().GetRaw().GetString_()) + + assert.Equal(t, int64(index), steps[index].GetPrimitive().GetHyperparams()["testInt"].GetValue().GetData().GetRaw().GetInt64()) + + assert.Equal(t, index%2 == 0, steps[index].GetPrimitive().GetHyperparams()["testBool"].GetValue().GetData().GetRaw().GetBool()) + + assert.Equal(t, float64(index)+0.5, steps[index].GetPrimitive().GetHyperparams()["testFloat"].GetValue().GetData().GetRaw().GetDouble()) + + assert.Equal(t, labels, + ConvertToStringArray(steps[index].GetPrimitive().GetHyperparams()["testStringArray"].GetValue().GetData().GetRaw().GetList())) + + assert.Equal(t, []int64{int64(index), int64(index) + 1}, + ConvertToIntArray(steps[index].GetPrimitive().GetHyperparams()["testIntArray"].GetValue().GetData().GetRaw().GetList())) + + assert.Equal(t, []float64{float64(index) + 0.5, float64(index) + 1.5}, + ConvertToFloatArray(steps[index].GetPrimitive().GetHyperparams()["testFloatArray"].GetValue().GetData().GetRaw().GetList())) + + assert.Equal(t, []bool{index%2 == 0, index%2 != 0}, + ConvertToBoolArray(steps[index].GetPrimitive().GetHyperparams()["testBoolArray"].GetValue().GetData().GetRaw().GetList())) + + assert.Equal(t, map[string]int64{labels[0]: int64(index), labels[1]: int64(index + 1)}, + ConvertToIntMap(steps[index].GetPrimitive().GetHyperparams()["testIntMap"].GetValue().GetData().GetRaw().GetDict())) + + assert.Equal(t, map[string]float64{labels[0]: 
float64(index) + 0.5, labels[1]: float64(index) + 1.5}, + ConvertToFloatMap(steps[index].GetPrimitive().GetHyperparams()["testFloatMap"].GetValue().GetData().GetRaw().GetDict())) + + assert.Equal(t, [][]int64{{index, index + 1}, {index + 2, index + 3}}, + ConvertToNestedIntArray(steps[index].GetPrimitive().GetHyperparams()["testNestedIntArray"].GetValue().GetData().GetRaw().GetList())) + + assert.Equal(t, map[string][]int64{labels[0]: {index, index + 1}, labels[1]: {index + 2, index + 3}}, + ConvertToNestedIntMap(steps[index].GetPrimitive().GetHyperparams()["testNestedIntMap"].GetValue().GetData().GetRaw().GetDict())) + + assert.EqualValues(t, step.GetPrimitive(), steps[index].GetPrimitive().GetPrimitive()) +} + +// Tests basic pipeline compilation. +func TestPipelineCompile(t *testing.T) { + + step0 := createTestStep(0) + step1 := createTestStep(1) + step2 := createTestStep(2) + + desc, err := NewBuilder("test pipeline", "test pipelne consisting of 3 stages"). + Add(step0). + Add(step1). + Add(step2). + Compile() + assert.NoError(t, err) + + steps := desc.GetSteps() + assert.Equal(t, len(steps), 3) + + // validate step inputs + assert.Equal(t, "inputs.0", steps[0].GetPrimitive().GetArguments()[stepInputsKey].GetContainer().GetData()) + testStep(t, 0, step0, steps) + + assert.Equal(t, "steps.0.produce", steps[1].GetPrimitive().GetArguments()[stepInputsKey].GetContainer().GetData()) + testStep(t, 1, step1, steps) + + assert.Equal(t, "steps.1.produce", steps[2].GetPrimitive().GetArguments()[stepInputsKey].GetContainer().GetData()) + testStep(t, 2, step2, steps) + + // validate outputs + assert.Equal(t, 1, len(desc.GetOutputs())) + assert.Equal(t, "steps.2.produce", desc.GetOutputs()[0].GetData()) +} + +// Tests proper compilation of an inference point. +func TestPipelineCompileWithInference(t *testing.T) { + + step0 := createTestStep(0) + step1 := createTestStep(1) + + desc, err := NewBuilder("test pipeline", "test pipelne consisting of 3 stages"). + Add(step0). 
+ Add(step1). + AddInferencePoint(). + Compile() + assert.NoError(t, err) + + steps := desc.GetSteps() + assert.Equal(t, len(steps), 3) + + assert.Equal(t, "steps.1.produce", steps[2].GetPlaceholder().GetInputs()[0].GetData()) + assert.Equal(t, "produce", steps[2].GetPlaceholder().GetOutputs()[0].GetId()) + assert.Nil(t, steps[2].GetPrimitive().GetHyperparams()) + assert.Nil(t, steps[2].GetPrimitive().GetPrimitive()) + + // validate outputs + assert.Equal(t, 1, len(desc.GetOutputs())) + assert.Equal(t, "steps.2.produce", desc.GetOutputs()[0].GetData()) +} diff --git a/primitive/compute/description/inference_step_data.go b/primitive/compute/description/inference_step_data.go index 8eabda8..fee75d0 100644 --- a/primitive/compute/description/inference_step_data.go +++ b/primitive/compute/description/inference_step_data.go @@ -3,7 +3,7 @@ package description import ( "fmt" - "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil/api/pipeline" log "github.com/unchartedsoftware/plog" ) diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go index 25191ce..c19cf5c 100644 --- a/primitive/compute/description/preprocessing.go +++ b/primitive/compute/description/preprocessing.go @@ -1,18 +1,57 @@ package description import ( - "github.com/unchartedsoftware/distil-ingest/pipeline" + "strings" + + "github.com/pkg/errors" + "github.com/unchartedsoftware/distil/api/model" + "github.com/unchartedsoftware/distil/api/pipeline" ) const defaultResource = "0" // CreateUserDatasetPipeline creates a pipeline description to capture user feature selection and // semantic type information. 
-func CreateUserDatasetPipeline(name string, description string, - targetFeature string) (*pipeline.PipelineDescription, error) { +func CreateUserDatasetPipeline(name string, description string, allFeatures []*model.Variable, + targetFeature string, selectedFeatures []string, filters []*model.Filter) (*pipeline.PipelineDescription, error) { + + // save the selected features in a set for quick lookup + selectedSet := map[string]bool{} + for _, v := range selectedFeatures { + selectedSet[strings.ToLower(v)] = true + } + columnIndices := mapColumns(allFeatures, selectedSet) + + // create the semantic type update primitive + updateSemanticTypes, err := createUpdateSemanticTypes(allFeatures, selectedSet) + if err != nil { + return nil, err + } + + // create the feature selection primitive + removeFeatures, err := createRemoveFeatures(allFeatures, selectedSet) + if err != nil { + return nil, err + } + + // If neither have any content, we'll skip the template altogether. + if len(updateSemanticTypes) == 0 && removeFeatures == nil { + return nil, nil + } + + filterData := createFilterData(filters, columnIndices) // instantiate the pipeline builder := NewBuilder(name, description) + for _, v := range updateSemanticTypes { + builder = builder.Add(v) + } + if removeFeatures != nil { + builder = builder.Add(removeFeatures) + } + for _, f := range filterData { + builder = builder.Add(f) + } pip, err := builder.AddInferencePoint().Compile() if err != nil { @@ -26,6 +65,27 @@ func CreateUserDatasetPipeline(name string, description string, return pip, nil } +func createRemoveFeatures(allFeatures []*model.Variable, selectedSet map[string]bool) (*StepData, error) { + // create a list of features to remove + removeFeatures := []int{} + for _, v := range allFeatures { + if !selectedSet[strings.ToLower(v.Key)] { + removeFeatures = append(removeFeatures, v.Index) + } + } + + if len(removeFeatures) == 0 { + return nil, nil + } + + // instantiate the feature remove primitive + 
featureSelect, err := NewRemoveColumnsStep(defaultResource, removeFeatures) + if err != nil { + return nil, err + } + return featureSelect, nil +} + type update struct { removeIndices []int addIndices []int @@ -38,13 +98,97 @@ func newUpdate() *update { } } +func createUpdateSemanticTypes(allFeatures []*model.Variable, selectedSet map[string]bool) ([]*StepData, error) { + // create maps of (semantic type, index list) - primitive allows for semantic types to be added to / + // remove from multiple columns in a single operation + updateMap := map[string]*update{} + for _, v := range allFeatures { + if selectedSet[strings.ToLower(v.Key)] { + addType := model.MapTA2Type(v.Type) + if addType == "" { + return nil, errors.Errorf("variable `%s` internal type `%s` can't be mapped to ta2", v.Key, v.Type) + } + removeType := model.MapTA2Type(v.OriginalType) + if removeType == "" { + return nil, errors.Errorf("remove variable `%s` internal type `%s` can't be mapped to ta2", v.Key, v.OriginalType) + } + + // only apply change when types are different + if addType != removeType { + if _, ok := updateMap[addType]; !ok { + updateMap[addType] = newUpdate() + } + updateMap[addType].addIndices = append(updateMap[addType].addIndices, v.Index) + + if _, ok := updateMap[removeType]; !ok { + updateMap[removeType] = newUpdate() + } + updateMap[removeType].removeIndices = append(updateMap[removeType].removeIndices, v.Index) + } + } + } + + // Copy the created maps into the column update structure used by the primitive + semanticTypeUpdates := []*StepData{} + for k, v := range updateMap { + var addKey string + if len(v.addIndices) > 0 { + addKey = k + } + add := &ColumnUpdate{ + SemanticTypes: []string{addKey}, + Indices: v.addIndices, + } + var removeKey string + if len(v.removeIndices) > 0 { + removeKey = k + } + remove := &ColumnUpdate{ + SemanticTypes: []string{removeKey}, + Indices: v.removeIndices, + } + semanticTypeUpdate, err := NewUpdateSemanticTypeStep(defaultResource, add, 
remove) + if err != nil { + return nil, err + } + semanticTypeUpdates = append(semanticTypeUpdates, semanticTypeUpdate) + } + return semanticTypeUpdates, nil +} + +func createFilterData(filters []*model.Filter, columnIndices map[string]int) []*StepData { + + // Map the fiters to pipeline primitives + filterSteps := []*StepData{} + for _, f := range filters { + var filter *StepData + inclusive := f.Mode == model.IncludeFilter + colIndex := columnIndices[f.Key] + + switch f.Type { + case model.NumericalFilter: + filter = NewNumericRangeFilterStep(defaultResource, colIndex, inclusive, *f.Min, *f.Max, false) + case model.CategoricalFilter: + filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.Categories, true) + case model.RowFilter: + filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.D3mIndices, true) + case model.FeatureFilter, model.TextFilter: + filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.Categories, false) + } + + filterSteps = append(filterSteps, filter) + } + return filterSteps +} + // CreateSlothPipeline creates a pipeline to peform timeseries clustering on a dataset. -func CreateSlothPipeline(name string, description string, targetColumns []string, outputLabels []string) (*pipeline.PipelineDescription, error) { +func CreateSlothPipeline(name string, description string) (*pipeline.PipelineDescription, error) { // insantiate the pipeline pipeline, err := NewBuilder(name, description). Add(NewDenormalizeStep()). Add(NewDatasetToDataframeStep()). - Add(NewSlothStep(targetColumns, outputLabels)). + Add(NewTimeSeriesReaderStep(1, 0, 1)). + Add(NewSlothStep()). Compile() if err != nil { @@ -126,16 +270,15 @@ func CreatePCAFeaturesPipeline(name string, description string) (*pipeline.Pipel return pipeline, nil } -// CreateDenormalizePipeline creates a pipeline to run the denormalize primitive on an input dataset. 
-func CreateDenormalizePipeline(name string, description string) (*pipeline.PipelineDescription, error) { - // insantiate the pipeline - pipeline, err := NewBuilder(name, description). - Add(NewDenormalizeStep()). - Add(NewDatasetToDataframeStep()). - Compile() - - if err != nil { - return nil, err +func mapColumns(allFeatures []*model.Variable, selectedSet map[string]bool) map[string]int { + colIndices := make(map[string]int) + index := 0 + for _, f := range allFeatures { + if selectedSet[f.Key] { + colIndices[f.Key] = index + index = index + 1 + } } - return pipeline, nil + + return colIndices } diff --git a/primitive/compute/description/preprocessing_test.go b/primitive/compute/description/preprocessing_test.go new file mode 100644 index 0000000..bc8a276 --- /dev/null +++ b/primitive/compute/description/preprocessing_test.go @@ -0,0 +1,163 @@ +package description + +import ( + "io/ioutil" + "testing" + + "github.com/golang/protobuf/proto" + "github.com/stretchr/testify/assert" + "github.com/unchartedsoftware/distil/api/model" +) + +func TestCreateUserDatasetPipeline(t *testing.T) { + + variables := []*model.Variable{ + { + Key: "test_var_0", + OriginalType: "ordinal", + Type: "categorical", + Index: 0, + }, + { + Key: "test_var_1", + OriginalType: "categorical", + Type: "integer", + Index: 1, + }, + { + Key: "test_var_2", + OriginalType: "categorical", + Type: "integer", + Index: 2, + }, + { + Key: "test_var_3", + OriginalType: "categorical", + Type: "integer", + Index: 3, + }, + } + + pipeline, err := CreateUserDatasetPipeline( + "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0", "test_var_1", "test_var_3"}, nil) + + // assert 1st is a semantic type update + hyperParams := pipeline.GetSteps()[0].GetPrimitive().GetHyperparams() + assert.Equal(t, []int64{1, 3}, ConvertToIntArray(hyperParams["add_columns"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []string{"http://schema.org/Integer"}, 
ConvertToStringArray(hyperParams["add_types"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []int64{}, ConvertToIntArray(hyperParams["remove_columns"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []string{""}, ConvertToStringArray(hyperParams["remove_types"].GetValue().GetData().GetRaw().GetList())) + + // assert 2nd is a semantic type update + hyperParams = pipeline.GetSteps()[1].GetPrimitive().GetHyperparams() + assert.Equal(t, []int64{}, ConvertToIntArray(hyperParams["add_columns"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []string{""}, ConvertToStringArray(hyperParams["add_types"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []int64{1, 3}, ConvertToIntArray(hyperParams["remove_columns"].GetValue().GetData().GetRaw().GetList())) + assert.Equal(t, []string{"https://metadata.datadrivendiscovery.org/types/CategoricalData"}, + ConvertToStringArray(hyperParams["remove_types"].GetValue().GetData().GetRaw().GetList())) + + // assert 3rd step is column remove and index two was remove + hyperParams = pipeline.GetSteps()[2].GetPrimitive().GetHyperparams() + assert.Equal(t, "0", hyperParams["resource_id"].GetValue().GetData().GetRaw().GetString_(), "0") + assert.Equal(t, []int64{2}, ConvertToIntArray(hyperParams["columns"].GetValue().GetData().GetRaw().GetList())) + + assert.NoError(t, err) + t.Logf("\n%s", proto.MarshalTextString(pipeline)) +} + +func TestCreateUserDatasetPipelineMappingError(t *testing.T) { + + variables := []*model.Variable{ + { + Key: "test_var_0", + OriginalType: "blordinal", + Type: "categorical", + Index: 0, + }, + } + + pipeline, err := CreateUserDatasetPipeline( + "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0"}, nil) + assert.Error(t, err) + t.Logf("\n%s", proto.MarshalTextString(pipeline)) +} + +func TestCreateUserDatasetEmpty(t *testing.T) { + + variables := []*model.Variable{ + { + Key: "test_var_0", + OriginalType: "categorical", + 
Type: "categorical", + Index: 0, + }, + } + + pipeline, err := CreateUserDatasetPipeline( + "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0"}, nil) + + assert.Nil(t, pipeline) + assert.Nil(t, err) + + t.Logf("\n%s", proto.MarshalTextString(pipeline)) +} + +func TestCreatePCAFeaturesPipeline(t *testing.T) { + pipeline, err := CreatePCAFeaturesPipeline("pca_features_test", "test pca feature ranking pipeline") + assert.NoError(t, err) + + data, err := proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_pca_features.pln", data, 0644) + assert.NoError(t, err) +} + +func TestCreateSimonPipeline(t *testing.T) { + pipeline, err := CreateSimonPipeline("simon_test", "test simon classification pipeline") + assert.NoError(t, err) + + data, err := proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_simon.pln", data, 0644) + assert.NoError(t, err) +} + +func TestCreateCrocPipeline(t *testing.T) { + pipeline, err := CreateCrocPipeline("croc_test", "test croc object detection pipeline", []string{"filename"}, []string{"objects"}) + assert.NoError(t, err) + + data, err := proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_croc.pln", data, 0644) + assert.NoError(t, err) +} + +func TestCreateUnicornPipeline(t *testing.T) { + pipeline, err := CreateUnicornPipeline("unicorn test", "test unicorn image detection pipeline", []string{"filename"}, []string{"objects"}) + assert.NoError(t, err) + + data, err := proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_unicorn.pln", data, 0644) + assert.NoError(t, err) +} + +func TestCreateSlothPipeline(t *testing.T) { + pipeline, err := CreateSlothPipeline("sloth_test", "test sloth object detection pipeline") + assert.NoError(t, err) + + data, err := 
proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_sloth.pln", data, 0644) + assert.NoError(t, err) +} diff --git a/primitive/compute/description/primitive_steps.go b/primitive/compute/description/primitive_steps.go index 98337e3..d940ed1 100644 --- a/primitive/compute/description/primitive_steps.go +++ b/primitive/compute/description/primitive_steps.go @@ -1,7 +1,7 @@ package description import ( - "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil/api/pipeline" ) // NewSimonStep creates a SIMON data classification step. It examines an input @@ -20,19 +20,18 @@ func NewSimonStep() *StepData { } // NewSlothStep creates a Sloth timeseries clustering step. -func NewSlothStep(targetColumns []string, outputLabels []string) *StepData { +func NewSlothStep() *StepData { return NewStepDataWithHyperparameters( &pipeline.Primitive{ Id: "77bf4b92-2faa-3e38-bb7e-804131243a7f", - Version: "1.0.0", + Version: "2.0.0", Name: "Sloth", PythonPath: "d3m.primitives.distil.Sloth.cluster", Digest: "f94f1aacc23792b680af0bd895f0fd2bac7336b29967b6ad766df4cb3c1933ab", }, []string{"produce"}, map[string]interface{}{ - "target_columns": targetColumns, - "output_labels": outputLabels, + "nclusters": 4, }, ) } @@ -180,7 +179,8 @@ func NewRemoveColumnsStep(resourceID string, colIndices []int) (*StepData, error ), nil } -// NewTermFilterStep . +// NewTermFilterStep creates a primitive step that filters dataset rows based on a match against a +// term list. The term match can be partial, or apply to whole terms only. func NewTermFilterStep(resourceID string, colindex int, inclusive bool, terms []string, matchWhole bool) *StepData { return NewStepDataWithHyperparameters( &pipeline.Primitive{ @@ -201,7 +201,7 @@ func NewTermFilterStep(resourceID string, colindex int, inclusive bool, terms [] ) } -// NewRegexFilterStep . 
+// NewRegexFilterStep creates a primitive step that filter dataset rows based on a regex match. func NewRegexFilterStep(resourceID string, colindex int, inclusive bool, regex string) *StepData { return NewStepDataWithHyperparameters( &pipeline.Primitive{ @@ -221,7 +221,8 @@ func NewRegexFilterStep(resourceID string, colindex int, inclusive bool, regex s ) } -// NewNumericRangeFilterStep . +// NewNumericRangeFilterStep creates a primitive step that filters dataset rows based on an +// included/excluded numeric range. Inclusion of boundaries is controlled by the strict flag. func NewNumericRangeFilterStep(resourceID string, colindex int, inclusive bool, min float64, max float64, strict bool) *StepData { return NewStepDataWithHyperparameters( &pipeline.Primitive{ @@ -242,3 +243,24 @@ func NewNumericRangeFilterStep(resourceID string, colindex int, inclusive bool, }, ) } + +// NewTimeSeriesReaderStep creates a primitive step that reads time series values using a dataframe +// containing a file URI column. The result is a new dataframe that stores the timetamps as the column headers, +// and the accompanying values for each file as a row. 
+func NewTimeSeriesReaderStep(fileColIndex int, timeColIndex int, valueColIndex int) *StepData { + return NewStepDataWithHyperparameters( + &pipeline.Primitive{ + Id: "1689aafa-16dc-4c55-8ad4-76cadcf46086", + Version: "0.1.0", + Name: "Time series reader", + PythonPath: "d3m.primitives.data.TimeSeriesReader", + Digest: "", + }, + []string{"produce"}, + map[string]interface{}{ + "file_col_index": fileColIndex, + "time_col_index": timeColIndex, + "value_col_index": valueColIndex, + }, + ) +} diff --git a/primitive/compute/description/step_data.go b/primitive/compute/description/step_data.go index 19ae39a..ccd07b3 100644 --- a/primitive/compute/description/step_data.go +++ b/primitive/compute/description/step_data.go @@ -4,7 +4,7 @@ import ( "reflect" "github.com/pkg/errors" - "github.com/unchartedsoftware/distil-ingest/pipeline" + "github.com/unchartedsoftware/distil/api/pipeline" ) const ( diff --git a/primitive/merge.go b/primitive/merge.go index e2da39e..be1bb61 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -82,7 +82,15 @@ func (s *IngestStep) MergePrimitive(dataset string, outputFolder string) error { output := &bytes.Buffer{} writer := csv.NewWriter(output) + // returned header doesnt match expected header so use metadata header + headerMetadata, err := outputMeta.GenerateHeaders() + if err != nil { + return errors.Wrapf(err, "unable to generate header") + } + writer.Write(headerMetadata[0]) + // rewrite the output without the first column + rawResults = rawResults[1:] for _, line := range rawResults { lineString := make([]string, len(line)-1) for i := 1; i < len(line); i++ { From ff1cf5faa192576fc2ebbdb8bac7f3fc54326a29 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 26 Oct 2018 14:08:51 -0400 Subject: [PATCH 18/23] Updated compute package from distil. 
--- primitive/compute/description/builder.go | 2 +- primitive/compute/description/builder_test.go | 2 +- .../description/inference_step_data.go | 2 +- .../compute/description/preprocessing.go | 209 +++++------------- .../compute/description/preprocessing_test.go | 64 ++---- .../compute/description/primitive_steps.go | 2 +- primitive/compute/description/step_data.go | 2 +- primitive/pipeline.go | 18 +- 8 files changed, 89 insertions(+), 212 deletions(-) diff --git a/primitive/compute/description/builder.go b/primitive/compute/description/builder.go index 11ed0fe..1fae4a9 100644 --- a/primitive/compute/description/builder.go +++ b/primitive/compute/description/builder.go @@ -4,7 +4,7 @@ import ( "fmt" "github.com/pkg/errors" - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/pipeline" ) type builder struct { diff --git a/primitive/compute/description/builder_test.go b/primitive/compute/description/builder_test.go index cac7c7d..59d859b 100644 --- a/primitive/compute/description/builder_test.go +++ b/primitive/compute/description/builder_test.go @@ -5,7 +5,7 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/pipeline" ) func createLabels(counter int64) []string { diff --git a/primitive/compute/description/inference_step_data.go b/primitive/compute/description/inference_step_data.go index fee75d0..8eabda8 100644 --- a/primitive/compute/description/inference_step_data.go +++ b/primitive/compute/description/inference_step_data.go @@ -3,7 +3,7 @@ package description import ( "fmt" - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/pipeline" log "github.com/unchartedsoftware/plog" ) diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go index c19cf5c..4a6ab15 100644 --- a/primitive/compute/description/preprocessing.go 
+++ b/primitive/compute/description/preprocessing.go @@ -4,54 +4,18 @@ import ( "strings" "github.com/pkg/errors" - "github.com/unchartedsoftware/distil/api/model" - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/metadata" + "github.com/unchartedsoftware/distil-ingest/pipeline" ) const defaultResource = "0" // CreateUserDatasetPipeline creates a pipeline description to capture user feature selection and // semantic type information. -func CreateUserDatasetPipeline(name string, description string, allFeatures []*model.Variable, - targetFeature string, selectedFeatures []string, filters []*model.Filter) (*pipeline.PipelineDescription, error) { - - // save the selected features in a set for quick lookup - selectedSet := map[string]bool{} - for _, v := range selectedFeatures { - selectedSet[strings.ToLower(v)] = true - } - columnIndices := mapColumns(allFeatures, selectedSet) - - // create the semantic type update primitive - updateSemanticTypes, err := createUpdateSemanticTypes(allFeatures, selectedSet) - if err != nil { - return nil, err - } - - // create the feature selection primitive - removeFeatures, err := createRemoveFeatures(allFeatures, selectedSet) - if err != nil { - return nil, err - } - - // If neither have any content, we'll skip the template altogether. 
- if len(updateSemanticTypes) == 0 && removeFeatures == nil { - return nil, nil - } - - filterData := createFilterData(filters, columnIndices) +func CreateUserDatasetPipeline(name string, description string, targetFeature string) (*pipeline.PipelineDescription, error) { // instantiate the pipeline builder := NewBuilder(name, description) - for _, v := range updateSemanticTypes { - builder = builder.Add(v) - } - if removeFeatures != nil { - builder = builder.Add(removeFeatures) - } - for _, f := range filterData { - builder = builder.Add(f) - } pip, err := builder.AddInferencePoint().Compile() if err != nil { @@ -65,135 +29,37 @@ func CreateUserDatasetPipeline(name string, description string, allFeatures []*m return pip, nil } -func createRemoveFeatures(allFeatures []*model.Variable, selectedSet map[string]bool) (*StepData, error) { - // create a list of features to remove - removeFeatures := []int{} - for _, v := range allFeatures { - if !selectedSet[strings.ToLower(v.Key)] { - removeFeatures = append(removeFeatures, v.Index) - } - } - - if len(removeFeatures) == 0 { - return nil, nil - } +// CreateSlothPipeline creates a pipeline to peform timeseries clustering on a dataset. 
+func CreateSlothPipeline(name string, description string, targetColumn string, timeColumn string, valueColumn string, + baseFeatures []*metadata.Variable, timeSeriesFeatures []*metadata.Variable) (*pipeline.PipelineDescription, error) { - // instantiate the feature remove primitive - featureSelect, err := NewRemoveColumnsStep(defaultResource, removeFeatures) + targetIdx, err := getIndex(baseFeatures, targetColumn) if err != nil { return nil, err } - return featureSelect, nil -} - -type update struct { - removeIndices []int - addIndices []int -} - -func newUpdate() *update { - return &update{ - addIndices: []int{}, - removeIndices: []int{}, - } -} - -func createUpdateSemanticTypes(allFeatures []*model.Variable, selectedSet map[string]bool) ([]*StepData, error) { - // create maps of (semantic type, index list) - primitive allows for semantic types to be added to / - // remove from multiple columns in a single operation - updateMap := map[string]*update{} - for _, v := range allFeatures { - if selectedSet[strings.ToLower(v.Key)] { - addType := model.MapTA2Type(v.Type) - if addType == "" { - return nil, errors.Errorf("variable `%s` internal type `%s` can't be mapped to ta2", v.Key, v.Type) - } - removeType := model.MapTA2Type(v.OriginalType) - if removeType == "" { - return nil, errors.Errorf("remove variable `%s` internal type `%s` can't be mapped to ta2", v.Key, v.OriginalType) - } - - // only apply change when types are different - if addType != removeType { - if _, ok := updateMap[addType]; !ok { - updateMap[addType] = newUpdate() - } - updateMap[addType].addIndices = append(updateMap[addType].addIndices, v.Index) - - if _, ok := updateMap[removeType]; !ok { - updateMap[removeType] = newUpdate() - } - updateMap[removeType].removeIndices = append(updateMap[removeType].removeIndices, v.Index) - } - } - } - // Copy the created maps into the column update structure used by the primitive - semanticTypeUpdates := []*StepData{} - for k, v := range updateMap { - var 
addKey string - if len(v.addIndices) > 0 { - addKey = k - } - add := &ColumnUpdate{ - SemanticTypes: []string{addKey}, - Indices: v.addIndices, - } - var removeKey string - if len(v.removeIndices) > 0 { - removeKey = k - } - remove := &ColumnUpdate{ - SemanticTypes: []string{removeKey}, - Indices: v.removeIndices, - } - semanticTypeUpdate, err := NewUpdateSemanticTypeStep(defaultResource, add, remove) - if err != nil { - return nil, err - } - semanticTypeUpdates = append(semanticTypeUpdates, semanticTypeUpdate) + timeIdx, err := getIndex(timeSeriesFeatures, timeColumn) + if err != nil { + return nil, err } - return semanticTypeUpdates, nil -} - -func createFilterData(filters []*model.Filter, columnIndices map[string]int) []*StepData { - - // Map the fiters to pipeline primitives - filterSteps := []*StepData{} - for _, f := range filters { - var filter *StepData - inclusive := f.Mode == model.IncludeFilter - colIndex := columnIndices[f.Key] - - switch f.Type { - case model.NumericalFilter: - filter = NewNumericRangeFilterStep(defaultResource, colIndex, inclusive, *f.Min, *f.Max, false) - case model.CategoricalFilter: - filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.Categories, true) - case model.RowFilter: - filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.D3mIndices, true) - case model.FeatureFilter, model.TextFilter: - filter = NewTermFilterStep(defaultResource, colIndex, inclusive, f.Categories, false) - } - filterSteps = append(filterSteps, filter) + valueIdx, err := getIndex(timeSeriesFeatures, valueColumn) + if err != nil { + return nil, err } - return filterSteps -} -// CreateSlothPipeline creates a pipeline to peform timeseries clustering on a dataset. -func CreateSlothPipeline(name string, description string) (*pipeline.PipelineDescription, error) { // insantiate the pipeline pipeline, err := NewBuilder(name, description). Add(NewDenormalizeStep()). Add(NewDatasetToDataframeStep()). 
- Add(NewTimeSeriesReaderStep(1, 0, 1)). + Add(NewTimeSeriesLoaderStep(targetIdx, timeIdx, valueIdx)). Add(NewSlothStep()). Compile() if err != nil { return nil, err } + return pipeline, nil } @@ -270,15 +136,46 @@ func CreatePCAFeaturesPipeline(name string, description string) (*pipeline.Pipel return pipeline, nil } -func mapColumns(allFeatures []*model.Variable, selectedSet map[string]bool) map[string]int { - colIndices := make(map[string]int) - index := 0 +// CreateDenormalizePipeline creates a pipeline to run the denormalize primitive on an input dataset. +func CreateDenormalizePipeline(name string, description string) (*pipeline.PipelineDescription, error) { + // insantiate the pipeline + pipeline, err := NewBuilder(name, description). + Add(NewDenormalizeStep()). + Add(NewDatasetToDataframeStep()). + Compile() + + if err != nil { + return nil, err + } + return pipeline, nil +} + +func getIndex(allFeatures []*metadata.Variable, name string) (int, error) { for _, f := range allFeatures { - if selectedSet[f.Key] { - colIndices[f.Key] = index - index = index + 1 + if strings.EqualFold(name, f.Name) { + return f.Index, nil } } + return -1, errors.Errorf("can't find var '%s'", name) +} - return colIndices +// NewTimeSeriesLoaderStep creates a primitive step that reads time series values using a dataframe +// containing a file URI column. The result is a new dataframe that stores the timetamps as the column headers, +// and the accompanying values for each file as a row. 
+func NewTimeSeriesLoaderStep(fileColIndex int, timeColIndex int, valueColIndex int) *StepData { + return NewStepDataWithHyperparameters( + &pipeline.Primitive{ + Id: "1689aafa-16dc-4c55-8ad4-76cadcf46086", + Version: "0.1.0", + Name: "Time series loader", + PythonPath: "d3m.primitives.distil.TimeSeriesLoader", + Digest: "", + }, + []string{"produce"}, + map[string]interface{}{ + "file_col_index": fileColIndex, + "time_col_index": timeColIndex, + "value_col_index": valueColIndex, + }, + ) } diff --git a/primitive/compute/description/preprocessing_test.go b/primitive/compute/description/preprocessing_test.go index bc8a276..96ec3c6 100644 --- a/primitive/compute/description/preprocessing_test.go +++ b/primitive/compute/description/preprocessing_test.go @@ -6,40 +6,13 @@ import ( "github.com/golang/protobuf/proto" "github.com/stretchr/testify/assert" - "github.com/unchartedsoftware/distil/api/model" + "github.com/unchartedsoftware/distil-ingest/metadata" ) func TestCreateUserDatasetPipeline(t *testing.T) { - variables := []*model.Variable{ - { - Key: "test_var_0", - OriginalType: "ordinal", - Type: "categorical", - Index: 0, - }, - { - Key: "test_var_1", - OriginalType: "categorical", - Type: "integer", - Index: 1, - }, - { - Key: "test_var_2", - OriginalType: "categorical", - Type: "integer", - Index: 2, - }, - { - Key: "test_var_3", - OriginalType: "categorical", - Type: "integer", - Index: 3, - }, - } - pipeline, err := CreateUserDatasetPipeline( - "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0", "test_var_1", "test_var_3"}, nil) + "test_user_pipeline", "a test user pipeline", "test_target") // assert 1st is a semantic type update hyperParams := pipeline.GetSteps()[0].GetPrimitive().GetHyperparams() @@ -67,34 +40,16 @@ func TestCreateUserDatasetPipeline(t *testing.T) { func TestCreateUserDatasetPipelineMappingError(t *testing.T) { - variables := []*model.Variable{ - { - Key: "test_var_0", - OriginalType: "blordinal", - 
Type: "categorical", - Index: 0, - }, - } - pipeline, err := CreateUserDatasetPipeline( - "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0"}, nil) + "test_user_pipeline", "a test user pipeline", "test_target") assert.Error(t, err) t.Logf("\n%s", proto.MarshalTextString(pipeline)) } func TestCreateUserDatasetEmpty(t *testing.T) { - variables := []*model.Variable{ - { - Key: "test_var_0", - OriginalType: "categorical", - Type: "categorical", - Index: 0, - }, - } - pipeline, err := CreateUserDatasetPipeline( - "test_user_pipeline", "a test user pipeline", variables, "test_target", []string{"test_var_0"}, nil) + "test_user_pipeline", "a test user pipeline", "test_target") assert.Nil(t, pipeline) assert.Nil(t, err) @@ -151,7 +106,16 @@ func TestCreateUnicornPipeline(t *testing.T) { } func TestCreateSlothPipeline(t *testing.T) { - pipeline, err := CreateSlothPipeline("sloth_test", "test sloth object detection pipeline") + baseVriables := []*metadata.Variable{ + {Name: "filename", Index: 1}, + } + + timeSeriesVariables := []*metadata.Variable{ + {Name: "time", Index: 0}, + {Name: "value", Index: 1}, + } + + pipeline, err := CreateSlothPipeline("sloth_test", "test sloth object detection pipeline", "filename", "time", "value", baseVriables, timeSeriesVariables) assert.NoError(t, err) data, err := proto.Marshal(pipeline) diff --git a/primitive/compute/description/primitive_steps.go b/primitive/compute/description/primitive_steps.go index d940ed1..89b5787 100644 --- a/primitive/compute/description/primitive_steps.go +++ b/primitive/compute/description/primitive_steps.go @@ -1,7 +1,7 @@ package description import ( - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/pipeline" ) // NewSimonStep creates a SIMON data classification step. 
It examines an input diff --git a/primitive/compute/description/step_data.go b/primitive/compute/description/step_data.go index ccd07b3..19ae39a 100644 --- a/primitive/compute/description/step_data.go +++ b/primitive/compute/description/step_data.go @@ -4,7 +4,7 @@ import ( "reflect" "github.com/pkg/errors" - "github.com/unchartedsoftware/distil/api/pipeline" + "github.com/unchartedsoftware/distil-ingest/pipeline" ) const ( diff --git a/primitive/pipeline.go b/primitive/pipeline.go index da5aef6..6522f36 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -199,7 +199,19 @@ func getClusterVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequ if res.CanBeFeaturized() { step, err = description.CreateUnicornPipeline("horned", "", []string{denormFieldName}, []string{indexName}) } else { - step, err = description.CreateSlothPipeline("leaf", "", []string{denormFieldName}, []string{indexName}) + // TODO: Extract actual column names from time series resource + timeField, valueField := getTimeSeriesFields(res) + resFields := []*metadata.Variable{ + { + Name: "time", + Index: 0, + }, + { + Name: "value", + Index: 1, + }, + } + step, err = description.CreateSlothPipeline("leaf", "", v.Name, timeField, valueField, mainDR.Variables, resFields) } if err != nil { return nil, errors.Wrap(err, "unable to create step pipeline") @@ -295,3 +307,7 @@ func copyResourceFiles(sourceFolder string, destinationFolder string) error { return nil } + +func getTimeSeriesFields(dr *metadata.DataResource) (string, string) { + return "time", "value" +} From ecb29b19e30ee1928031d45a74791b9ace128d1b Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Fri, 26 Oct 2018 14:21:15 -0400 Subject: [PATCH 19/23] Updated the parsing grammar to handle arrays better. 
--- primitive/compute/result/complex_field.peg | 9 +- primitive/compute/result/complex_field.peg.go | 1024 +++++++++-------- .../compute/result/complex_field_test.go | 9 + .../compute/result/result_csv_parser_test.go | 1 + primitive/compute/result/testdata/test.csv | 1 + 5 files changed, 584 insertions(+), 460 deletions(-) diff --git a/primitive/compute/result/complex_field.peg b/primitive/compute/result/complex_field.peg index 807709a..eaf1431 100644 --- a/primitive/compute/result/complex_field.peg +++ b/primitive/compute/result/complex_field.peg @@ -18,10 +18,11 @@ type ComplexField Peg { ComplexField <- array !. -# non-terminals -array <- ws* obracket { p.pushArray() } array_contents cbracket { p.popArray() } / - ws* oparen { p.pushArray() } array_contents comma? ws* cparen { p.popArray() } -array_contents <- ws* (item ws* (comma ws* item ws* )*)? +# non-terminals +array <- ws* obracket { p.pushArray() } ws* array_contents ws* cbracket { p.popArray() } / + ws* oparen { p.pushArray() } ws* tuple_contents ws* cparen { p.popArray() } +array_contents <- (ws* item ws*) (comma ws* item ws*)* comma? +tuple_contents <- (item ws* comma ws*)+ (ws* item ws*)? 
item <- array / string / { p.addElement(buffer[begin:end]) } string <- dquote_string / squote_string dquote_string <- escdquote <(textdata / squote / lf / cr / obracket / cbracket / oparen / cparen / comma)*> escdquote { p.addElement(buffer[begin:end]) } diff --git a/primitive/compute/result/complex_field.peg.go b/primitive/compute/result/complex_field.peg.go index 97320a5..7ac6eed 100644 --- a/primitive/compute/result/complex_field.peg.go +++ b/primitive/compute/result/complex_field.peg.go @@ -20,6 +20,7 @@ const ( ruleComplexField rulearray rulearray_contents + ruletuple_contents ruleitem rulestring ruledquote_string @@ -55,6 +56,7 @@ var rul3s = [...]string{ "ComplexField", "array", "array_contents", + "tuple_contents", "item", "string", "dquote_string", @@ -199,7 +201,7 @@ type ComplexField struct { Buffer string buffer []rune - rules [32]func() bool + rules [33]func() bool parse func(rule ...int) error reset func() Pretty bool @@ -403,7 +405,7 @@ func (p *ComplexField) Init() { position, tokenIndex = position0, tokenIndex0 return false }, - /* 1 array <- <((ws* obracket Action0 array_contents cbracket Action1) / (ws* oparen Action2 array_contents comma? 
ws* cparen Action3))> */ + /* 1 array <- <((ws* obracket Action0 ws* array_contents ws* cbracket Action1) / (ws* oparen Action2 ws* tuple_contents ws* cparen Action3))> */ func() bool { position3, tokenIndex3 := position, tokenIndex { @@ -426,8 +428,95 @@ func (p *ComplexField) Init() { { add(ruleAction0, position) } - if !_rules[rulearray_contents]() { - goto l6 + l10: + { + position11, tokenIndex11 := position, tokenIndex + if !_rules[rulews]() { + goto l11 + } + goto l10 + l11: + position, tokenIndex = position11, tokenIndex11 + } + { + position12 := position + l13: + { + position14, tokenIndex14 := position, tokenIndex + if !_rules[rulews]() { + goto l14 + } + goto l13 + l14: + position, tokenIndex = position14, tokenIndex14 + } + if !_rules[ruleitem]() { + goto l6 + } + l15: + { + position16, tokenIndex16 := position, tokenIndex + if !_rules[rulews]() { + goto l16 + } + goto l15 + l16: + position, tokenIndex = position16, tokenIndex16 + } + l17: + { + position18, tokenIndex18 := position, tokenIndex + if !_rules[rulecomma]() { + goto l18 + } + l19: + { + position20, tokenIndex20 := position, tokenIndex + if !_rules[rulews]() { + goto l20 + } + goto l19 + l20: + position, tokenIndex = position20, tokenIndex20 + } + if !_rules[ruleitem]() { + goto l18 + } + l21: + { + position22, tokenIndex22 := position, tokenIndex + if !_rules[rulews]() { + goto l22 + } + goto l21 + l22: + position, tokenIndex = position22, tokenIndex22 + } + goto l17 + l18: + position, tokenIndex = position18, tokenIndex18 + } + { + position23, tokenIndex23 := position, tokenIndex + if !_rules[rulecomma]() { + goto l23 + } + goto l24 + l23: + position, tokenIndex = position23, tokenIndex23 + } + l24: + add(rulearray_contents, position12) + } + l25: + { + position26, tokenIndex26 := position, tokenIndex + if !_rules[rulews]() { + goto l26 + } + goto l25 + l26: + position, tokenIndex = position26, tokenIndex26 } if !_rules[rulecbracket]() { goto l6 @@ -438,15 +527,15 @@ func (p *ComplexField) 
Init() { goto l5 l6: position, tokenIndex = position5, tokenIndex5 - l11: + l28: { - position12, tokenIndex12 := position, tokenIndex + position29, tokenIndex29 := position, tokenIndex if !_rules[rulews]() { - goto l12 + goto l29 } - goto l11 - l12: - position, tokenIndex = position12, tokenIndex12 + goto l28 + l29: + position, tokenIndex = position29, tokenIndex29 } if !_rules[ruleoparen]() { goto l3 @@ -454,28 +543,118 @@ func (p *ComplexField) Init() { { add(ruleAction2, position) } - if !_rules[rulearray_contents]() { - goto l3 + l31: + { + position32, tokenIndex32 := position, tokenIndex + if !_rules[rulews]() { + goto l32 + } + goto l31 + l32: + position, tokenIndex = position32, tokenIndex32 } { - position14, tokenIndex14 := position, tokenIndex + position33 := position + if !_rules[ruleitem]() { + goto l3 + } + l36: + { + position37, tokenIndex37 := position, tokenIndex + if !_rules[rulews]() { + goto l37 + } + goto l36 + l37: + position, tokenIndex = position37, tokenIndex37 + } if !_rules[rulecomma]() { - goto l14 + goto l3 + } + l38: + { + position39, tokenIndex39 := position, tokenIndex + if !_rules[rulews]() { + goto l39 + } + goto l38 + l39: + position, tokenIndex = position39, tokenIndex39 } - goto l15 - l14: - position, tokenIndex = position14, tokenIndex14 + l34: + { + position35, tokenIndex35 := position, tokenIndex + if !_rules[ruleitem]() { + goto l35 + } + l40: + { + position41, tokenIndex41 := position, tokenIndex + if !_rules[rulews]() { + goto l41 + } + goto l40 + l41: + position, tokenIndex = position41, tokenIndex41 + } + if !_rules[rulecomma]() { + goto l35 + } + l42: + { + position43, tokenIndex43 := position, tokenIndex + if !_rules[rulews]() { + goto l43 + } + goto l42 + l43: + position, tokenIndex = position43, tokenIndex43 + } + goto l34 + l35: + position, tokenIndex = position35, tokenIndex35 + } + { + position44, tokenIndex44 := position, tokenIndex + l46: + { + position47, tokenIndex47 := position, tokenIndex + if 
!_rules[rulews]() { + goto l47 + } + goto l46 + l47: + position, tokenIndex = position47, tokenIndex47 + } + if !_rules[ruleitem]() { + goto l44 + } + l48: + { + position49, tokenIndex49 := position, tokenIndex + if !_rules[rulews]() { + goto l49 + } + goto l48 + l49: + position, tokenIndex = position49, tokenIndex49 + } + goto l45 + l44: + position, tokenIndex = position44, tokenIndex44 + } + l45: + add(ruletuple_contents, position33) } - l15: - l16: + l50: { - position17, tokenIndex17 := position, tokenIndex + position51, tokenIndex51 := position, tokenIndex if !_rules[rulews]() { - goto l17 + goto l51 } - goto l16 - l17: - position, tokenIndex = position17, tokenIndex17 + goto l50 + l51: + position, tokenIndex = position51, tokenIndex51 } if !_rules[rulecparen]() { goto l3 @@ -492,764 +671,697 @@ func (p *ComplexField) Init() { position, tokenIndex = position3, tokenIndex3 return false }, - /* 2 array_contents <- <(ws* (item ws* (comma ws* item ws*)*)?)> */ - func() bool { - { - position20 := position - l21: - { - position22, tokenIndex22 := position, tokenIndex - if !_rules[rulews]() { - goto l22 - } - goto l21 - l22: - position, tokenIndex = position22, tokenIndex22 - } - { - position23, tokenIndex23 := position, tokenIndex - if !_rules[ruleitem]() { - goto l23 - } - l25: - { - position26, tokenIndex26 := position, tokenIndex - if !_rules[rulews]() { - goto l26 - } - goto l25 - l26: - position, tokenIndex = position26, tokenIndex26 - } - l27: - { - position28, tokenIndex28 := position, tokenIndex - if !_rules[rulecomma]() { - goto l28 - } - l29: - { - position30, tokenIndex30 := position, tokenIndex - if !_rules[rulews]() { - goto l30 - } - goto l29 - l30: - position, tokenIndex = position30, tokenIndex30 - } - if !_rules[ruleitem]() { - goto l28 - } - l31: - { - position32, tokenIndex32 := position, tokenIndex - if !_rules[rulews]() { - goto l32 - } - goto l31 - l32: - position, tokenIndex = position32, tokenIndex32 - } - goto l27 - l28: - position, 
tokenIndex = position28, tokenIndex28 - } - goto l24 - l23: - position, tokenIndex = position23, tokenIndex23 - } - l24: - add(rulearray_contents, position20) - } - return true - }, - /* 3 item <- <(array / string / ( Action4))> */ + /* 2 array_contents <- <(ws* item ws* (comma ws* item ws*)* comma?)> */ + nil, + /* 3 tuple_contents <- <((item ws* comma ws*)+ (ws* item ws*)?)> */ + nil, + /* 4 item <- <(array / string / ( Action4))> */ func() bool { - position33, tokenIndex33 := position, tokenIndex + position55, tokenIndex55 := position, tokenIndex { - position34 := position + position56 := position { - position35, tokenIndex35 := position, tokenIndex + position57, tokenIndex57 := position, tokenIndex if !_rules[rulearray]() { - goto l36 + goto l58 } - goto l35 - l36: - position, tokenIndex = position35, tokenIndex35 + goto l57 + l58: + position, tokenIndex = position57, tokenIndex57 { - position38 := position + position60 := position { - position39, tokenIndex39 := position, tokenIndex + position61, tokenIndex61 := position, tokenIndex { - position41 := position + position63 := position if !_rules[ruleescdquote]() { - goto l40 + goto l62 } { - position42 := position - l43: + position64 := position + l65: { - position44, tokenIndex44 := position, tokenIndex + position66, tokenIndex66 := position, tokenIndex { - position45, tokenIndex45 := position, tokenIndex + position67, tokenIndex67 := position, tokenIndex if !_rules[ruletextdata]() { - goto l46 + goto l68 } - goto l45 - l46: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l68: + position, tokenIndex = position67, tokenIndex67 if !_rules[rulesquote]() { - goto l47 + goto l69 } - goto l45 - l47: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l69: + position, tokenIndex = position67, tokenIndex67 if !_rules[rulelf]() { - goto l48 + goto l70 } - goto l45 - l48: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l70: + position, tokenIndex = position67, tokenIndex67 if 
!_rules[rulecr]() { - goto l49 + goto l71 } - goto l45 - l49: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l71: + position, tokenIndex = position67, tokenIndex67 if !_rules[ruleobracket]() { - goto l50 + goto l72 } - goto l45 - l50: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l72: + position, tokenIndex = position67, tokenIndex67 if !_rules[rulecbracket]() { - goto l51 + goto l73 } - goto l45 - l51: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l73: + position, tokenIndex = position67, tokenIndex67 if !_rules[ruleoparen]() { - goto l52 + goto l74 } - goto l45 - l52: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l74: + position, tokenIndex = position67, tokenIndex67 if !_rules[rulecparen]() { - goto l53 + goto l75 } - goto l45 - l53: - position, tokenIndex = position45, tokenIndex45 + goto l67 + l75: + position, tokenIndex = position67, tokenIndex67 if !_rules[rulecomma]() { - goto l44 + goto l66 } } - l45: - goto l43 - l44: - position, tokenIndex = position44, tokenIndex44 + l67: + goto l65 + l66: + position, tokenIndex = position66, tokenIndex66 } - add(rulePegText, position42) + add(rulePegText, position64) } if !_rules[ruleescdquote]() { - goto l40 + goto l62 } { add(ruleAction5, position) } - add(ruledquote_string, position41) + add(ruledquote_string, position63) } - goto l39 - l40: - position, tokenIndex = position39, tokenIndex39 + goto l61 + l62: + position, tokenIndex = position61, tokenIndex61 { - position55 := position + position77 := position if !_rules[rulesquote]() { - goto l37 + goto l59 } { - position56 := position - l57: + position78 := position + l79: { - position58, tokenIndex58 := position, tokenIndex + position80, tokenIndex80 := position, tokenIndex { - position59, tokenIndex59 := position, tokenIndex + position81, tokenIndex81 := position, tokenIndex { - position61 := position + position83 := position if buffer[position] != rune('\\') { - goto l60 + goto l82 } position++ if 
buffer[position] != rune('\'') { - goto l60 + goto l82 } position++ - add(ruleescsquote, position61) + add(ruleescsquote, position83) } - goto l59 - l60: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l82: + position, tokenIndex = position81, tokenIndex81 if !_rules[ruleescdquote]() { - goto l62 + goto l84 } - goto l59 - l62: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l84: + position, tokenIndex = position81, tokenIndex81 if !_rules[ruletextdata]() { - goto l63 + goto l85 } - goto l59 - l63: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l85: + position, tokenIndex = position81, tokenIndex81 if !_rules[rulelf]() { - goto l64 + goto l86 } - goto l59 - l64: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l86: + position, tokenIndex = position81, tokenIndex81 if !_rules[rulecr]() { - goto l65 + goto l87 } - goto l59 - l65: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l87: + position, tokenIndex = position81, tokenIndex81 if !_rules[ruleobracket]() { - goto l66 + goto l88 } - goto l59 - l66: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l88: + position, tokenIndex = position81, tokenIndex81 if !_rules[rulecbracket]() { - goto l67 + goto l89 } - goto l59 - l67: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l89: + position, tokenIndex = position81, tokenIndex81 if !_rules[ruleoparen]() { - goto l68 + goto l90 } - goto l59 - l68: - position, tokenIndex = position59, tokenIndex59 + goto l81 + l90: + position, tokenIndex = position81, tokenIndex81 if !_rules[rulecparen]() { - goto l58 + goto l80 } } - l59: - goto l57 - l58: - position, tokenIndex = position58, tokenIndex58 + l81: + goto l79 + l80: + position, tokenIndex = position80, tokenIndex80 } - add(rulePegText, position56) + add(rulePegText, position78) } if !_rules[rulesquote]() { - goto l37 + goto l59 } { add(ruleAction6, position) } - add(rulesquote_string, position55) + add(rulesquote_string, 
position77) } } - l39: - add(rulestring, position38) + l61: + add(rulestring, position60) } - goto l35 - l37: - position, tokenIndex = position35, tokenIndex35 + goto l57 + l59: + position, tokenIndex = position57, tokenIndex57 { - position70 := position + position92 := position { - position71 := position + position93 := position { - position72, tokenIndex72 := position, tokenIndex + position94, tokenIndex94 := position, tokenIndex { - position74 := position + position96 := position if buffer[position] != rune('-') { - goto l72 + goto l94 } position++ - add(rulenegative, position74) + add(rulenegative, position96) } - goto l73 - l72: - position, tokenIndex = position72, tokenIndex72 + goto l95 + l94: + position, tokenIndex = position94, tokenIndex94 } - l73: + l95: if !_rules[rulenumber]() { - goto l33 + goto l55 } - l75: + l97: { - position76, tokenIndex76 := position, tokenIndex + position98, tokenIndex98 := position, tokenIndex if !_rules[rulenumber]() { - goto l76 + goto l98 } - goto l75 - l76: - position, tokenIndex = position76, tokenIndex76 + goto l97 + l98: + position, tokenIndex = position98, tokenIndex98 } { - position77, tokenIndex77 := position, tokenIndex + position99, tokenIndex99 := position, tokenIndex { - position79 := position + position101 := position if buffer[position] != rune('.') { - goto l77 + goto l99 } position++ - add(ruledecimal_point, position79) + add(ruledecimal_point, position101) } if !_rules[rulenumber]() { - goto l77 + goto l99 } - l80: + l102: { - position81, tokenIndex81 := position, tokenIndex + position103, tokenIndex103 := position, tokenIndex if !_rules[rulenumber]() { - goto l81 + goto l103 } - goto l80 - l81: - position, tokenIndex = position81, tokenIndex81 + goto l102 + l103: + position, tokenIndex = position103, tokenIndex103 } - goto l78 - l77: - position, tokenIndex = position77, tokenIndex77 + goto l100 + l99: + position, tokenIndex = position99, tokenIndex99 } - l78: - add(rulevalue, position71) + l100: + 
add(rulevalue, position93) } - add(rulePegText, position70) + add(rulePegText, position92) } { add(ruleAction4, position) } } - l35: - add(ruleitem, position34) + l57: + add(ruleitem, position56) } return true - l33: - position, tokenIndex = position33, tokenIndex33 + l55: + position, tokenIndex = position55, tokenIndex55 return false }, - /* 4 string <- <(dquote_string / squote_string)> */ + /* 5 string <- <(dquote_string / squote_string)> */ nil, - /* 5 dquote_string <- <(escdquote <(textdata / squote / lf / cr / obracket / cbracket / oparen / cparen / comma)*> escdquote Action5)> */ + /* 6 dquote_string <- <(escdquote <(textdata / squote / lf / cr / obracket / cbracket / oparen / cparen / comma)*> escdquote Action5)> */ nil, - /* 6 squote_string <- <(squote <(escsquote / escdquote / textdata / lf / cr / obracket / cbracket / oparen / cparen)*> squote Action6)> */ + /* 7 squote_string <- <(squote <(escsquote / escdquote / textdata / lf / cr / obracket / cbracket / oparen / cparen)*> squote Action6)> */ nil, - /* 7 value <- <(negative? number+ (decimal_point number+)?)> */ + /* 8 value <- <(negative? 
number+ (decimal_point number+)?)> */ nil, - /* 8 ws <- <' '> */ + /* 9 ws <- <' '> */ func() bool { - position87, tokenIndex87 := position, tokenIndex + position109, tokenIndex109 := position, tokenIndex { - position88 := position + position110 := position if buffer[position] != rune(' ') { - goto l87 + goto l109 } position++ - add(rulews, position88) + add(rulews, position110) } return true - l87: - position, tokenIndex = position87, tokenIndex87 + l109: + position, tokenIndex = position109, tokenIndex109 return false }, - /* 9 comma <- <','> */ + /* 10 comma <- <','> */ func() bool { - position89, tokenIndex89 := position, tokenIndex + position111, tokenIndex111 := position, tokenIndex { - position90 := position + position112 := position if buffer[position] != rune(',') { - goto l89 + goto l111 } position++ - add(rulecomma, position90) + add(rulecomma, position112) } return true - l89: - position, tokenIndex = position89, tokenIndex89 + l111: + position, tokenIndex = position111, tokenIndex111 return false }, - /* 10 lf <- <'\n'> */ + /* 11 lf <- <'\n'> */ func() bool { - position91, tokenIndex91 := position, tokenIndex + position113, tokenIndex113 := position, tokenIndex { - position92 := position + position114 := position if buffer[position] != rune('\n') { - goto l91 + goto l113 } position++ - add(rulelf, position92) + add(rulelf, position114) } return true - l91: - position, tokenIndex = position91, tokenIndex91 + l113: + position, tokenIndex = position113, tokenIndex113 return false }, - /* 11 cr <- <'\r'> */ + /* 12 cr <- <'\r'> */ func() bool { - position93, tokenIndex93 := position, tokenIndex + position115, tokenIndex115 := position, tokenIndex { - position94 := position + position116 := position if buffer[position] != rune('\r') { - goto l93 + goto l115 } position++ - add(rulecr, position94) + add(rulecr, position116) } return true - l93: - position, tokenIndex = position93, tokenIndex93 + l115: + position, tokenIndex = position115, tokenIndex115 
return false }, - /* 12 escdquote <- <'"'> */ + /* 13 escdquote <- <'"'> */ func() bool { - position95, tokenIndex95 := position, tokenIndex + position117, tokenIndex117 := position, tokenIndex { - position96 := position + position118 := position if buffer[position] != rune('"') { - goto l95 + goto l117 } position++ - add(ruleescdquote, position96) + add(ruleescdquote, position118) } return true - l95: - position, tokenIndex = position95, tokenIndex95 + l117: + position, tokenIndex = position117, tokenIndex117 return false }, - /* 13 escsquote <- <('\\' '\'')> */ + /* 14 escsquote <- <('\\' '\'')> */ nil, - /* 14 squote <- <'\''> */ + /* 15 squote <- <'\''> */ func() bool { - position98, tokenIndex98 := position, tokenIndex + position120, tokenIndex120 := position, tokenIndex { - position99 := position + position121 := position if buffer[position] != rune('\'') { - goto l98 + goto l120 } position++ - add(rulesquote, position99) + add(rulesquote, position121) } return true - l98: - position, tokenIndex = position98, tokenIndex98 + l120: + position, tokenIndex = position120, tokenIndex120 return false }, - /* 15 obracket <- <'['> */ + /* 16 obracket <- <'['> */ func() bool { - position100, tokenIndex100 := position, tokenIndex + position122, tokenIndex122 := position, tokenIndex { - position101 := position + position123 := position if buffer[position] != rune('[') { - goto l100 + goto l122 } position++ - add(ruleobracket, position101) + add(ruleobracket, position123) } return true - l100: - position, tokenIndex = position100, tokenIndex100 + l122: + position, tokenIndex = position122, tokenIndex122 return false }, - /* 16 cbracket <- <']'> */ + /* 17 cbracket <- <']'> */ func() bool { - position102, tokenIndex102 := position, tokenIndex + position124, tokenIndex124 := position, tokenIndex { - position103 := position + position125 := position if buffer[position] != rune(']') { - goto l102 + goto l124 } position++ - add(rulecbracket, position103) + add(rulecbracket, 
position125) } return true - l102: - position, tokenIndex = position102, tokenIndex102 + l124: + position, tokenIndex = position124, tokenIndex124 return false }, - /* 17 oparen <- <'('> */ + /* 18 oparen <- <'('> */ func() bool { - position104, tokenIndex104 := position, tokenIndex + position126, tokenIndex126 := position, tokenIndex { - position105 := position + position127 := position if buffer[position] != rune('(') { - goto l104 + goto l126 } position++ - add(ruleoparen, position105) + add(ruleoparen, position127) } return true - l104: - position, tokenIndex = position104, tokenIndex104 + l126: + position, tokenIndex = position126, tokenIndex126 return false }, - /* 18 cparen <- <')'> */ + /* 19 cparen <- <')'> */ func() bool { - position106, tokenIndex106 := position, tokenIndex + position128, tokenIndex128 := position, tokenIndex { - position107 := position + position129 := position if buffer[position] != rune(')') { - goto l106 + goto l128 } position++ - add(rulecparen, position107) + add(rulecparen, position129) } return true - l106: - position, tokenIndex = position106, tokenIndex106 + l128: + position, tokenIndex = position128, tokenIndex128 return false }, - /* 19 number <- <([a-z] / [A-Z] / [0-9])> */ + /* 20 number <- <([a-z] / [A-Z] / [0-9])> */ func() bool { - position108, tokenIndex108 := position, tokenIndex + position130, tokenIndex130 := position, tokenIndex { - position109 := position + position131 := position { - position110, tokenIndex110 := position, tokenIndex + position132, tokenIndex132 := position, tokenIndex if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l111 + goto l133 } position++ - goto l110 - l111: - position, tokenIndex = position110, tokenIndex110 + goto l132 + l133: + position, tokenIndex = position132, tokenIndex132 if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l112 + goto l134 } position++ - goto l110 - l112: - position, tokenIndex = position110, tokenIndex110 + goto l132 + l134: + 
position, tokenIndex = position132, tokenIndex132 if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l108 + goto l130 } position++ } - l110: - add(rulenumber, position109) + l132: + add(rulenumber, position131) } return true - l108: - position, tokenIndex = position108, tokenIndex108 + l130: + position, tokenIndex = position130, tokenIndex130 return false }, - /* 20 negative <- <'-'> */ + /* 21 negative <- <'-'> */ nil, - /* 21 decimal_point <- <'.'> */ + /* 22 decimal_point <- <'.'> */ nil, - /* 22 textdata <- <([a-z] / [A-Z] / [0-9] / ' ' / '!' / '#' / '$' / '&' / '%' / '*' / '+' / '-' / '.' / '/' / ':' / ';' / [<->] / '?' / '\\' / '^' / '_' / '`' / '{' / '|' / '}' / '~')> */ + /* 23 textdata <- <([a-z] / [A-Z] / [0-9] / ' ' / '!' / '#' / '$' / '&' / '%' / '*' / '+' / '-' / '.' / '/' / ':' / ';' / [<->] / '?' / '\\' / '^' / '_' / '`' / '{' / '|' / '}' / '~')> */ func() bool { - position115, tokenIndex115 := position, tokenIndex + position137, tokenIndex137 := position, tokenIndex { - position116 := position + position138 := position { - position117, tokenIndex117 := position, tokenIndex + position139, tokenIndex139 := position, tokenIndex if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l118 + goto l140 } position++ - goto l117 - l118: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l140: + position, tokenIndex = position139, tokenIndex139 if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l119 + goto l141 } position++ - goto l117 - l119: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l141: + position, tokenIndex = position139, tokenIndex139 if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l120 + goto l142 } position++ - goto l117 - l120: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l142: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune(' ') { - goto l121 + goto l143 } position++ - goto l117 - l121: - position, 
tokenIndex = position117, tokenIndex117 + goto l139 + l143: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('!') { - goto l122 + goto l144 } position++ - goto l117 - l122: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l144: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('#') { - goto l123 + goto l145 } position++ - goto l117 - l123: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l145: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('$') { - goto l124 + goto l146 } position++ - goto l117 - l124: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l146: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('&') { - goto l125 + goto l147 } position++ - goto l117 - l125: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l147: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('%') { - goto l126 + goto l148 } position++ - goto l117 - l126: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l148: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('*') { - goto l127 + goto l149 } position++ - goto l117 - l127: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l149: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('+') { - goto l128 + goto l150 } position++ - goto l117 - l128: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l150: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('-') { - goto l129 + goto l151 } position++ - goto l117 - l129: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l151: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('.') { - goto l130 + goto l152 } position++ - goto l117 - l130: - position, tokenIndex = position117, tokenIndex117 + goto l139 + 
l152: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('/') { - goto l131 + goto l153 } position++ - goto l117 - l131: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l153: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune(':') { - goto l132 + goto l154 } position++ - goto l117 - l132: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l154: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune(';') { - goto l133 + goto l155 } position++ - goto l117 - l133: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l155: + position, tokenIndex = position139, tokenIndex139 if c := buffer[position]; c < rune('<') || c > rune('>') { - goto l134 + goto l156 } position++ - goto l117 - l134: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l156: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('?') { - goto l135 + goto l157 } position++ - goto l117 - l135: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l157: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('\\') { - goto l136 + goto l158 } position++ - goto l117 - l136: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l158: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('^') { - goto l137 + goto l159 } position++ - goto l117 - l137: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l159: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('_') { - goto l138 + goto l160 } position++ - goto l117 - l138: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l160: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('`') { - goto l139 + goto l161 } position++ - goto l117 - l139: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l161: + position, tokenIndex = 
position139, tokenIndex139 if buffer[position] != rune('{') { - goto l140 + goto l162 } position++ - goto l117 - l140: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l162: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('|') { - goto l141 + goto l163 } position++ - goto l117 - l141: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l163: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('}') { - goto l142 + goto l164 } position++ - goto l117 - l142: - position, tokenIndex = position117, tokenIndex117 + goto l139 + l164: + position, tokenIndex = position139, tokenIndex139 if buffer[position] != rune('~') { - goto l115 + goto l137 } position++ } - l117: - add(ruletextdata, position116) + l139: + add(ruletextdata, position138) } return true - l115: - position, tokenIndex = position115, tokenIndex115 + l137: + position, tokenIndex = position137, tokenIndex137 return false }, - /* 24 Action0 <- <{ p.pushArray() }> */ + /* 25 Action0 <- <{ p.pushArray() }> */ nil, - /* 25 Action1 <- <{ p.popArray() }> */ + /* 26 Action1 <- <{ p.popArray() }> */ nil, - /* 26 Action2 <- <{ p.pushArray() }> */ + /* 27 Action2 <- <{ p.pushArray() }> */ nil, - /* 27 Action3 <- <{ p.popArray() }> */ + /* 28 Action3 <- <{ p.popArray() }> */ nil, nil, - /* 29 Action4 <- <{ p.addElement(buffer[begin:end]) }> */ + /* 30 Action4 <- <{ p.addElement(buffer[begin:end]) }> */ nil, - /* 30 Action5 <- <{ p.addElement(buffer[begin:end]) }> */ + /* 31 Action5 <- <{ p.addElement(buffer[begin:end]) }> */ nil, - /* 31 Action6 <- <{ p.addElement(buffer[begin:end]) }> */ + /* 32 Action6 <- <{ p.addElement(buffer[begin:end]) }> */ nil, } p.rules = _rules diff --git a/primitive/compute/result/complex_field_test.go b/primitive/compute/result/complex_field_test.go index 26bcd16..a991abc 100644 --- a/primitive/compute/result/complex_field_test.go +++ b/primitive/compute/result/complex_field_test.go @@ -75,6 +75,15 @@ 
func TestParserTuple(t *testing.T) { assert.Equal(t, []interface{}{"10", "20", "30"}, field.arrayElements.elements) } +func TestParserTupleFail(t *testing.T) { + field := &ComplexField{Buffer: "(10)"} + field.Init() + + err := field.Parse() + field.PrintSyntaxTree() + assert.Error(t, err) +} + func TestParserSingleTuple(t *testing.T) { field := &ComplexField{Buffer: "(10, )"} field.Init() diff --git a/primitive/compute/result/result_csv_parser_test.go b/primitive/compute/result/result_csv_parser_test.go index 2ef2f43..ec2d8f8 100644 --- a/primitive/compute/result/result_csv_parser_test.go +++ b/primitive/compute/result/result_csv_parser_test.go @@ -22,4 +22,5 @@ func TestCSVResultParser(t *testing.T) { assert.Equal(t, []interface{}{"4", []interface{}{"-10.001", "20.1"}, []interface{}{"30", "40"}}, result[5]) assert.Equal(t, []interface{}{"5", []interface{}{"int"}, []interface{}{"0.989599347114563"}}, result[6]) assert.Equal(t, []interface{}{"7", []interface{}{"int", "categorical"}, []interface{}{"0.9885959029197693", "1"}}, result[8]) + assert.Equal(t, []interface{}{"10", "( ibid )", "hotel"}, result[11]) } diff --git a/primitive/compute/result/testdata/test.csv b/primitive/compute/result/testdata/test.csv index ed06fca..f84c2e2 100644 --- a/primitive/compute/result/testdata/test.csv +++ b/primitive/compute/result/testdata/test.csv @@ -9,3 +9,4 @@ idx,col a,col b 7,"('int', 'categorical')","[0.9885959029197693, 1]" 8,"('int',)",[0.9898993372917175] 9,"('int',)",[0.9903160929679871] +10,( ibid ),hotel From cc333dbe32e5cf4b093d8f3e597e83a18943f784 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 29 Oct 2018 08:12:22 -0400 Subject: [PATCH 20/23] Updated sloth to latest version. 
--- metadata/metadata.go | 4 +- primitive/compute/client.go | 2 +- .../compute/description/preprocessing.go | 33 ++---------- .../compute/description/preprocessing_test.go | 18 +++++-- .../compute/description/primitive_steps.go | 25 +++++++--- primitive/merge.go | 2 +- primitive/pipeline.go | 50 +++++++++++++------ 7 files changed, 75 insertions(+), 59 deletions(-) diff --git a/metadata/metadata.go b/metadata/metadata.go index d32094c..5a359ea 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -118,6 +118,7 @@ type Metadata struct { Redacted bool } +// NewMetadata creates a new metadata instance. func NewMetadata(id string, name string, description string) *Metadata { return &Metadata{ ID: id, @@ -127,6 +128,7 @@ func NewMetadata(id string, name string, description string) *Metadata { } } +// NewDataResource creates a new data resource instance. func NewDataResource(id string, typ string, format []string) *DataResource { return &DataResource{ ResID: id, @@ -471,7 +473,7 @@ func (m *Metadata) GenerateHeaders() ([][]string, error) { return headers, nil } -// GenerateHeaders generates csv headers for the data resource. +// GenerateHeader generates the csv header for the data resource. func (dr *DataResource) GenerateHeader() []string { header := make([]string, len(dr.Variables)) diff --git a/primitive/compute/client.go b/primitive/compute/client.go index bba3d5d..1c402a6 100644 --- a/primitive/compute/client.go +++ b/primitive/compute/client.go @@ -106,7 +106,7 @@ func NewClientWithRunner(serverAddr string, runnerAddr string, trace bool, userA return client, nil } -// NewClientWithRunner creates a new pipline request dispatcher instance. This will establish +// NewRunner creates a new pipeline request dispatcher instance.
This will establish // the connection to the solution server or return an error on fail func NewRunner(runnerAddr string, trace bool, userAgent string, pullTimeout time.Duration, pullMax int, skipPreprocessing bool) (*Client, error) { diff --git a/primitive/compute/description/preprocessing.go b/primitive/compute/description/preprocessing.go index 4a6ab15..cc9c8cc 100644 --- a/primitive/compute/description/preprocessing.go +++ b/primitive/compute/description/preprocessing.go @@ -16,7 +16,6 @@ func CreateUserDatasetPipeline(name string, description string, targetFeature st // instantiate the pipeline builder := NewBuilder(name, description) - pip, err := builder.AddInferencePoint().Compile() if err != nil { return nil, err @@ -30,13 +29,8 @@ func CreateUserDatasetPipeline(name string, description string, targetFeature st } // CreateSlothPipeline creates a pipeline to peform timeseries clustering on a dataset. -func CreateSlothPipeline(name string, description string, targetColumn string, timeColumn string, valueColumn string, - baseFeatures []*metadata.Variable, timeSeriesFeatures []*metadata.Variable) (*pipeline.PipelineDescription, error) { - - targetIdx, err := getIndex(baseFeatures, targetColumn) - if err != nil { - return nil, err - } +func CreateSlothPipeline(name string, description string, timeColumn string, valueColumn string, + timeSeriesFeatures []*metadata.Variable) (*pipeline.PipelineDescription, error) { timeIdx, err := getIndex(timeSeriesFeatures, timeColumn) if err != nil { @@ -52,7 +46,7 @@ func CreateSlothPipeline(name string, description string, targetColumn string, t pipeline, err := NewBuilder(name, description). Add(NewDenormalizeStep()). Add(NewDatasetToDataframeStep()). - Add(NewTimeSeriesLoaderStep(targetIdx, timeIdx, valueIdx)). + Add(NewTimeSeriesLoaderStep(-1, timeIdx, valueIdx)). Add(NewSlothStep()). 
Compile() @@ -158,24 +152,3 @@ func getIndex(allFeatures []*metadata.Variable, name string) (int, error) { } return -1, errors.Errorf("can't find var '%s'", name) } - -// NewTimeSeriesLoaderStep creates a primitive step that reads time series values using a dataframe -// containing a file URI column. The result is a new dataframe that stores the timetamps as the column headers, -// and the accompanying values for each file as a row. -func NewTimeSeriesLoaderStep(fileColIndex int, timeColIndex int, valueColIndex int) *StepData { - return NewStepDataWithHyperparameters( - &pipeline.Primitive{ - Id: "1689aafa-16dc-4c55-8ad4-76cadcf46086", - Version: "0.1.0", - Name: "Time series loader", - PythonPath: "d3m.primitives.distil.TimeSeriesLoader", - Digest: "", - }, - []string{"produce"}, - map[string]interface{}{ - "file_col_index": fileColIndex, - "time_col_index": timeColIndex, - "value_col_index": valueColIndex, - }, - ) -} diff --git a/primitive/compute/description/preprocessing_test.go b/primitive/compute/description/preprocessing_test.go index 96ec3c6..1dc1fee 100644 --- a/primitive/compute/description/preprocessing_test.go +++ b/primitive/compute/description/preprocessing_test.go @@ -106,16 +106,12 @@ func TestCreateUnicornPipeline(t *testing.T) { } func TestCreateSlothPipeline(t *testing.T) { - baseVriables := []*metadata.Variable{ - {Name: "filename", Index: 1}, - } - timeSeriesVariables := []*metadata.Variable{ {Name: "time", Index: 0}, {Name: "value", Index: 1}, } - pipeline, err := CreateSlothPipeline("sloth_test", "test sloth object detection pipeline", "filename", "time", "value", baseVriables, timeSeriesVariables) + pipeline, err := CreateSlothPipeline("sloth_test", "test sloth object detection pipeline", "time", "value", timeSeriesVariables) assert.NoError(t, err) data, err := proto.Marshal(pipeline) @@ -125,3 +121,15 @@ func TestCreateSlothPipeline(t *testing.T) { err = ioutil.WriteFile("/tmp/create_sloth.pln", data, 0644) assert.NoError(t, err) } + +func 
TestCreateDukePipeline(t *testing.T) { + pipeline, err := CreateDukePipeline("duke_test", "test duke data summary pipeline") + assert.NoError(t, err) + + data, err := proto.Marshal(pipeline) + assert.NoError(t, err) + assert.NotNil(t, data) + + err = ioutil.WriteFile("/tmp/create_duke.pln", data, 0644) + assert.NoError(t, err) +} diff --git a/primitive/compute/description/primitive_steps.go b/primitive/compute/description/primitive_steps.go index 89b5787..92d08fd 100644 --- a/primitive/compute/description/primitive_steps.go +++ b/primitive/compute/description/primitive_steps.go @@ -244,16 +244,29 @@ func NewNumericRangeFilterStep(resourceID string, colindex int, inclusive bool, ) } -// NewTimeSeriesReaderStep creates a primitive step that reads time series values using a dataframe -// containing a file URI column. The result is a new dataframe that stores the timetamps as the column headers, -// and the accompanying values for each file as a row. -func NewTimeSeriesReaderStep(fileColIndex int, timeColIndex int, valueColIndex int) *StepData { +// NewTimeSeriesLoaderStep creates a primitive step that reads time series values using a dataframe +// containing a file URI column. The file URIs are expected to point to CSV files, with the +// supplied time and value indices pointing to the columns in the CSV that form the series data. +// The result is a new dataframe that stores the timestamps as the column headers, +// and the accompanying values for each file as a row. Note that if the file index column is negative, +// the primitive will use the first CSV file name column it finds.
+func NewTimeSeriesLoaderStep(fileColIndex int, timeColIndex int, valueColIndex int) *StepData { + // exclude the file col index value in the case of a negative index so that the + // primitive will infer the column + args := map[string]interface{}{ + "time_col_index": timeColIndex, + "value_col_index": valueColIndex, + } + if fileColIndex >= 0 { + args["file_col_index"] = fileColIndex + } + return NewStepDataWithHyperparameters( &pipeline.Primitive{ Id: "1689aafa-16dc-4c55-8ad4-76cadcf46086", Version: "0.1.0", - Name: "Time series reader", - PythonPath: "d3m.primitives.data.TimeSeriesReader", + Name: "Time series loader", + PythonPath: "d3m.primitives.distil.TimeSeriesLoader", Digest: "", }, []string{"produce"}, diff --git a/primitive/merge.go b/primitive/merge.go index be1bb61..aaf097b 100644 --- a/primitive/merge.go +++ b/primitive/merge.go @@ -15,7 +15,7 @@ import ( "github.com/unchartedsoftware/distil-ingest/util" ) -// RankPrimitive will rank the dataset using a primitive. +// MergePrimitive will merge data resources into a single data resource. func (s *IngestStep) MergePrimitive(dataset string, outputFolder string) error { outputSchemaPath := path.Join(outputFolder, D3MSchemaPathRelative) outputDataPath := path.Join(outputFolder, D3MDataPathRelative) diff --git a/primitive/pipeline.go b/primitive/pipeline.go index 6522f36..b540dbd 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -39,10 +39,12 @@ type FeatureRequest struct { Step *pipeline.PipelineDescription } +// IngestStep is a step in the ingest process. type IngestStep struct { client *compute.Client } +// NewIngestStep creates a new ingest step.
func NewIngestStep(client *compute.Client) *IngestStep { return &IngestStep{ client: client, @@ -199,19 +201,8 @@ func getClusterVariables(meta *metadata.Metadata, prefix string) ([]*FeatureRequ if res.CanBeFeaturized() { step, err = description.CreateUnicornPipeline("horned", "", []string{denormFieldName}, []string{indexName}) } else { - // TODO: Extract actual column names from time series resource - timeField, valueField := getTimeSeriesFields(res) - resFields := []*metadata.Variable{ - { - Name: "time", - Index: 0, - }, - { - Name: "value", - Index: 1, - }, - } - step, err = description.CreateSlothPipeline("leaf", "", v.Name, timeField, valueField, mainDR.Variables, resFields) + fields, _ := getTimeValueCols(res) + step, err = description.CreateSlothPipeline("leaf", "", fields.timeCol, fields.valueCol, res.Variables) } if err != nil { return nil, errors.Wrap(err, "unable to create step pipeline") @@ -308,6 +299,35 @@ func copyResourceFiles(sourceFolder string, destinationFolder string) error { return nil } -func getTimeSeriesFields(dr *metadata.DataResource) (string, string) { - return "time", "value" +type timeValueCols struct { + timeCol string + valueCol string +} + +func getTimeValueCols(dr *metadata.DataResource) (*timeValueCols, bool) { + // find the first column marked as a time and the first that is an + // attribute and use those as series values + var timeCol string + var valueCol string + if dr.ResType == "timeseries" { + // find a suitable time column and value column - we take the first that works in each + // case + for _, v := range dr.Variables { + for _, r := range v.Role { + if r == "timeIndicator" && timeCol != "" { + timeCol = v.Name + } + if r == "attribute" && valueCol != "" { + valueCol = v.Name + } + } + } + if timeCol != "" && valueCol != "" { + return &timeValueCols{ + timeCol: timeCol, + valueCol: valueCol, + }, true + } + } + return nil, false } From 3de158269fe8216fa3d6802ab6446c36ffaea33f Mon Sep 17 00:00:00 2001 From: Philippe 
Horne Date: Mon, 29 Oct 2018 09:28:08 -0400 Subject: [PATCH 21/23] Added timeseries resource type handling. --- metadata/metadata.go | 5 +++- metadata/timeseries.go | 63 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 metadata/timeseries.go diff --git a/metadata/metadata.go b/metadata/metadata.go index 5a359ea..5c5002a 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -783,12 +783,15 @@ func (m *Metadata) loadOriginalSchemaVariables() error { var parser DataResourceParser switch resType { - case resTypeAudio, resTypeImage, resTypeText, resTypeTime: + case resTypeAudio, resTypeImage, resTypeText: parser = NewMedia(resType) break case resTypeTable: parser = &Table{} break + case resTypeTime: + parser = &Timeseries{} + break default: return errors.Errorf("Unrecognized resource type '%s'", resType) } diff --git a/metadata/timeseries.go b/metadata/timeseries.go new file mode 100644 index 0000000..219fadc --- /dev/null +++ b/metadata/timeseries.go @@ -0,0 +1,63 @@ +package metadata + +import ( + "fmt" + + "github.com/jeffail/gabs" + "github.com/pkg/errors" +) + +// Timeseries is a data resource that is contained within one or many tabular files. +type Timeseries struct { +} + +// Parse extracts the data resource from the data schema document.
+func (r *Timeseries) Parse(res *gabs.Container) (*DataResource, error) { + schemaVariables, err := res.Path("columns").Children() + if err != nil { + return nil, errors.Wrap(err, "failed to parse column data") + } + + if res.Path("resID").Data() == nil { + return nil, fmt.Errorf("unable to parse resource id") + } + resID := res.Path("resID").Data().(string) + + if res.Path("resPath").Data() == nil { + return nil, fmt.Errorf("unable to parse resource path") + } + resPath := res.Path("resPath").Data().(string) + + var resFormats []string + if res.Path("resFormat").Data() != nil { + formatsRaw, err := res.Path("resFormat").Children() + if err != nil { + return nil, errors.Wrap(err, "unable to parse resource format") + } + resFormats = make([]string, len(formatsRaw)) + for i, r := range formatsRaw { + resFormats[i] = r.Data().(string) + } + } else { + resFormats = make([]string, 0) + } + + dr := &DataResource{ + ResID: resID, + ResPath: resPath, + ResType: resTypeTime, + ResFormat: resFormats, + IsCollection: true, + Variables: make([]*Variable, 0), + } + + for _, v := range schemaVariables { + variable, err := parseSchemaVariable(v, dr.Variables, false) + if err != nil { + return nil, err + } + dr.Variables = append(dr.Variables, variable) + } + + return dr, nil +} From 7649338ad7101f3905476e0cbc27d81906028634 Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Mon, 29 Oct 2018 14:38:43 -0400 Subject: [PATCH 22/23] Updated sloth to latest version --- primitive/compute/description/primitive_steps.go | 6 +----- primitive/pipeline.go | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/primitive/compute/description/primitive_steps.go b/primitive/compute/description/primitive_steps.go index 92d08fd..a95ad3b 100644 --- a/primitive/compute/description/primitive_steps.go +++ b/primitive/compute/description/primitive_steps.go @@ -270,10 +270,6 @@ func NewTimeSeriesLoaderStep(fileColIndex int, timeColIndex int, valueColIndex i Digest: "", }, 
[]string{"produce"}, - map[string]interface{}{ - "file_col_index": fileColIndex, - "time_col_index": timeColIndex, - "value_col_index": valueColIndex, - }, + args, ) } diff --git a/primitive/pipeline.go b/primitive/pipeline.go index b540dbd..bb1e5ae 100644 --- a/primitive/pipeline.go +++ b/primitive/pipeline.go @@ -314,10 +314,10 @@ func getTimeValueCols(dr *metadata.DataResource) (*timeValueCols, bool) { // case for _, v := range dr.Variables { for _, r := range v.Role { - if r == "timeIndicator" && timeCol != "" { + if r == "timeIndicator" && timeCol == "" { timeCol = v.Name } - if r == "attribute" && valueCol != "" { + if r == "attribute" && valueCol == "" { valueCol = v.Name } } From c6857a2894f0a599127def518f5070b5a6411e1d Mon Sep 17 00:00:00 2001 From: Philippe Horne Date: Tue, 30 Oct 2018 09:07:43 -0400 Subject: [PATCH 23/23] Added copy dependency and wiped refers-to when ingesting metadata. --- Gopkg.lock | 9 +++++++++ Gopkg.toml | 4 ++++ metadata/metadata.go | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/Gopkg.lock b/Gopkg.lock index 659a70d..0eb2a3b 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -196,6 +196,15 @@ packages = ["pbkdf2"] revision = "e3636079e1a4c1f337f212cc5cd2aca108f6c900" +[[projects]] + branch = "master" + digest = "1:d15b2fd801f2341f1bf31293085717ab203572baa271e8b495e6a0a5a2bea316" + name = "github.com/otiai10/copy" + packages = ["."] + pruneopts = "UT" + revision = "7e9a647135a142c2669943d4a4d29be015ce9392" + source = "git@github.com:otiai10/copy.git" + [[projects]] branch = "master" name = "golang.org/x/net" diff --git a/Gopkg.toml b/Gopkg.toml index d954c70..6d6b3b1 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -57,6 +57,10 @@ name = "gopkg.in/olivere/elastic.v5" version = "5.0.45" +[[constraint]] + name = "github.com/otiai10/copy" + source = "git@github.com:otiai10/copy.git" + [prune] go-tests = true unused-packages = true diff --git a/metadata/metadata.go b/metadata/metadata.go index 5c5002a..5119c9d 100644 --- 
a/metadata/metadata.go +++ b/metadata/metadata.go @@ -981,6 +981,12 @@ func IngestMetadata(client *elastic.Client, index string, datasetPrefix string, if len(meta.DataResources) > 1 { return errors.New("metadata variables not merged into a single dataset") } + + // clear refers to + for _, v := range meta.DataResources[0].Variables { + v.RefersTo = nil + } + adjustedID := datasetPrefix + meta.ID source := map[string]interface{}{