Skip to content

Commit

Permalink
Project import generated by Copybara.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 210171509
  • Loading branch information
tf-metadata-team authored and mzinkevi committed Aug 24, 2018
1 parent b402775 commit 223923d
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from setuptools import setup

# tf.Metadata version.
__version__ = '0.6.0'
__version__ = '0.9.0dev'


setup(
Expand Down
4 changes: 3 additions & 1 deletion tensorflow_metadata/proto/v0/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cc_proto_library(
name = "metadata_v0_proto_cc_pb2",
srcs = [
"anomalies.proto",
"path.proto",
"schema.proto",
"statistics.proto",
],
Expand All @@ -36,12 +37,13 @@ py_proto_library(
name = "metadata_v0_proto_py_pb2",
srcs = [
"anomalies.proto",
"path.proto",
"schema.proto",
"statistics.proto",
],
default_runtime = "@protobuf_archive//:protobuf_python",
protoc = "@protobuf_archive//:protoc",
deps = ["@protobuf_archive//:protobuf_python"],
srcs_version = "PY2AND3",
deps = ["@protobuf_archive//:protobuf_python"],
)

15 changes: 15 additions & 0 deletions tensorflow_metadata/proto/v0/anomalies.proto
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@ package tensorflow.metadata.v0;
option java_package = "org.tensorflow.metadata.v0";
option java_multiple_files = true;

import "tensorflow_metadata/proto/v0/path.proto";
import "tensorflow_metadata/proto/v0/schema.proto";

// Message to represent information about an individual anomaly.
message AnomalyInfo {
// Deleted fields.
reserved 1, 3;

// A path indicating where the anomaly occurred.
optional Path path = 8;

enum Severity {
UNKNOWN = 0;
WARNING = 1;
Expand Down Expand Up @@ -161,6 +165,17 @@ message Anomalies {
}

// Describes how the keys of anomaly_info are to be interpreted.
enum AnomalyNameFormat {
// At present, this indicates that the keys in anomaly_info
// refer to the raw field name in the Schema.
UNKNOWN = 0;
// The serialized path to a struct.
SERIALIZED_PATH = 1;
}

// The format of the keys in anomaly_info.
// If absent, the default is UNKNOWN (the first value of the enum).
optional AnomalyNameFormat anomaly_name_format = 7;
// Map from a column to the difference that it represents.
// How each key is to be read is given by anomaly_name_format.
map<string, AnomalyInfo> anomaly_info = 2;
// True if numExamples == 0.
optional bool data_missing = 3;
Expand Down
43 changes: 43 additions & 0 deletions tensorflow_metadata/proto/v0/path.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2018 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

syntax = "proto2";
option cc_enable_arenas = true;

package tensorflow.metadata.v0;

option java_package = "org.tensorflow.metadata.v0";
option java_multiple_files = true;

// A path is a more general substitute for the name of a field or feature that
// can be used for flat examples as well as structured data. For example, if
// we had data in a protocol buffer:
//   message Person {
//     optional int32 age = 1;
//     optional string gender = 2;
//     repeated Person parent = 3;
//   }
// then the path {step:["parent", "age"]} in statistics would refer to the
// age of a parent, and {step:["parent", "parent", "age"]} would refer to the
// age of a grandparent. This allows us to distinguish between the statistics
// of parents' ages and grandparents' ages. In general, repeated messages are
// to be preferred to linked lists of arbitrary length.
// For SequenceExample, if we have a feature list "foo", this is represented
// by {step:["##SEQUENCE##", "foo"]}.
message Path {
// The steps of the path, ordered from the outermost field to the innermost
// one (e.g. ["parent", "age"] for the age of a parent).
// Any string is a valid step.
// However, whenever possible have a step be [A-Za-z0-9_]+.
repeated string step = 1;
}
18 changes: 15 additions & 3 deletions tensorflow_metadata/proto/v0/statistics.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ package tensorflow.metadata.v0;
option java_package = "org.tensorflow.metadata.v0";
option java_multiple_files = true;

import "tensorflow_metadata/proto/v0/path.proto";

// Copied from Facets feature_statistics.proto
// Must be kept binary-compatible with the original, until all usages
// are updated to use this version, or we write a proto-to-proto converter.
Expand Down Expand Up @@ -61,8 +63,18 @@ message FeatureNameStatistics {
STRUCT = 4;
}

// The feature name
string name = 1;
// One can identify a field either by the name (for simple fields), or by
// a path (for structured fields). Note that:
// name: "foo"
// is equivalent to:
// path: {step:"foo"}
// Because both live in a oneof, at most one of them is set at a time;
// setting one clears the other.
oneof field_id {
// The feature name. Keeps field number 1, the number it had before the
// oneof was introduced.
string name = 1;

// The path of the feature.
Path path = 8;
}

// The data type of the feature
Type type = 2;
Expand Down Expand Up @@ -202,7 +214,7 @@ message BytesStatistics {
}

message StructStatistics {
CommonStatistics common_statistics = 1;
CommonStatistics common_stats = 1;
}

// Common statistics for all feature types
Expand Down

0 comments on commit 223923d

Please sign in to comment.