Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ParseToken-related proto #43

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions zetasql/local_service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ cc_library(
"//zetasql/public:function",
"//zetasql/public:id_string",
"//zetasql/public:language_options",
"//zetasql/public:parse_helpers",
"//zetasql/public:parse_resume_location",
"//zetasql/public:parse_resume_location_cc_proto",
"//zetasql/public:parse_tokens_cc_proto",
"//zetasql/public:simple_catalog",
"//zetasql/public:sql_formatter",
"//zetasql/public:templated_sql_tvf",
Expand Down Expand Up @@ -98,6 +100,7 @@ cc_test(
"//zetasql/proto:function_cc_proto",
"//zetasql/proto:simple_catalog_cc_proto",
"//zetasql/public:parse_resume_location_cc_proto",
"//zetasql/public:parse_tokens_cc_proto",
"//zetasql/public:simple_catalog",
"//zetasql/public:simple_table_cc_proto",
"//zetasql/public:type",
Expand Down Expand Up @@ -145,6 +148,7 @@ proto_library(
"//zetasql/proto:simple_catalog_proto",
"//zetasql/public:options_proto",
"//zetasql/public:parse_resume_location_proto",
"//zetasql/public:parse_tokens_proto",
"//zetasql/public:simple_table_proto",
"//zetasql/public:type_proto",
"//zetasql/public:value_proto",
Expand Down
21 changes: 21 additions & 0 deletions zetasql/local_service/local_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "zetasql/public/function.h"
#include "zetasql/public/id_string.h"
#include "zetasql/public/language_options.h"
#include "zetasql/public/parse_tokens.h"
#include "zetasql/public/simple_catalog.h"
#include "zetasql/public/sql_formatter.h"
#include "zetasql/public/table_from_proto.h"
Expand Down Expand Up @@ -807,6 +808,26 @@ absl::Status ZetaSqlLocalServiceImpl::GetLanguageOptions(
return absl::OkStatus();
}

// Tokenizes the input referenced by request.resume_location() using the
// options in request.options(), and appends one ParseTokenProto per token to
// <response>.
//
// Returns the error from ::zetasql::GetParseTokens if tokenizing fails, or the
// first token-to-proto conversion error. Tokens appended before a conversion
// error are left in <response>.
absl::Status ZetaSqlLocalServiceImpl::GetParseTokens(
    const GetParseTokensRequest& request, GetParseTokensResponse* response) {
  ParseResumeLocation location =
      ParseResumeLocation::FromProto(request.resume_location());
  const ParseTokenOptions token_options =
      ParseTokenOptions::FromProto(request.options());

  std::vector<ParseToken> parsed;
  ZETASQL_RETURN_IF_ERROR(
      ::zetasql::GetParseTokens(token_options, &location, &parsed));

  for (const ParseToken& parsed_token : parsed) {
    const auto proto_or = parsed_token.ToProto();
    // Stop at the first token that cannot be represented as a proto.
    if (!proto_or.ok()) {
      return proto_or.status();
    }
    *response->add_tokens() = proto_or.value();
  }
  return absl::OkStatus();
}

// Reports the number of saved states held by prepared_expressions_.
size_t ZetaSqlLocalServiceImpl::NumSavedPreparedExpression() const {
  const size_t saved_state_count = prepared_expressions_->NumSavedStates();
  return saved_state_count;
}
Expand Down
3 changes: 3 additions & 0 deletions zetasql/local_service/local_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ class ZetaSqlLocalServiceImpl {
absl::Status GetLanguageOptions(const LanguageOptionsRequest& request,
LanguageOptionsProto* response);

// Tokenizes the input described by request.resume_location() using
// request.options() and appends the resulting tokens to <response>.
// Returns a non-OK status if tokenizing fails or a token cannot be converted
// to its proto form.
absl::Status GetParseTokens(const GetParseTokensRequest& request,
GetParseTokensResponse* response);

private:
std::unique_ptr<RegisteredCatalogPool> registered_catalogs_;
std::unique_ptr<PreparedExpressionPool> prepared_expressions_;
Expand Down
14 changes: 14 additions & 0 deletions zetasql/local_service/local_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import "zetasql/proto/options.proto";
import "zetasql/proto/simple_catalog.proto";
import "zetasql/public/options.proto";
import "zetasql/public/parse_resume_location.proto";
import "zetasql/public/parse_tokens.proto";
import "zetasql/public/simple_table.proto";
import "zetasql/public/type.proto";
import "zetasql/public/value.proto";
Expand Down Expand Up @@ -139,6 +140,10 @@ service ZetaSqlLocalService {
rpc GetLanguageOptions(LanguageOptionsRequest)
returns (LanguageOptionsProto) {
}

// Tokenize an input query (enclosed in ParseResumeLocation) and return its parse tokens.
rpc GetParseTokens(GetParseTokensRequest) returns (GetParseTokensResponse) {
}
}

message PrepareRequest {
Expand Down Expand Up @@ -330,3 +335,12 @@ message LanguageOptionsRequest {
optional bool maximum_features = 1;
optional LanguageVersion language_version = 2;
}

// Request message for ZetaSqlLocalService.GetParseTokens.
message GetParseTokensRequest {
// Tokenizer options; unset fields take the defaults declared in
// ParseTokenOptionsProto.
optional ParseTokenOptionsProto options = 1;
// The input to tokenize, wrapped in a resume location.
optional ParseResumeLocationProto resume_location = 2;
}

// Response message for ZetaSqlLocalService.GetParseTokens.
message GetParseTokensResponse {
// The tokens produced for the request, in the order the tokenizer returned
// them.
repeated ParseTokenProto tokens = 1;
}
6 changes: 6 additions & 0 deletions zetasql/local_service/local_service_grpc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,5 +184,11 @@ grpc::Status ZetaSqlLocalServiceGrpcImpl::GetLanguageOptions(
return ToGrpcStatus(service_.GetLanguageOptions(*req, resp));
}

// gRPC entry point for GetParseTokens: forwards the request to the wrapped
// service_ and converts the resulting status with ToGrpcStatus. <context> is
// not used.
grpc::Status ZetaSqlLocalServiceGrpcImpl::GetParseTokens(
    grpc::ServerContext* context, const GetParseTokensRequest* req,
    GetParseTokensResponse* resp) {
  const absl::Status service_status = service_.GetParseTokens(*req, resp);
  return ToGrpcStatus(service_status);
}

} // namespace local_service
} // namespace zetasql
6 changes: 5 additions & 1 deletion zetasql/local_service/local_service_grpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,11 @@ class ZetaSqlLocalServiceGrpcImpl
const LanguageOptionsRequest* req,
LanguageOptionsProto* resp) override;

private:
// gRPC handler for the GetParseTokens rpc; delegates to service_.
grpc::Status GetParseTokens(grpc::ServerContext *context,
const GetParseTokensRequest* req,
GetParseTokensResponse* resp) override;

private:
ZetaSqlLocalServiceImpl service_;
};

Expand Down
32 changes: 32 additions & 0 deletions zetasql/local_service/local_service_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ class ZetaSqlLocalServiceImplTest : public ::testing::Test {
return service_.GetBuiltinFunctions(proto, response);
}

// Fixture shortcut: runs GetParseTokens on the service under test.
absl::Status GetParseTokens(const GetParseTokensRequest& request,
                            GetParseTokensResponse* response) {
  absl::Status status = service_.GetParseTokens(request, response);
  return status;
}

ZetaSqlLocalServiceImpl service_;
google::protobuf::compiler::DiskSourceTree source_tree_;
std::unique_ptr<google::protobuf::compiler::Importer> proto_importer_;
Expand Down Expand Up @@ -1188,5 +1194,31 @@ TEST_F(ZetaSqlLocalServiceImplTest, GetBuiltinFunctions) {
EXPECT_EQ(function2.DebugString(), response.function(1).DebugString());
}

// Verifies that GetParseTokens, called with default options, tokenizes a
// simple query into its four word tokens followed by an END_OF_INPUT token.
TEST_F(ZetaSqlLocalServiceImplTest, GetParseTokens) {
  GetParseTokensRequest request;
  // The defaults declared on ParseTokenOptionsProto are the ones wanted here,
  // so an empty options message is sufficient. mutable_options() creates it
  // inside the request and marks the field as present.
  request.mutable_options();

  // Serialize the resume location directly into the request-owned proto, so
  // no heap object has to be handed over manually.
  auto resume_location = ParseResumeLocation::FromString(
      "some_filename", "Select foo from bar");
  resume_location.Serialize(request.mutable_resume_location());

  GetParseTokensResponse response;
  ZETASQL_EXPECT_OK(GetParseTokens(request, &response));

  // ASSERT rather than EXPECT: the indexed reads below would be out of range
  // if the token count were wrong, so the test must stop here on mismatch.
  ASSERT_EQ(5, response.tokens_size());
  EXPECT_EQ("Select", response.tokens(0).image());
  EXPECT_EQ("foo", response.tokens(1).image());
  EXPECT_EQ("from", response.tokens(2).image());
  EXPECT_EQ("bar", response.tokens(3).image());
  EXPECT_EQ(ParseTokenProto_Kind_END_OF_INPUT, response.tokens(4).kind());
}

} // namespace local_service
} // namespace zetasql
21 changes: 21 additions & 0 deletions zetasql/public/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,26 @@ java_proto_library(
deps = [":parse_location_range_proto"],
)

# Proto definitions declared in parse_tokens.proto.
proto_library(
name = "parse_tokens_proto",
srcs = ["parse_tokens.proto"],
# One dependency per .proto file imported by parse_tokens.proto.
deps = [
":parse_location_range_proto",
":type_proto",
":value_proto",
],
)

# C++ bindings generated from :parse_tokens_proto.
cc_proto_library(
name = "parse_tokens_cc_proto",
deps = [":parse_tokens_proto"],
)

# Java bindings generated from :parse_tokens_proto.
java_proto_library(
name = "parse_tokens_java_proto",
deps = [":parse_tokens_proto"],
)

proto_library(
name = "type_proto",
srcs = ["type.proto"],
Expand Down Expand Up @@ -1622,6 +1642,7 @@ cc_library(
"//zetasql/parser",
"//zetasql/parser:bison_parser_generated_lib",
"//zetasql/parser:keywords",
"//zetasql/public:parse_tokens_cc_proto",
"//zetasql/public/functions:convert_string",
"//zetasql/resolved_ast:resolved_node_kind_cc_proto",
"@com_google_absl//absl/container:flat_hash_map",
Expand Down
62 changes: 62 additions & 0 deletions zetasql/public/parse_tokens.cc
Original file line number Diff line number Diff line change
Expand Up @@ -384,4 +384,66 @@ ParseToken::ParseToken(ParseLocationRange location_range, std::string image,
DCHECK(!value_.is_null());
}

// Converts this token into a ParseTokenProto.
//
// The image, kind and location range are always populated. The type and value
// fields are populated only when the token holds a valid value; the type is
// written as a self-contained TypeProto so that the accompanying ValueProto
// can be interpreted when the proto is read back.
//
// Returns an error status if the location range cannot be converted, or if
// serializing the value's type or the value itself fails.
zetasql_base::StatusOr<ParseTokenProto> ParseToken::ToProto() const {
  ParseTokenProto token_proto;
  token_proto.set_image(image_);
  token_proto.set_kind(serialize_kind(kind_));

  const auto location_range_proto = location_range_.ToProto();
  if (!location_range_proto.ok()) {
    return location_range_proto.status();
  }
  // The mutable_* accessors create the sub-message inside token_proto, which
  // owns it from the start; no raw allocation or ownership hand-off is needed.
  *token_proto.mutable_parse_location_range() = location_range_proto.value();

  if (value_.is_valid()) {
    ZETASQL_RETURN_IF_ERROR(value_.type()->SerializeToSelfContainedProto(
        token_proto.mutable_type()));
    ZETASQL_RETURN_IF_ERROR(value_.Serialize(token_proto.mutable_value()));
  }
  return token_proto;
}

// Maps a ParseToken::Kind onto the ParseTokenProto_Kind enumerator of the same
// name. Used by ToProto().
//
// Every enumerator handled here returns from inside the switch. The switch has
// no default label on purpose, so the compiler can still warn when a new Kind
// is added without a mapping.
ParseTokenProto_Kind ParseToken::serialize_kind(const ParseToken::Kind kind) {
  switch (kind) {
    case ParseToken::KEYWORD:
      return ParseTokenProto_Kind_KEYWORD;
    case ParseToken::IDENTIFIER:
      return ParseTokenProto_Kind_IDENTIFIER;
    case ParseToken::IDENTIFIER_OR_KEYWORD:
      return ParseTokenProto_Kind_IDENTIFIER_OR_KEYWORD;
    case ParseToken::VALUE:
      return ParseTokenProto_Kind_VALUE;
    case ParseToken::COMMENT:
      return ParseTokenProto_Kind_COMMENT;
    case ParseToken::END_OF_INPUT:
      return ParseTokenProto_Kind_END_OF_INPUT;
  }
  // Only reachable if <kind> holds a value outside the enumerators above.
  // Flowing off the end of a non-void function is undefined behavior, so fail
  // loudly in debug builds and otherwise return the enumerator an unset
  // `kind` field reads as (the first one declared in the proto).
  DCHECK(false);
  return ParseTokenProto_Kind_KEYWORD;
}


// Builds a ParseTokenOptionsProto carrying the same three settings as this
// options struct.
ParseTokenOptionsProto ParseTokenOptions::ToProto() const {
  ParseTokenOptionsProto proto;
  proto.set_include_comments(include_comments);
  proto.set_stop_at_end_of_statement(stop_at_end_of_statement);
  proto.set_max_tokens(max_tokens);
  return proto;
}


// Builds a ParseTokenOptions from <proto> by copying each of its three fields.
// Fields that are unset in <proto> contribute whatever their proto accessor
// returns for an unset field.
ParseTokenOptions ParseTokenOptions::FromProto(
    const ParseTokenOptionsProto& proto) {
  ParseTokenOptions result;
  result.include_comments = proto.include_comments();
  result.stop_at_end_of_statement = proto.stop_at_end_of_statement();
  result.max_tokens = proto.max_tokens();
  return result;
}

} // namespace zetasql
14 changes: 14 additions & 0 deletions zetasql/public/parse_tokens.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "zetasql/public/value.h"
#include "absl/strings/string_view.h"
#include "zetasql/base/status.h"
#include "zetasql/public/parse_tokens.pb.h"

namespace zetasql {

Expand Down Expand Up @@ -119,6 +120,9 @@ class ParseToken {
// Returns the location of the token in the input.
ParseLocationRange GetLocationRange() const { return location_range_; }

// Converts this ParseToken into a ParseTokenProto. The type and value fields
// of the proto are set only when the token holds a valid value. Returns an
// error status if the location range, the value's type, or the value cannot
// be serialized.
zetasql_base::StatusOr<ParseTokenProto> ToProto() const;

// The declarations below are intended for internal use.

enum Kind {
Expand Down Expand Up @@ -146,6 +150,10 @@ class ParseToken {
ParseLocationRange location_range_;
Value value_;

// Converts a token Kind into the corresponding ParseTokenProto_Kind value.
// Helper for ToProto().
static ParseTokenProto_Kind serialize_kind(ParseToken::Kind kind);

// Copyable
};

Expand All @@ -160,6 +168,12 @@ struct ParseTokenOptions {

// Return the comments in the ParseToken vector or silently drop them.
bool include_comments = false;

// Converts these options into a ParseTokenOptionsProto carrying the same
// max_tokens, stop_at_end_of_statement and include_comments settings.
ParseTokenOptionsProto ToProto() const;

// Creates a ParseTokenOptions object from its proto form.
static ParseTokenOptions FromProto(const ParseTokenOptionsProto& proto);
};

// Gets a vector of ParseTokens starting from <resume_location>, and updates
Expand Down
64 changes: 64 additions & 0 deletions zetasql/public/parse_tokens.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//
// Copyright 2020 ZetaSQL Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto2";

package zetasql;

import "zetasql/public/parse_location_range.proto";
import "zetasql/public/value.proto";
import "zetasql/public/type.proto";

option java_package = "com.google.zetasql";
option java_outer_classname = "ZetaSqlParseTokenProtos";

// Serialized form of a single ParseToken.
message ParseTokenProto {

// The category of a token.
enum Kind {
KEYWORD = 0; // A ZetaSQL keyword or symbol.
IDENTIFIER = 1; // An identifier that was quoted.
IDENTIFIER_OR_KEYWORD = 2; // An unquoted identifier.
VALUE = 3; // A literal value.
COMMENT = 4; // A comment.
END_OF_INPUT = 5; // The end of the input string was reached.
};

// The image of a token.
optional string image = 1;
// The kind of a token.
optional Kind kind = 2;
// The start and end position of a token.
optional zetasql.ParseLocationRangeProto parse_location_range = 3;
// The value of a token. ParseToken::ToProto sets this whenever the token
// holds a valid value.
// NOTE(review): this may include kinds other than VALUE; confirm which token
// kinds carry a value.
optional zetasql.ValueProto value = 4;
// The type of the value, set together with `value`. It is needed to
// deserialize the value proto back into a value.
optional zetasql.TypeProto type = 5;
}

// Serialized form of ParseTokenOptions: the settings that control a
// GetParseTokens() call.
message ParseTokenOptionsProto {
// Return at most this many tokens (only if positive). It is not possible to
// resume a GetParseTokens() call for which max_tokens was set.
optional int32 max_tokens = 1 [default = 0];

// Stop parsing after a ";" token. The last token returned will be either
// a ";" or an EOF.
optional bool stop_at_end_of_statement = 2 [default = false];

// If true, comments are returned as tokens; if false (the default), they are
// silently dropped.
optional bool include_comments = 3 [default = false];
}


Loading