Skip to content

Commit

Permalink
Revert "Integration with REPL Spark job (opensearch-project#2327) (opensearch-project#2338)"
Browse files Browse the repository at this point in the history

This reverts commit 58a5ae5.

Signed-off-by: Eric <[email protected]>
  • Loading branch information
mengweieric committed Nov 8, 2023
1 parent 224a216 commit 9237044
Show file tree
Hide file tree
Showing 22 changed files with 157 additions and 810 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG;
import static org.opensearch.sql.datasource.model.DataSourceMetadata.defaultOpenSearchDataSourceMetadata;
import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_REQUEST_BUFFER_INDEX_NAME;

import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.emrserverless.AWSEMRServerless;
Expand Down Expand Up @@ -320,7 +321,9 @@ private AsyncQueryExecutorService createAsyncQueryExecutorService(
new FlintIndexMetadataReaderImpl(client),
client,
new SessionManager(
new StateStore(client, clusterService), emrServerlessClient, pluginSettings));
new StateStore(SPARK_REQUEST_BUFFER_INDEX_NAME, client),
emrServerlessClient,
pluginSettings));
return new AsyncQueryExecutorServiceImpl(
asyncQueryJobMetadataStorageService,
sparkQueryDispatcher,
Expand Down
1 change: 0 additions & 1 deletion spark/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ dependencies {
because 'allows tests to run from IDEs that bundle older version of launcher'
}
testImplementation("org.opensearch.test:framework:${opensearch_version}")
testImplementation project(':opensearch')
}

test {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI;
import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_ROLE_ARN;
import static org.opensearch.sql.spark.data.constants.SparkConstants.*;
import static org.opensearch.sql.spark.execution.statestore.StateStore.DATASOURCE_TO_REQUEST_INDEX;

import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -40,7 +39,7 @@ public class SparkSubmitParameters {

public static class Builder {

private String className;
private final String className;
private final Map<String, String> config;
private String extraParameters;

Expand Down Expand Up @@ -71,11 +70,6 @@ public static Builder builder() {
return new Builder();
}

public Builder className(String className) {
this.className = className;
return this;
}

public Builder dataSource(DataSourceMetadata metadata) {
if (DataSourceType.S3GLUE.equals(metadata.getConnector())) {
String roleArn = metadata.getProperties().get(GLUE_ROLE_ARN);
Expand Down Expand Up @@ -147,12 +141,6 @@ public Builder extraParameters(String params) {
return this;
}

public Builder sessionExecution(String sessionId, String datasourceName) {
config.put(FLINT_JOB_REQUEST_INDEX, DATASOURCE_TO_REQUEST_INDEX.apply(datasourceName));
config.put(FLINT_JOB_SESSION_ID, sessionId);
return this;
}

public SparkSubmitParameters build() {
return new SparkSubmitParameters(className, config, extraParameters);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,4 @@ public class SparkConstants {
public static final String EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER =
"com.amazonaws.emr.AssumeRoleAWSCredentialsProvider";
public static final String JAVA_HOME_LOCATION = "/usr/lib/jvm/java-17-amazon-corretto.x86_64/";

public static final String FLINT_JOB_REQUEST_INDEX = "spark.flint.job.requestIndex";
public static final String FLINT_JOB_SESSION_ID = "spark.flint.job.sessionId";
public static final String FLINT_SESSION_CLASS_NAME = "org.apache.spark.sql.FlintREPL";
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import static org.opensearch.sql.spark.data.constants.SparkConstants.DATA_FIELD;
import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD;
import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_SESSION_CLASS_NAME;
import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD;

import com.amazonaws.services.emrserverless.model.CancelJobRunResult;
Expand Down Expand Up @@ -97,19 +96,12 @@ public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata)
return DropIndexResult.fromJobId(asyncQueryJobMetadata.getJobId()).result();
}

JSONObject result;
if (asyncQueryJobMetadata.getSessionId() == null) {
// either empty json when the result is not available or data with status
// Fetch from Result Index
result =
jobExecutionResponseReader.getResultFromOpensearchIndex(
asyncQueryJobMetadata.getJobId(), asyncQueryJobMetadata.getResultIndex());
} else {
// when session enabled, jobId in asyncQueryJobMetadata is actually queryId.
result =
jobExecutionResponseReader.getResultWithQueryId(
asyncQueryJobMetadata.getJobId(), asyncQueryJobMetadata.getResultIndex());
}
// either empty json when the result is not available or data with status
// Fetch from Result Index
JSONObject result =
jobExecutionResponseReader.getResultFromOpensearchIndex(
asyncQueryJobMetadata.getJobId(), asyncQueryJobMetadata.getResultIndex());

// if result index document has a status, we are gonna use the status directly; otherwise, we
// will use emr-s job status.
// That a job is successful does not mean there is no error in execution. For example, even if
Expand Down Expand Up @@ -238,7 +230,22 @@ private DispatchQueryResponse handleNonIndexQuery(DispatchQueryRequest dispatchQ
dataSourceUserAuthorizationHelper.authorizeDataSource(dataSourceMetadata);
String jobName = dispatchQueryRequest.getClusterName() + ":" + "non-index-query";
Map<String, String> tags = getDefaultTagsForJobSubmission(dispatchQueryRequest);

StartJobRequest startJobRequest =
new StartJobRequest(
dispatchQueryRequest.getQuery(),
jobName,
dispatchQueryRequest.getApplicationId(),
dispatchQueryRequest.getExecutionRoleARN(),
SparkSubmitParameters.Builder.builder()
.dataSource(
dataSourceService.getRawDataSourceMetadata(
dispatchQueryRequest.getDatasource()))
.extraParameters(dispatchQueryRequest.getExtraSparkSubmitParams())
.build()
.toString(),
tags,
false,
dataSourceMetadata.getResultIndex());
if (sessionManager.isEnabled()) {
Session session;
if (dispatchQueryRequest.getSessionId() != null) {
Expand All @@ -253,19 +260,7 @@ private DispatchQueryResponse handleNonIndexQuery(DispatchQueryRequest dispatchQ
// create session if not exist
session =
sessionManager.createSession(
new CreateSessionRequest(
jobName,
dispatchQueryRequest.getApplicationId(),
dispatchQueryRequest.getExecutionRoleARN(),
SparkSubmitParameters.Builder.builder()
.className(FLINT_SESSION_CLASS_NAME)
.dataSource(
dataSourceService.getRawDataSourceMetadata(
dispatchQueryRequest.getDatasource()))
.extraParameters(dispatchQueryRequest.getExtraSparkSubmitParams()),
tags,
dataSourceMetadata.getResultIndex(),
dataSourceMetadata.getName()));
new CreateSessionRequest(startJobRequest, dataSourceMetadata.getName()));
}
StatementId statementId =
session.submit(
Expand All @@ -277,22 +272,6 @@ private DispatchQueryResponse handleNonIndexQuery(DispatchQueryRequest dispatchQ
dataSourceMetadata.getResultIndex(),
session.getSessionId().getSessionId());
} else {
StartJobRequest startJobRequest =
new StartJobRequest(
dispatchQueryRequest.getQuery(),
jobName,
dispatchQueryRequest.getApplicationId(),
dispatchQueryRequest.getExecutionRoleARN(),
SparkSubmitParameters.Builder.builder()
.dataSource(
dataSourceService.getRawDataSourceMetadata(
dispatchQueryRequest.getDatasource()))
.extraParameters(dispatchQueryRequest.getExtraSparkSubmitParams())
.build()
.toString(),
tags,
false,
dataSourceMetadata.getResultIndex());
String jobId = emrServerlessClient.startJobRun(startJobRequest);
return new DispatchQueryResponse(jobId, false, dataSourceMetadata.getResultIndex(), null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,11 @@

package org.opensearch.sql.spark.execution.session;

import java.util.Map;
import lombok.Data;
import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters;
import org.opensearch.sql.spark.client.StartJobRequest;

@Data
// NOTE(review): this class is rendered from a revert diff; the field list below appears to
// merge the pre-revert fields (startJobRequest, datasourceName) with the post-revert fields
// (jobName .. resultIndex). Verify against the actual checked-in file before relying on it.
// Purpose (as visible here): value object carrying everything needed to start an EMR
// serverless job for an interactive session.
public class CreateSessionRequest {
private final String jobName;
private final String applicationId;
private final String executionRoleArn;
// Builder for the spark-submit parameter string; built lazily in getStartJobRequest().
private final SparkSubmitParameters.Builder sparkSubmitParametersBuilder;
private final Map<String, String> tags;
private final String resultIndex;
private final StartJobRequest startJobRequest;
private final String datasourceName;

// Builds a StartJobRequest for the session. The literal "select 1" is used as a placeholder
// query — presumably because the REPL session job ignores the initial query text; confirm
// with the Spark job contract.
public StartJobRequest getStartJobRequest() {
return new StartJobRequest(
"select 1",
jobName,
applicationId,
executionRoleArn,
sparkSubmitParametersBuilder.build().toString(),
tags,
false, // not an index/streaming job — TODO confirm meaning of this boolean flag
resultIndex);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,13 @@ public class InteractiveSession implements Session {
@Override
public void open(CreateSessionRequest createSessionRequest) {
try {
// append session id;
createSessionRequest
.getSparkSubmitParametersBuilder()
.sessionExecution(sessionId.getSessionId(), createSessionRequest.getDatasourceName());
String jobID = serverlessClient.startJobRun(createSessionRequest.getStartJobRequest());
String applicationId = createSessionRequest.getStartJobRequest().getApplicationId();

sessionModel =
initInteractiveSession(
applicationId, jobID, sessionId, createSessionRequest.getDatasourceName());
createSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel);
createSession(stateStore).apply(sessionModel);
} catch (VersionConflictEngineException e) {
String errorMsg = "session already exist. " + sessionId;
LOG.error(errorMsg);
Expand All @@ -63,8 +59,7 @@ public void open(CreateSessionRequest createSessionRequest) {
/** todo. StatementSweeper will delete doc. */
@Override
public void close() {
Optional<SessionModel> model =
getSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel.getId());
Optional<SessionModel> model = getSession(stateStore).apply(sessionModel.getId());
if (model.isEmpty()) {
throw new IllegalStateException("session does not exist. " + sessionModel.getSessionId());
} else {
Expand All @@ -74,8 +69,7 @@ public void close() {

/** Submit statement. If submit successfully, Statement in waiting state. */
public StatementId submit(QueryRequest request) {
Optional<SessionModel> model =
getSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel.getId());
Optional<SessionModel> model = getSession(stateStore).apply(sessionModel.getId());
if (model.isEmpty()) {
throw new IllegalStateException("session does not exist. " + sessionModel.getSessionId());
} else {
Expand All @@ -90,7 +84,6 @@ public StatementId submit(QueryRequest request) {
.stateStore(stateStore)
.statementId(statementId)
.langType(LangType.SQL)
.datasourceName(sessionModel.getDatasourceName())
.query(request.getQuery())
.queryId(statementId.getId())
.build();
Expand All @@ -110,7 +103,7 @@ public StatementId submit(QueryRequest request) {

@Override
public Optional<Statement> get(StatementId stID) {
return StateStore.getStatement(stateStore, sessionModel.getDatasourceName())
return StateStore.getStatement(stateStore)
.apply(stID.getId())
.map(
model ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,15 @@

package org.opensearch.sql.spark.execution.session;

import java.nio.charset.StandardCharsets;
import java.util.Base64;
import lombok.Data;
import org.apache.commons.lang3.RandomStringUtils;

@Data
public class SessionId {
public static final int PREFIX_LEN = 10;

private final String sessionId;

public static SessionId newSessionId(String datasourceName) {
return new SessionId(encode(datasourceName));
}

public String getDataSourceName() {
return decode(sessionId);
}

private static String decode(String sessionId) {
return new String(Base64.getDecoder().decode(sessionId)).substring(PREFIX_LEN);
}

private static String encode(String datasourceName) {
String randomId = RandomStringUtils.randomAlphanumeric(PREFIX_LEN) + datasourceName;
return Base64.getEncoder().encodeToString(randomId.getBytes(StandardCharsets.UTF_8));
public static SessionId newSessionId() {
return new SessionId(RandomStringUtils.randomAlphanumeric(16));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class SessionManager {
public Session createSession(CreateSessionRequest request) {
InteractiveSession session =
InteractiveSession.builder()
.sessionId(newSessionId(request.getDatasourceName()))
.sessionId(newSessionId())
.stateStore(stateStore)
.serverlessClient(emrServerlessClient)
.build();
Expand All @@ -37,8 +37,7 @@ public Session createSession(CreateSessionRequest request) {
}

public Optional<Session> getSession(SessionId sid) {
Optional<SessionModel> model =
StateStore.getSession(stateStore, sid.getDataSourceName()).apply(sid.getSessionId());
Optional<SessionModel> model = StateStore.getSession(stateStore).apply(sid.getSessionId());
if (model.isPresent()) {
InteractiveSession session =
InteractiveSession.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import com.google.common.collect.ImmutableList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Getter;
Expand All @@ -33,10 +32,8 @@ public enum SessionState {
.collect(Collectors.toMap(t -> t.name().toLowerCase(), t -> t));

public static SessionState fromString(String key) {
for (SessionState ss : SessionState.values()) {
if (ss.getSessionState().toLowerCase(Locale.ROOT).equals(key)) {
return ss;
}
if (STATES.containsKey(key)) {
return STATES.get(key);
}
throw new IllegalArgumentException("Invalid session state: " + key);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

package org.opensearch.sql.spark.execution.session;

import java.util.Locale;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Getter;

@Getter
Expand All @@ -18,11 +20,13 @@ public enum SessionType {
this.sessionType = sessionType;
}

private static Map<String, SessionType> TYPES =
Arrays.stream(SessionType.values())
.collect(Collectors.toMap(t -> t.name().toLowerCase(), t -> t));

public static SessionType fromString(String key) {
for (SessionType sType : SessionType.values()) {
if (sType.getSessionType().toLowerCase(Locale.ROOT).equals(key)) {
return sType;
}
if (TYPES.containsKey(key)) {
return TYPES.get(key);
}
throw new IllegalArgumentException("Invalid session type: " + key);
}
Expand Down
Loading

0 comments on commit 9237044

Please sign in to comment.