Skip to content

Commit

Permalink
Add support for remote-store feature in OpenSearch (#38)
Browse files Browse the repository at this point in the history
Signed-off-by: Rishabh Singh <[email protected]>
  • Loading branch information
rishabh6788 authored Jun 26, 2023
1 parent 7e7d6cf commit d1adf6d
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*.d.ts
node_modules
cdk.context.json
.DS_Store
lib/.DS_Store

# CDK asset staging directory
.cdk.staging
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- [Required context parameters](#required-context-parameters)
- [Interacting with OpenSearch cluster](#interacting-with-opensearch-cluster)
- [Restricting Server Access](#restricting-server-access)
- [Enable Remote Store Feature](#enable-remote-store-feature)
- [Check Logs](#check-logs)
- [Access EC2 Instances](#access-ec2-instances)
- [Port Mapping](#port-mapping)
Expand Down Expand Up @@ -62,6 +63,7 @@ In order to deploy both the stacks the user needs to provide a set of required a
| mlNodeStorage (Optional) | string | User provided ebs block storage size, defaults to 100Gb |
| use50PercentHeap (Optional) | boolean | Boolean flag to use 50% of physical memory as heap. Default is 1GB. e.g., `--context use50PercentHeap=true` |
| isInternal (Optional) | boolean | Boolean flag to make network load balancer internal. Default is internet-facing e.g., `--context isInternal=true` |
| enableRemoteStore (Optional) | boolean | Boolean flag to enable Remote Store feature e.g., `--context enableRemoteStore=true`. See [Enable Remote Store Feature](#enable-remote-store-feature) for more details. |



Expand Down Expand Up @@ -131,6 +133,19 @@ Below values are allowed:
| prefixList | Prefix List id (eg: ab-12345) |
| securityGroupId | A security group ID (eg: sg-123456789) |

### Enable Remote Store Feature

The `Remote Store` feature provides an option to store indexed data in a remote durable data store. To enable this feature, the user needs to register a snapshot repository (S3 or File System), which is used to store the index data.
Apart from passing the `enableRemoteStore` flag as `true`, the user needs to provide additional settings to `opensearch.yml`. The settings are:
```
1. opensearch.experimental.feature.remote_store.enabled: 'true'
2. cluster.remote_store.enabled: 'true'
3. opensearch.experimental.feature.segment_replication_experimental.enabled: 'true'
4. cluster.indices.replication.strategy: SEGMENT
```
The above-mentioned settings need to be passed using `additionalConfig` parameter.
Please note that the `experimental` settings are only applicable while the feature is under development and will be removed when the feature becomes GA.

## Check logs

The opensearch logs are available in cloudwatch logs log-group `opensearchLogGroup/opensearch.log` in the same region your stack is deployed.
Expand Down
82 changes: 72 additions & 10 deletions lib/infra/infra-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { dump, load } from 'js-yaml';
import { InstanceTarget } from 'aws-cdk-lib/aws-elasticloadbalancingv2-targets';
import { CloudwatchAgent } from '../cloudwatch/cloudwatch-agent';
import { nodeConfig } from '../opensearch-config/node-config';
import { RemoteStoreResources } from './remote-store-resources';

export interface infraProps extends StackProps{
readonly vpc: IVpc,
Expand All @@ -59,9 +60,12 @@ export interface infraProps extends StackProps{
readonly mlEc2InstanceType: InstanceType,
readonly use50PercentHeap: boolean,
readonly isInternal: boolean,
readonly enableRemoteStore: boolean
}

export class InfraStack extends Stack {
private instanceRole: Role;

constructor(scope: Stack, id: string, props: infraProps) {
super(scope, id, props);
let opensearchListener: NetworkListener;
Expand All @@ -79,13 +83,20 @@ export class InfraStack extends Stack {
removalPolicy: RemovalPolicy.DESTROY,
});

const instanceRole = new Role(this, 'instanceRole', {
this.instanceRole = new Role(this, 'instanceRole', {
managedPolicies: [ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ReadOnlyAccess'),
ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'),
ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore')],
assumedBy: new ServicePrincipal('ec2.amazonaws.com'),
});

if (props.enableRemoteStore) {
// Remote Store needs an S3 bucket to be registered as snapshot repo
// Add scoped bucket policy to the instance role attached to the EC2
const remoteStoreObj = new RemoteStoreResources(this);
this.instanceRole.addToPolicy(remoteStoreObj.getRemoteStoreBucketPolicy());
}

const singleNodeInstanceType = (props.cpuType === AmazonLinuxCpuType.X86_64)
? InstanceType.of(InstanceClass.R5, InstanceSize.XLARGE) : InstanceType.of(InstanceClass.R6G, InstanceSize.XLARGE);

Expand Down Expand Up @@ -126,7 +137,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
vpcSubnets: {
subnetType: SubnetType.PRIVATE_WITH_EGRESS,
},
Expand Down Expand Up @@ -174,7 +185,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
maxCapacity: managerAsgCapacity,
minCapacity: managerAsgCapacity,
desiredCapacity: managerAsgCapacity,
Expand Down Expand Up @@ -206,7 +217,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
maxCapacity: 1,
minCapacity: 1,
desiredCapacity: 1,
Expand Down Expand Up @@ -234,7 +245,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
maxCapacity: dataAsgCapacity,
minCapacity: dataAsgCapacity,
desiredCapacity: dataAsgCapacity,
Expand Down Expand Up @@ -264,7 +275,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
maxCapacity: props.clientNodeCount,
minCapacity: props.clientNodeCount,
desiredCapacity: props.clientNodeCount,
Expand Down Expand Up @@ -295,7 +306,7 @@ export class InfraStack extends Stack {
generation: AmazonLinuxGeneration.AMAZON_LINUX_2,
cpuType: props.cpuType,
}),
role: instanceRole,
role: this.instanceRole,
maxCapacity: props.mlNodeCount,
minCapacity: props.mlNodeCount,
desiredCapacity: props.mlNodeCount,
Expand Down Expand Up @@ -445,14 +456,36 @@ export class InfraStack extends Stack {
}

if (props.distributionUrl.includes('artifacts.opensearch.org') && !props.minDistribution) {
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch; echo "y"|sudo -u ec2-user bin/opensearch-plugin install discovery-ec2', {
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch;sudo -u ec2-user bin/opensearch-plugin install discovery-ec2 --batch', {
cwd: '/home/ec2-user',
ignoreErrors: false,
}));
} else {
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch; echo "y"|sudo -u ec2-user bin/opensearch-plugin install '
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch;sudo -u ec2-user bin/opensearch-plugin install '
+ `https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/${props.opensearchVersion}/latest/linux/${props.cpuArch}`
+ `/tar/builds/opensearch/core-plugins/discovery-ec2-${props.opensearchVersion}.zip`, {
+ `/tar/builds/opensearch/core-plugins/discovery-ec2-${props.opensearchVersion}.zip --batch`, {
cwd: '/home/ec2-user',
ignoreErrors: false,
}));
}

if (props.enableRemoteStore) {
if (props.distributionUrl.includes('artifacts.opensearch.org') && !props.minDistribution) {
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch;sudo -u ec2-user bin/opensearch-plugin install repository-s3 --batch', {
cwd: '/home/ec2-user',
ignoreErrors: false,
}));
} else {
cfnInitConfig.push(InitCommand.shellCommand('set -ex;cd opensearch;sudo -u ec2-user bin/opensearch-plugin install '
+ `https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/${props.opensearchVersion}/latest/linux/${props.cpuArch}`
+ `/tar/builds/opensearch/core-plugins/repository-s3-${props.opensearchVersion}.zip --batch`, {
cwd: '/home/ec2-user',
ignoreErrors: false,
}));
}

// eslint-disable-next-line max-len
cfnInitConfig.push(InitCommand.shellCommand(`set -ex;cd opensearch; echo "cluster.remote_store.repository: ${scope.stackName}-repo" >> config/opensearch.yml`, {
cwd: '/home/ec2-user',
ignoreErrors: false,
}));
Expand Down Expand Up @@ -519,6 +552,35 @@ export class InfraStack extends Stack {
}));
}

if (props.enableRemoteStore) {
// Snapshot creation call should be done from one node to avoid any race condition, using seed node.
if (nodeType === 'seed-manager' || nodeType === 'seed-data') {
if (props.securityDisabled) {
// eslint-disable-next-line max-len
cfnInitConfig.push(InitCommand.shellCommand(`set -ex; sleep 60; curl -XPUT "http://localhost:9200/_snapshot/${scope.stackName}-repo" -H 'Content-Type: application/json' -d'
{
"type": "s3",
"settings": {
"bucket": "${scope.stackName}",
"region": "${scope.region}",
"base_path": "remote-store"
}
}'`));
} else {
// eslint-disable-next-line max-len
cfnInitConfig.push(InitCommand.shellCommand(`set -ex; sleep 60; curl -XPUT "https://localhost:9200/_snapshot/${scope.stackName}-repo" -ku admin:admin -H 'Content-Type: application/json' -d'
{
"type": "s3",
"settings": {
"bucket": "${scope.stackName}",
"region": "${scope.region}",
"base_path": "remote-store"
}
}'`));
}
}
}

// If OSD Url is present
if (props.dashboardsUrl !== 'undefined') {
cfnInitConfig.push(InitCommand.shellCommand(`set -ex;mkdir opensearch-dashboards; curl -L ${props.dashboardsUrl} -o opensearch-dashboards.tar.gz;`
Expand Down
37 changes: 37 additions & 0 deletions lib/infra/remote-store-resources.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { RemovalPolicy, Stack } from 'aws-cdk-lib';
import { Bucket } from 'aws-cdk-lib/aws-s3';
import { Effect, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam';

/**
 * Creates the S3 bucket used by the remote-store feature and prepares the IAM
 * policy statement that grants access to it.
 *
 * The bucket is named after the stack it is created in, is removed together
 * with the stack, and has its objects deleted automatically on removal.
 */
export class RemoteStoreResources {
  /** Bucket created in the given stack; its name equals the stack name. */
  private readonly snapshotS3Bucket: Bucket;

  /** Allow-statement scoped to the bucket itself and to every object in it. */
  private readonly bucketPolicyStatement: PolicyStatement;

  /**
   * @param scope Stack that owns the bucket; its name is used both in the
   *   construct id and as the physical bucket name.
   */
  constructor(scope: Stack) {
    const constructId = `remote-store-${scope.stackName}`;
    const physicalName = `${scope.stackName}`;

    const bucket = new Bucket(scope, constructId, {
      bucketName: physicalName,
      autoDeleteObjects: true,
      removalPolicy: RemovalPolicy.DESTROY,
    });
    this.snapshotS3Bucket = bucket;

    // Bucket-level and object-level operations granted on the bucket.
    const allowedActions = [
      's3:ListBucket',
      's3:GetBucketLocation',
      's3:ListBucketMultipartUploads',
      's3:ListBucketVersions',
      's3:GetObject',
      's3:PutObject',
      's3:DeleteObject',
      's3:AbortMultipartUpload',
      's3:ListMultipartUploadParts',
    ];

    // The bucket ARN covers bucket-level actions; the `/*` ARN covers its objects.
    const bucketArn = bucket.bucketArn;
    const objectsArn = `${bucket.bucketArn}/*`;

    this.bucketPolicyStatement = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: allowedActions,
      resources: [bucketArn, objectsArn],
    });
  }

  /**
   * @returns The policy statement built in the constructor, allowing the
   *   listed S3 actions on the bucket and its objects.
   */
  public getRemoteStoreBucketPolicy(): PolicyStatement {
    return this.bucketPolicyStatement;
  }
}
4 changes: 4 additions & 0 deletions lib/os-cluster-entrypoint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ export class OsClusterEntrypoint {
const nlbScheme = `${scope.node.tryGetContext('isInternal')}`;
const isInternal = nlbScheme === 'true';

const remoteStore = `${scope.node.tryGetContext('enableRemoteStore')}`;
const enableRemoteStore = remoteStore === 'true';

const network = new NetworkStack(scope, 'opensearch-network-stack', {
cidrBlock: cidrRange,
maxAzs: 3,
Expand Down Expand Up @@ -230,6 +233,7 @@ export class OsClusterEntrypoint {
additionalConfig: ymlConfig,
use50PercentHeap,
isInternal,
enableRemoteStore,
...props,
});

Expand Down
89 changes: 89 additions & 0 deletions test/os-cluster.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,92 @@ test('Test multi-node cluster with only data-nodes', () => {
],
});
});

// Verifies that enabling remote-store on a multi-node cluster synthesizes the
// S3 bucket and the IAM policy granting access to it.
test('Test multi-node cluster with remote-store enabled', () => {
  // GIVEN
  const remoteStoreContext = {
    securityDisabled: true,
    minDistribution: false,
    distributionUrl: 'www.example.com',
    cpuArch: 'x64',
    singleNodeCluster: false,
    dashboardsUrl: 'www.example.com',
    distVersion: '1.0.0',
    serverAccessType: 'ipv4',
    restrictServerAccessTo: 'all',
    managerNodeCount: 0,
    dataNodeCount: 3,
    dataNodeStorage: 200,
    enableRemoteStore: true,
  };
  const app = new App({ context: remoteStoreContext });

  // WHEN
  const testStack = new OsClusterEntrypoint(app, {
    env: { account: 'test-account', region: 'us-east-1' },
  });

  // THEN
  expect(testStack.stacks).toHaveLength(2);

  const [infraStack] = testStack.stacks.filter((s) => s.stackName === 'opensearch-infra-stack');
  const infraTemplate = Template.fromStack(infraStack);

  // Expected number of resources per CloudFormation type, checked in this order.
  const expectedResourceCounts: [string, number][] = [
    ['AWS::S3::Bucket', 1],
    ['AWS::S3::BucketPolicy', 1],
    ['AWS::Lambda::Function', 1],
    ['AWS::IAM::Role', 2],
    ['AWS::IAM::Policy', 1],
  ];
  for (const [resourceType, expectedCount] of expectedResourceCounts) {
    infraTemplate.resourceCountIs(resourceType, expectedCount);
  }

  infraTemplate.hasResourceProperties('AWS::S3::Bucket', {
    BucketName: 'opensearch-infra-stack',
  });

  // Reference to the ARN of the synthesized remote-store bucket.
  const bucketArnRef = {
    'Fn::GetAtt': [
      'remotestoreopensearchinfrastack6A47755C',
      'Arn',
    ],
  };

  // Statement expected for the remote-store bucket and its objects.
  const remoteStoreStatement = {
    Action: [
      's3:ListBucket',
      's3:GetBucketLocation',
      's3:ListBucketMultipartUploads',
      's3:ListBucketVersions',
      's3:GetObject',
      's3:PutObject',
      's3:DeleteObject',
      's3:AbortMultipartUpload',
      's3:ListMultipartUploadParts',
    ],
    Effect: 'Allow',
    Resource: [
      bucketArnRef,
      {
        'Fn::Join': [
          '',
          [
            bucketArnRef,
            '/*',
          ],
        ],
      },
    ],
  };

  // Second statement expected in the same policy document.
  const cfnSignalStatement = {
    Action: [
      'cloudformation:DescribeStackResource',
      'cloudformation:SignalResource',
    ],
    Effect: 'Allow',
    Resource: {
      Ref: 'AWS::StackId',
    },
  };

  infraTemplate.hasResourceProperties('AWS::IAM::Policy', {
    PolicyDocument: {
      Statement: [
        remoteStoreStatement,
        cfnSignalStatement,
      ],
    },
  });
});

0 comments on commit d1adf6d

Please sign in to comment.