-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Destroying opensearch application results in 2/3 errored units #243
Comments
Only solution was to remove machines with |
@phvalguima can you provide the full DEBUG-level log? without it, it's not possible to determine if this is a lock issue or another issue |
Tried reproducing with deploying 3 units and removing application |
_cluster/health endpoint shows {
"cluster_name": "opensearch-phav",
"status": "red",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"discovered_master": true,
"discovered_cluster_manager": true,
"active_primary_shards": 2,
"active_shards": 2,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 3,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 40.0
} _nodes endpoint shows _nodes{
"_nodes": {
"total": 1,
"successful": 1,
"failed": 0
},
"cluster_name": "opensearch-phav",
"nodes": {
"mno4iMe5RiuRAw3yysFyQg": {
"name": "opensearch-0",
"transport_address": "10.139.243.49:9300",
"host": "10.139.243.49",
"ip": "10.139.243.49",
"version": "2.12.0",
"build_type": "tar",
"build_hash": "6a67da4db576f1f2c6c60bbfd97a74809bdb2dfc",
"total_indexing_buffer": 107374182,
"roles": [
"cluster_manager",
"coordinating_only",
"data",
"ingest",
"ml"
],
"attributes": {
"shard_indexing_pressure_enabled": "true"
},
"settings": {
"cluster": {
"name": "opensearch-phav",
"initial_cluster_manager_nodes": [
"opensearch-2",
"opensearch-0"
]
},
"node": {
"attr": {
"shard_indexing_pressure_enabled": "true"
},
"name": "opensearch-0",
"roles": [
"data",
"ingest",
"ml",
"coordinating_only",
"cluster_manager"
]
},
"path": {
"data": [
"/var/snap/opensearch/common/var/lib/opensearch"
],
"logs": "/var/snap/opensearch/common/var/log/opensearch",
"home": "/var/snap/opensearch/current/usr/share/opensearch"
},
"discovery": {
"seed_providers": "file"
},
"client": {
"type": "node"
},
"http": {
"compression": "false",
"type": "org.opensearch.security.http.SecurityHttpServerTransport",
"type.default": "netty4"
},
"index": {
"store": {
"hybrid": {
"mmap": {
"extensions": [
"nvd",
"dvd",
"tim",
"tip",
"dim",
"kdd",
"kdi",
"cfs",
"doc",
"vec",
"vex"
]
}
}
}
},
"prometheus": {
"cluster": {
"settings": "false"
},
"indices": "false",
"nodes": {
"filter": "_local"
},
"metric_name": {
"prefix": "opensearch_"
}
},
"transport": {
"type": "org.opensearch.security.ssl.http.netty.SecuritySSLNettyTransport",
"type.default": "netty4"
},
"network": {
"host": [
"_site_",
"juju-dc3ec8-1.lxd",
"10.139.243.49"
]
}
},
"os": {
"refresh_interval_in_millis": 1000,
"name": "Linux",
"pretty_name": "Ubuntu Core 22",
"arch": "amd64",
"version": "6.5.0-1018-gcp",
"available_processors": 8,
"allocated_processors": 8
},
"process": {
"refresh_interval_in_millis": 1000,
"id": 26323,
"mlockall": false
},
"jvm": {
"pid": 26323,
"version": "21.0.2",
"vm_name": "OpenJDK 64-Bit Server VM",
"vm_version": "21.0.2+13-Ubuntu-122.04.1",
"vm_vendor": "Private Build",
"bundled_jdk": false,
"using_bundled_jdk": null,
"start_time_in_millis": 1713856248566,
"mem": {
"heap_init_in_bytes": 1073741824,
"heap_max_in_bytes": 1073741824,
"non_heap_init_in_bytes": 7667712,
"non_heap_max_in_bytes": 0,
"direct_max_in_bytes": 0
},
"gc_collectors": [
"G1 Young Generation",
"G1 Concurrent GC",
"G1 Old Generation"
],
"memory_pools": [
"CodeHeap 'non-nmethods'",
"Metaspace",
"CodeHeap 'profiled nmethods'",
"Compressed Class Space",
"G1 Eden Space",
"G1 Old Gen",
"G1 Survivor Space",
"CodeHeap 'non-profiled nmethods'"
],
"using_compressed_ordinary_object_pointers": "true",
"input_arguments": [
"-Xshare:auto",
"-Dopensearch.networkaddress.cache.ttl=60",
"-Dopensearch.networkaddress.cache.negative.ttl=10",
"-XX:+AlwaysPreTouch",
"-Xss1m",
"-Djava.awt.headless=true",
"-Dfile.encoding=UTF-8",
"-Djna.nosys=true",
"-XX:-OmitStackTraceInFastThrow",
"-XX:+ShowCodeDetailsInExceptionMessages",
"-Dio.netty.noUnsafe=true",
"-Dio.netty.noKeySetOptimization=true",
"-Dio.netty.recycler.maxCapacityPerThread=0",
"-Dio.netty.allocator.numDirectArenas=0",
"-Dlog4j.shutdownHookEnabled=false",
"-Dlog4j2.disable.jmx=true",
"-Djava.security.manager=allow",
"-Djava.locale.providers=SPI,COMPAT",
"-Xms1g",
"-Xmx1g",
"-XX:+UseG1GC",
"-XX:G1ReservePercent=25",
"-XX:InitiatingHeapOccupancyPercent=30",
"-Djava.io.tmpdir=/var/snap/opensearch/common/usr/share/tmp",
"-XX:+HeapDumpOnOutOfMemoryError",
"-XX:HeapDumpPath=data",
"-XX:ErrorFile=/var/snap/opensearch/common/var/log/opensearch/hs_err_pid%p.log",
"-Xlog:gc*,gc+age=trace,safepoint:file=/var/snap/opensearch/common/var/log/opensearch/gc.log:utctime,pid,tags:filecount=32,filesize=64m",
"-Djava.security.manager=allow",
"--add-modules=jdk.incubator.vector",
"-Djava.util.concurrent.ForkJoinPool.common.threadFactory=org.opensearch.secure_sm.SecuredForkJoinWorkerThreadFactory",
"-XX:MaxDirectMemorySize=536870912",
"-Dopensearch.path.home=/var/snap/opensearch/current/usr/share/opensearch",
"-Dopensearch.path.conf=/var/snap/opensearch/current/etc/opensearch",
"-Dopensearch.distribution.type=tar",
"-Dopensearch.bundled_jdk=false"
]
},
"thread_pool": {
"remote_refresh_retry": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"force_merge": {
"type": "fixed",
"size": 1,
"queue_size": -1
},
"fetch_shard_started": {
"type": "scaling",
"core": 1,
"max": 16,
"keep_alive": "5m",
"queue_size": -1
},
"urgent_stream_reader": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"listener": {
"type": "fixed",
"size": 4,
"queue_size": -1
},
"priority_stream_reader": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"opensearch_ml_execute": {
"type": "fixed",
"size": 7,
"queue_size": 10
},
"remote_recovery": {
"type": "scaling",
"core": 1,
"max": 16,
"keep_alive": "5m",
"queue_size": -1
},
"training": {
"type": "fixed",
"size": 1,
"queue_size": 1
},
"opensearch_ml_train": {
"type": "fixed",
"size": 7,
"queue_size": 10
},
"stream_reader": {
"type": "scaling",
"core": 8,
"max": 32,
"keep_alive": "5m",
"queue_size": -1
},
"priority_future_completion": {
"type": "scaling",
"core": 1,
"max": 8,
"keep_alive": "5m",
"queue_size": -1
},
"index_searcher": {
"type": "fixed_auto_queue_size",
"size": 16,
"queue_size": 1000
},
"remote_purge": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"sql-worker": {
"type": "fixed",
"size": 8,
"queue_size": 1000
},
"search": {
"type": "fixed_auto_queue_size",
"size": 13,
"queue_size": 1000
},
"opensearch_workflow": {
"type": "scaling",
"core": 1,
"max": 7,
"keep_alive": "1m",
"queue_size": -1
},
"opensearch_asynchronous_search_generic": {
"type": "scaling",
"core": 1,
"max": 16,
"keep_alive": "30m",
"queue_size": -1
},
"flush": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"fetch_shard_store": {
"type": "scaling",
"core": 1,
"max": 16,
"keep_alive": "5m",
"queue_size": -1
},
"opensearch_ml_predict": {
"type": "fixed",
"size": 16,
"queue_size": 10000
},
"opensearch_provision_workflow": {
"type": "scaling",
"core": 1,
"max": 7,
"keep_alive": "5m",
"queue_size": -1
},
"get": {
"type": "fixed",
"size": 8,
"queue_size": 1000
},
"system_read": {
"type": "fixed",
"size": 4,
"queue_size": 2000
},
"urgent_future_completion": {
"type": "fixed",
"size": 1,
"queue_size": 10000
},
"open_distro_job_scheduler": {
"type": "fixed",
"size": 8,
"queue_size": 200
},
"write": {
"type": "fixed",
"size": 8,
"queue_size": 10000
},
"opensearch_ml_general": {
"type": "fixed",
"size": 7,
"queue_size": 100
},
"replication_follower": {
"type": "scaling",
"core": 1,
"max": 10,
"keep_alive": "1m",
"queue_size": -1
},
"opensearch_deprovision_workflow": {
"type": "scaling",
"core": 1,
"max": 7,
"keep_alive": "1m",
"queue_size": -1
},
"refresh": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"opensearch_ml_deploy": {
"type": "fixed",
"size": 7,
"queue_size": 10
},
"replication_leader": {
"type": "fixed",
"size": 13,
"queue_size": 1000
},
"translog_sync": {
"type": "fixed",
"size": 32,
"queue_size": 10000
},
"system_write": {
"type": "fixed",
"size": 4,
"queue_size": 1000
},
"generic": {
"type": "scaling",
"core": 4,
"max": 128,
"keep_alive": "30s",
"queue_size": -1
},
"warmer": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"translog_transfer": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"opensearch_ml_register": {
"type": "fixed",
"size": 7,
"queue_size": 10
},
"management": {
"type": "scaling",
"core": 1,
"max": 5,
"keep_alive": "5m",
"queue_size": -1
},
"future_completion": {
"type": "scaling",
"core": 1,
"max": 8,
"keep_alive": "5m",
"queue_size": -1
},
"analyze": {
"type": "fixed",
"size": 1,
"queue_size": 16
},
"_plugin_geospatial_ip2geo_datasource_update": {
"type": "fixed",
"size": 1,
"queue_size": 1000
},
"ad-threadpool": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "10m",
"queue_size": -1
},
"snapshot": {
"type": "scaling",
"core": 1,
"max": 4,
"keep_alive": "5m",
"queue_size": -1
},
"search_throttled": {
"type": "fixed_auto_queue_size",
"size": 1,
"queue_size": 100
},
"ad-batch-task-threadpool": {
"type": "scaling",
"core": 1,
"max": 1,
"keep_alive": "10m",
"queue_size": -1
}
},
"transport": {
"bound_address": [
"10.139.243.49:9300"
],
"publish_address": "10.139.243.49:9300",
"profiles": {}
},
"http": {
"bound_address": [
"10.139.243.49:9200"
],
"publish_address": "10.139.243.49:9200",
"max_content_length_in_bytes": 104857600
},
"plugins": [
{
"name": "opensearch-alerting",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Amazon OpenSearch alerting plugin",
"classname": "org.opensearch.alerting.AlertingPlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless"
],
"has_native_controller": false
},
{
"name": "opensearch-anomaly-detection",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch anomaly detector plugin",
"classname": "org.opensearch.ad.AnomalyDetectorPlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless",
"opensearch-job-scheduler"
],
"has_native_controller": false
},
{
"name": "opensearch-asynchronous-search",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Provides support for asynchronous search",
"classname": "org.opensearch.search.asynchronous.plugin.AsynchronousSearchPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-cross-cluster-replication",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Cross Cluster Replication Plugin",
"classname": "org.opensearch.replication.ReplicationPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-custom-codecs",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch plugin that implements custom compression codecs",
"classname": "org.opensearch.index.codec.customcodecs.CustomCodecPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-flow-framework",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch plugin that enables builders to innovate AI apps on OpenSearch",
"classname": "org.opensearch.flowframework.FlowFrameworkPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-geospatial",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Geospatial plugin to host geospatial features",
"classname": "org.opensearch.geospatial.plugin.GeospatialPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-job-scheduler"
],
"has_native_controller": false
},
{
"name": "opensearch-index-management",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Index Management Plugin",
"classname": "org.opensearch.indexmanagement.IndexManagementPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-job-scheduler"
],
"has_native_controller": false
},
{
"name": "opensearch-job-scheduler",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Job Scheduler plugin",
"classname": "org.opensearch.jobscheduler.JobSchedulerPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-knn",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch k-NN plugin",
"classname": "org.opensearch.knn.plugin.KNNPlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless"
],
"has_native_controller": false
},
{
"name": "opensearch-ml",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "machine learning plugin for opensearch",
"classname": "org.opensearch.ml.plugin.MachineLearningPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-neural-search",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "A plugin that adds dense neural retrieval into the OpenSearch ecosytem",
"classname": "org.opensearch.neuralsearch.plugin.NeuralSearch",
"custom_foldername": "",
"extended_plugins": [
"opensearch-knn"
],
"has_native_controller": false
},
{
"name": "opensearch-notifications",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Notifications Plugin",
"classname": "org.opensearch.notifications.NotificationPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-notifications-core"
],
"has_native_controller": false
},
{
"name": "opensearch-notifications-core",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Notifications Core Plugin",
"classname": "org.opensearch.notifications.core.NotificationCorePlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-observability",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Plugin for OpenSearch Dashboards Observability",
"classname": "org.opensearch.observability.ObservabilityPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-performance-analyzer",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Performance Analyzer Plugin",
"classname": "org.opensearch.performanceanalyzer.PerformanceAnalyzerPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-reports-scheduler",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Scheduler for Dashboards Reports Plugin",
"classname": "org.opensearch.reportsscheduler.ReportsSchedulerPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-job-scheduler"
],
"has_native_controller": false
},
{
"name": "opensearch-security",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Provide access control related features for OpenSearch",
"classname": "org.opensearch.security.OpenSearchSecurityPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-security-analytics",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Security Analytics plugin",
"classname": "org.opensearch.securityanalytics.SecurityAnalyticsPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-job-scheduler"
],
"has_native_controller": false
},
{
"name": "opensearch-skills",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch Skills",
"classname": "org.opensearch.agent.ToolPlugin",
"custom_foldername": "",
"extended_plugins": [
"opensearch-ml"
],
"has_native_controller": false
},
{
"name": "opensearch-sql",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "OpenSearch SQL",
"classname": "org.opensearch.sql.plugin.SQLPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "prometheus-exporter",
"version": "2.12.0.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Prometheus exporter plugin for OpenSearch",
"classname": "org.opensearch.plugin.prometheus.PrometheusExporterPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "repository-gcs",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "The GCS repository plugin adds Google Cloud Storage support for repositories.",
"classname": "org.opensearch.repositories.gcs.GoogleCloudStoragePlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "repository-s3",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "The S3 repository plugin adds S3 repositories",
"classname": "org.opensearch.repositories.s3.S3RepositoryPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
}
],
"modules": [
{
"name": "aggs-matrix-stats",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Adds aggregations whose input are a list of numeric fields and output includes a matrix.",
"classname": "org.opensearch.search.aggregations.matrix.MatrixAggregationPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "analysis-common",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Adds \"built in\" analyzers to OpenSearch.",
"classname": "org.opensearch.analysis.common.CommonAnalysisPlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless"
],
"has_native_controller": false
},
{
"name": "geo",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Plugin for geospatial features in OpenSearch. Registering the geo_shape and aggregations on GeoShape and GeoPoint",
"classname": "org.opensearch.geo.GeoModulePlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "ingest-common",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Module for ingest processors that do not require additional security permissions or have large dependencies and resources",
"classname": "org.opensearch.ingest.common.IngestCommonPlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless"
],
"has_native_controller": false
},
{
"name": "ingest-geoip",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Ingest processor that uses looksup geo data based on ip adresses using the Maxmind geo database",
"classname": "org.opensearch.ingest.geoip.IngestGeoIpPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "ingest-user-agent",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Ingest processor that extracts information from a user agent",
"classname": "org.opensearch.ingest.useragent.IngestUserAgentPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "lang-expression",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Lucene expressions integration for OpenSearch",
"classname": "org.opensearch.script.expression.ExpressionPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "lang-mustache",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Mustache scripting integration for OpenSearch",
"classname": "org.opensearch.script.mustache.MustachePlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "lang-painless",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "An easy, safe and fast scripting language for OpenSearch",
"classname": "org.opensearch.painless.PainlessPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "mapper-extras",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Adds advanced field mappers",
"classname": "org.opensearch.index.mapper.MapperExtrasPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "opensearch-dashboards",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Plugin exposing APIs for OpenSearch Dashboards system indices",
"classname": "org.opensearch.dashboards.OpenSearchDashboardsPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "parent-join",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "This module adds the support parent-child queries and aggregations",
"classname": "org.opensearch.join.ParentJoinPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "percolator",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Percolator module adds capability to index queries and query these queries by specifying documents",
"classname": "org.opensearch.percolator.PercolatorPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "rank-eval",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "The Rank Eval module adds APIs to evaluate ranking quality.",
"classname": "org.opensearch.index.rankeval.RankEvalPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "reindex",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "The Reindex module adds APIs to reindex from one index to another or update documents in place.",
"classname": "org.opensearch.index.reindex.ReindexPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "repository-url",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Module for URL repository",
"classname": "org.opensearch.plugin.repository.url.URLRepositoryPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "search-pipeline-common",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Module for search pipeline processors that do not require additional security permissions or have large dependencies and resources",
"classname": "org.opensearch.search.pipeline.common.SearchPipelineCommonModulePlugin",
"custom_foldername": "",
"extended_plugins": [
"lang-painless"
],
"has_native_controller": false
},
{
"name": "systemd",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Integrates OpenSearch with systemd",
"classname": "org.opensearch.systemd.SystemdPlugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
},
{
"name": "transport-netty4",
"version": "2.12.0",
"opensearch_version": "2.12.0",
"java_version": "11",
"description": "Netty 4 based transport implementation",
"classname": "org.opensearch.transport.Netty4Plugin",
"custom_foldername": "",
"extended_plugins": [],
"has_native_controller": false
}
],
"ingest": {
"processors": [
{
"type": "append"
},
{
"type": "bytes"
},
{
"type": "convert"
},
{
"type": "copy"
},
{
"type": "csv"
},
{
"type": "date"
},
{
"type": "date_index_name"
},
{
"type": "dissect"
},
{
"type": "dot_expander"
},
{
"type": "drop"
},
{
"type": "fail"
},
{
"type": "foreach"
},
{
"type": "geoip"
},
{
"type": "geojson-feature"
},
{
"type": "grok"
},
{
"type": "gsub"
},
{
"type": "html_strip"
},
{
"type": "ip2geo"
},
{
"type": "join"
},
{
"type": "json"
},
{
"type": "kv"
},
{
"type": "lowercase"
},
{
"type": "pipeline"
},
{
"type": "remove"
},
{
"type": "remove_by_pattern"
},
{
"type": "rename"
},
{
"type": "script"
},
{
"type": "set"
},
{
"type": "sort"
},
{
"type": "sparse_encoding"
},
{
"type": "split"
},
{
"type": "text_embedding"
},
{
"type": "text_image_embedding"
},
{
"type": "trim"
},
{
"type": "uppercase"
},
{
"type": "urldecode"
},
{
"type": "user_agent"
}
]
},
"aggregations": {
"adjacency_matrix": {
"types": [
"other"
]
},
"auto_date_histogram": {
"types": [
"boolean",
"date",
"numeric"
]
},
"avg": {
"types": [
"boolean",
"date",
"numeric"
]
},
"cardinality": {
"types": [
"boolean",
"bytes",
"date",
"geo_shape",
"geopoint",
"ip",
"numeric",
"range"
]
},
"children": {
"types": [
"other"
]
},
"composite": {
"types": [
"other"
]
},
"date_histogram": {
"types": [
"boolean",
"date",
"numeric",
"range"
]
},
"date_range": {
"types": [
"boolean",
"date",
"numeric"
]
},
"diversified_sampler": {
"types": [
"boolean",
"bytes",
"date",
"numeric"
]
},
"extended_stats": {
"types": [
"boolean",
"date",
"numeric"
]
},
"filter": {
"types": [
"other"
]
},
"filters": {
"types": [
"other"
]
},
"geo_bounds": {
"types": [
"geo_shape",
"geopoint"
]
},
"geo_centroid": {
"types": [
"geopoint"
]
},
"geo_distance": {
"types": [
"geopoint"
]
},
"geohash_grid": {
"types": [
"geo_shape",
"geopoint"
]
},
"geohex_grid": {
"types": [
"geopoint"
]
},
"geotile_grid": {
"types": [
"geo_shape",
"geopoint"
]
},
"global": {
"types": [
"other"
]
},
"histogram": {
"types": [
"boolean",
"date",
"numeric",
"range"
]
},
"ip_range": {
"types": [
"ip"
]
},
"matrix_stats": {
"types": [
"other"
]
},
"max": {
"types": [
"boolean",
"date",
"numeric"
]
},
"median_absolute_deviation": {
"types": [
"numeric"
]
},
"min": {
"types": [
"boolean",
"date",
"numeric"
]
},
"missing": {
"types": [
"boolean",
"bytes",
"date",
"geo_shape",
"geopoint",
"ip",
"numeric",
"range"
]
},
"multi_terms": {
"types": [
"boolean",
"bytes",
"date",
"ip",
"numeric",
"other"
]
},
"nested": {
"types": [
"other"
]
},
"parent": {
"types": [
"other"
]
},
"percentile_ranks": {
"types": [
"boolean",
"date",
"numeric"
]
},
"percentiles": {
"types": [
"boolean",
"date",
"numeric"
]
},
"range": {
"types": [
"boolean",
"date",
"numeric"
]
},
"rare_terms": {
"types": [
"boolean",
"bytes",
"date",
"ip",
"numeric"
]
},
"reverse_nested": {
"types": [
"other"
]
},
"sampler": {
"types": [
"other"
]
},
"scripted_metric": {
"types": [
"other"
]
},
"significant_terms": {
"types": [
"boolean",
"bytes",
"date",
"ip",
"numeric"
]
},
"significant_text": {
"types": [
"other"
]
},
"stats": {
"types": [
"boolean",
"date",
"numeric"
]
},
"sum": {
"types": [
"boolean",
"date",
"numeric"
]
},
"terms": {
"types": [
"boolean",
"bytes",
"date",
"ip",
"numeric"
]
},
"top_hits": {
"types": [
"other"
]
},
"value_count": {
"types": [
"boolean",
"bytes",
"date",
"geo_shape",
"geopoint",
"ip",
"numeric",
"range"
]
},
"variable_width_histogram": {
"types": [
"numeric"
]
},
"weighted_avg": {
"types": [
"numeric"
]
}
},
"search_pipelines": {
"request_processors": [
{
"type": "filter_query"
},
{
"type": "neural_query_enricher"
},
{
"type": "oversample"
},
{
"type": "question_rewrite"
},
{
"type": "script"
}
],
"response_processors": [
{
"type": "collapse"
},
{
"type": "rename_field"
},
{
"type": "rerank"
},
{
"type": "retrieval_augmented_generation"
},
{
"type": "truncate_hits"
}
]
}
}
}
} .charm_node_lock/_source/0 endpoint shows {
"error": {
"root_cause": [
{
"type": "no_shard_available_action_exception",
"reason": "No shard available for [get [.charm_node_lock][0]: routing [null]]"
}
],
"type": "no_shard_available_action_exception",
"reason": "No shard available for [get [.charm_node_lock][0]: routing [null]]"
},
"status": 503
} .charm_node_lock endpoint shows {
".charm_node_lock": {
"aliases": {},
"mappings": {
"properties": {
"unit-name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"settings": {
"index": {
"replication": {
"type": "DOCUMENT"
},
"number_of_shards": "1",
"auto_expand_replicas": "0-all",
"provided_name": ".charm_node_lock",
"creation_date": "1713856271230",
"number_of_replicas": "0",
"uuid": "0A75oySnQIiw0ur0XdfTbw",
"version": {
"created": "136337827"
}
}
}
}
} |
Theory: the root cause of the issue could be that, when scaling from 2 -> 1 units, a new cluster manager is not elected, since the solution for #230 (described here: https://chat.canonical.com/canonical/pl/ahuxubh4u7dbprufehp5s81s5r) is not implemented. More info: https://chat.canonical.com/canonical/pl/jzu7nqu5n7gdmq3joufsgft5oh |
I did some digging, and this is essentially related to the nodes allocation exclusion on the departing node holding the primary shard of the locking index. This is addressed and fixed in #175 - where we block until the allocation exclusion fully takes effect |
Able to reliably produce by:
all units except debug-hooks unit will successfully shutdown & release opensearch lock
you'll see an error
Reproduced using the #263 PR branch. Also worth noting that the cluster manager switchover worked fine from 2 -> 1 units in this test (confirmed with Before
When two units left
After
(debug-hooks ran on unit 0) |
By checking that opensearch lock is replicated to all nodes, we should avoid edge cases where we have 2+ online nodes and then a node has a network cut & only sees 1 online node, doesn't have the lock replicated, and requests the peer databag lock since it thinks no unit has the opensearch lock ~~Potentially fixes~~ no effect on #243 @Mehdi-Bendriss reproduced the issue in #243 and found: - unit 0 was cluster manager - unit 1 (the one scaling down) had primary shard which means the issue should not be related to a failing cluster manager election with 2 -> 1 units so it's most likely that the lock document was not replicated to unit 0 Context: https://chat.canonical.com/canonical/pl/oc797xcddpn53giu6gtfp4sboo
…al, adjust the logic to destroy the application due to #243
…al, adjust the logic to destroy the application due to #243
…al, adjust the logic to destroy the application due to #243
…al, adjust the logic to destroy the application due to #243
Trying to reproduce the issue with the current main (revision 95 in channel 2/edge) did not show the error anymore. Assumption is it was resolved with one of the recent fixes, presumably #312. |
So, #312 will affect the lock release. Here, the issue was reaching out to the cluster so we can acquire the lock, or check its state. I think what may have changed between now and then is the error handling when issuing a request. |
Hi @phvalguima Could you please explain what you mean by that? |
I've further investigated here, especially the error seen in
This error happens here: https://github.com/canonical/opensearch-operator/blob/main/lib/charms/opensearch/v0/opensearch_locking.py#L241-L243 Instead of returning I've tested this now locally, and it seems to be working fine. What do you think @phvalguima ? |
## Issue When attaching an existing storage to a new unit, 2 issues happen: - Snap install failed because of permissions / ownership of directories - snap_common gets completely deleted ## Solution - bump snap version, use the fixed one (the fixed revision is 47, this is already outdated as a newer version of the snap is already available and merged to main prior to this PR) - enhance test coverage for integration tests ## Integration Testing Tests for attaching existing storage can be found in integration/ha/test_storage.py. There are now three test cases: 1. test_storage_reuse_after_scale_down: remove one unit from the deployment, afterwards add a new one re-using the storage from the removed unit. check if the continuous writes are ok and a testfile that was created initially is still there. 2. test_storage_reuse_after_scale_to_zero: remove both units from the deployment, keep the application, add two new units using the storage again. check the continuous writes. 3. test_storage_reuse_in_new_cluster_after_app_removal: from a cluster of three units, remove all of them and remove the application. deploy a new application (with one unit) to the same model, attach the storage, then add two more units with the other storage volumes. check the continuous writes. ## Other Issues - As part of this PR, another issue is addressed: #306. It is resolved with this commit: 19f843c - Furthermore problems with acquiring the OpenSearch lock are worked around with this PR, especially when the shards for the locking index within OpenSearch are not assigned to a new primary when removing the former primary. This was also reported in #243 and will be further investigated in #327.
Seems it is not possible to remove opensearch application without
--force
anymore. In the end, it ends with 2x opensearch units, both in error with: Full logs: https://pastebin.ubuntu.com/p/vHxJX9rWdr/
Core error being:
The text was updated successfully, but these errors were encountered: