diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go
index 12fea874e311f..2b5458af6af52 100644
--- a/pkg/storage/chunk/client/aws/s3_storage_client.go
+++ b/pkg/storage/chunk/client/aws/s3_storage_client.go
@@ -21,12 +21,15 @@ import (
 	"github.com/aws/aws-sdk-go/service/s3"
 	"github.com/aws/aws-sdk-go/service/s3/s3iface"
 	awscommon "github.com/grafana/dskit/aws"
+	"github.com/grafana/dskit/backoff"
 	"github.com/grafana/dskit/flagext"
 	"github.com/grafana/dskit/instrument"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
+	amnet "k8s.io/apimachinery/pkg/util/net"
+
 	bucket_s3 "github.com/grafana/loki/v3/pkg/storage/bucket/s3"
 	"github.com/grafana/loki/v3/pkg/storage/chunk/client"
 	"github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging"
@@ -532,5 +535,61 @@ func (a *S3ObjectClient) IsObjectNotFoundErr(err error) bool {
 	return false
 }
 
-// TODO(dannyk): implement for client
-func (a *S3ObjectClient) IsRetryableErr(error) bool { return false }
+func isTimeoutError(err error) bool {
+	var netErr net.Error
+	return errors.As(err, &netErr) && netErr.Timeout()
+}
+
+func isContextErr(err error) bool {
+	return errors.Is(err, context.DeadlineExceeded) ||
+		errors.Is(err, context.Canceled)
+}
+
+// IsStorageTimeoutErr returns true if error means that object cannot be retrieved right now due to server-side timeouts.
+func (a *S3ObjectClient) IsStorageTimeoutErr(err error) bool {
+	// TODO(dannyk): move these out to be generic
+	// context errors are all client-side
+	if isContextErr(err) {
+		return false
+	}
+
+	// connection misconfiguration, or writing on a closed connection
+	// do NOT retry; this is not a server-side issue
+	if errors.Is(err, net.ErrClosed) || amnet.IsConnectionRefused(err) {
+		return false
+	}
+
+	// this is a server-side timeout
+	if isTimeoutError(err) {
+		return true
+	}
+
+	// connection closed (closed before established) or reset (closed after established)
+	// this is a server-side issue
+	if errors.Is(err, io.EOF) || amnet.IsConnectionReset(err) {
+		return true
+	}
+
+	if rerr, ok := err.(awserr.RequestFailure); ok {
+		// https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html
+		return rerr.StatusCode() == http.StatusRequestTimeout ||
+			rerr.StatusCode() == http.StatusGatewayTimeout
+	}
+
+	return false
+}
+
+// IsStorageThrottledErr returns true if error means that object cannot be retrieved right now due to throttling.
+func (a *S3ObjectClient) IsStorageThrottledErr(err error) bool {
+	if rerr, ok := err.(awserr.RequestFailure); ok {
+		// https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html
+		return rerr.StatusCode() == http.StatusTooManyRequests ||
+			(rerr.StatusCode()/100 == 5) // all 5xx errors are retryable
+	}
+
+	return false
+}
+
+func (a *S3ObjectClient) IsRetryableErr(err error) bool {
+	return a.IsStorageTimeoutErr(err) || a.IsStorageThrottledErr(err)
+}
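The dskit/backoff import added above suggests the new classification is meant to drive a retry loop elsewhere in the client. A minimal sketch of how a caller could consume IsRetryableErr follows; the wrapper name, the backoff settings, and the assumption that GetObject keeps its (io.ReadCloser, int64, error) signature are illustrative, not part of this patch:

// Illustrative sketch only: retry GetObject while the client classifies the
// failure as retryable (server-side timeout or throttling). Assumes it sits
// next to the client code, with "time" and the backoff import above available.
func getObjectWithRetries(ctx context.Context, c *S3ObjectClient, key string) (io.ReadCloser, int64, error) {
	retries := backoff.New(ctx, backoff.Config{
		MinBackoff: 100 * time.Millisecond, // assumed values, tune per workload
		MaxBackoff: 3 * time.Second,
		MaxRetries: 5,
	})
	var lastErr error
	for retries.Ongoing() {
		r, size, err := c.GetObject(ctx, key)
		if err == nil {
			return r, size, nil
		}
		if !c.IsRetryableErr(err) {
			return nil, 0, err
		}
		lastErr = err
		retries.Wait()
	}
	if lastErr == nil {
		lastErr = retries.Err()
	}
	return nil, 0, lastErr
}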
diff --git a/pkg/storage/chunk/client/aws/s3_storage_client_test.go b/pkg/storage/chunk/client/aws/s3_storage_client_test.go
index 3a2c1e8dc33c3..ba2939ff46884 100644
--- a/pkg/storage/chunk/client/aws/s3_storage_client_test.go
+++ b/pkg/storage/chunk/client/aws/s3_storage_client_test.go
@@ -6,9 +6,11 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"net"
 	"net/http"
 	"net/http/httptest"
 	"strings"
+	"syscall"
 	"testing"
 	"time"
 
@@ -73,6 +75,108 @@ func TestIsObjectNotFoundErr(t *testing.T) {
 	}
 }
 
+func TestIsRetryableErr(t *testing.T) {
+	tests := []struct {
+		err      error
+		expected bool
+		name     string
+	}{
+		{
+			name: "IsStorageThrottledErr - Too Many Requests",
+			err: awserr.NewRequestFailure(
+				awserr.New("TooManyRequests", "TooManyRequests", nil), 429, "reqId",
+			),
+			expected: true,
+		},
+		{
+			name: "IsStorageThrottledErr - 500",
+			err: awserr.NewRequestFailure(
+				awserr.New("500", "500", nil), 500, "reqId",
+			),
+			expected: true,
+		},
+		{
+			name: "IsStorageThrottledErr - 5xx",
+			err: awserr.NewRequestFailure(
+				awserr.New("501", "501", nil), 501, "reqId",
+			),
+			expected: true,
+		},
+		{
+			name: "IsStorageTimeoutErr - Request Timeout",
+			err: awserr.NewRequestFailure(
+				awserr.New("Request Timeout", "Request Timeout", nil), 408, "reqId",
+			),
+			expected: true,
+		},
+		{
+			name: "IsStorageTimeoutErr - Gateway Timeout",
+			err: awserr.NewRequestFailure(
+				awserr.New("Gateway Timeout", "Gateway Timeout", nil), 504, "reqId",
+			),
+			expected: true,
+		},
+		{
+			name:     "IsStorageTimeoutErr - EOF",
+			err:      io.EOF,
+			expected: true,
+		},
+		{
+			name:     "IsStorageTimeoutErr - Connection Reset",
+			err:      syscall.ECONNRESET,
+			expected: true,
+		},
+		{
+			name: "IsStorageTimeoutErr - Timeout Error",
+			err: awserr.NewRequestFailure(
+				awserr.New("RequestCanceled", "request canceled due to timeout", nil), 408, "request-id",
+			),
+			expected: true,
+		},
+		{
+			name:     "IsStorageTimeoutErr - Closed",
+			err:      net.ErrClosed,
+			expected: false,
+		},
+		{
+			name:     "IsStorageTimeoutErr - Connection Refused",
+			err:      syscall.ECONNREFUSED,
+			expected: false,
+		},
+		{
+			name:     "IsStorageTimeoutErr - Context Deadline Exceeded",
+			err:      context.DeadlineExceeded,
+			expected: false,
+		},
+		{
+			name:     "IsStorageTimeoutErr - Context Canceled",
+			err:      context.Canceled,
+			expected: false,
+		},
+		{
+			name:     "Not a retryable error",
+			err:      syscall.EINVAL,
+			expected: false,
+		},
+		{
+			name: "Not found 404",
+			err: awserr.NewRequestFailure(
+				awserr.New("404", "404", nil), 404, "reqId",
+			),
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			client, err := NewS3ObjectClient(S3Config{BucketNames: "mybucket"}, hedging.Config{})
+			require.NoError(t, err)
+
+			require.Equal(t, tt.expected, client.IsRetryableErr(tt.err))
+		})
+	}
+}
+
 func TestRequestMiddleware(t *testing.T) {
 	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		fmt.Fprintln(w, r.Header.Get("echo-me"))
diff --git a/production/helm/loki/src/alerts.yaml.tpl b/production/helm/loki/src/alerts.yaml.tpl
index 144e263f7061f..0aa37b708b523 100644
--- a/production/helm/loki/src/alerts.yaml.tpl
+++ b/production/helm/loki/src/alerts.yaml.tpl
@@ -52,7 +52,7 @@ groups:
           message: |
             {{`{{`}} $labels.cluster {{`}}`}} {{`{{`}} $labels.namespace {{`}}`}} has had {{`{{`}} printf "%.0f" $value {{`}}`}} compactors running for more than 5m. Only one compactor should run at a time.
         expr: |
-          sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
+          sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1
         for: "5m"
         labels:
           severity: "warning"
diff --git a/production/loki-mixin-compiled-ssd/alerts.yaml b/production/loki-mixin-compiled-ssd/alerts.yaml
index 7c0825d8580d6..09b9b6f543412 100644
--- a/production/loki-mixin-compiled-ssd/alerts.yaml
+++ b/production/loki-mixin-compiled-ssd/alerts.yaml
@@ -4,12 +4,12 @@ groups:
   - alert: LokiRequestErrors
     annotations:
       description: |
-        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+        {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
       summary: Loki request error rate is high.
     expr: |
-      100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
+      100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route)
         /
-      sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route)
+      sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route)
         > 10
     for: 15m
     labels:
@@ -17,16 +17,16 @@ groups:
   - alert: LokiRequestPanics
     annotations:
       description: |
-        {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+        {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
       summary: Loki requests are causing code panics.
     expr: |
-      sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
+      sum(increase(loki_panic_total[10m])) by (cluster, namespace, job) > 0
     labels:
       severity: critical
   - alert: LokiRequestLatency
     annotations:
       description: |
-        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+        {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
       summary: Loki request error latency is high.
     expr: |
       cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
@@ -39,7 +39,7 @@ groups:
         {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
       summary: Loki deployment is running more than one compactor.
     expr: |
-      sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
+      sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1
     for: 5m
     labels:
       severity: warning
diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json b/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json
index 6e669361a057c..72b6eaf785b42 100644
--- a/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json
+++ b/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json
@@ -325,7 +325,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 3,
          "targets": [
             {
               "expr": "sum by(pod) (loki_write_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})",
@@ -414,7 +414,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 3,
          "targets": [
            {
               "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"}[$__rate_interval]))",
@@ -515,7 +515,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 3,
          "targets": [
            {
               "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})",
@@ -577,7 +577,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 3,
          "targets": [
            {
               "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})",
@@ -591,7 +591,19 @@
               "sort": 2
            },
            "type": "timeseries"
-         },
+         }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": true,
+      "title": "Write path",
+      "titleSize": "h6"
+   },
+   {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
          {
             "datasource": "$datasource",
             "fieldConfig": {
@@ -627,7 +639,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -674,7 +686,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -721,7 +733,7 @@
                "sort": "none"
             }
          },
-         "span": 1,
+         "span": 4,
          "targets": [
            {
               "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-write.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-write.*\"})",
@@ -738,7 +750,7 @@
       "repeatIteration": null,
       "repeatRowId": null,
       "showTitle": true,
-      "title": "Write path",
+      "title": "",
       "titleSize": "h6"
    },
    {
@@ -819,7 +831,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"}[$__rate_interval]))",
@@ -920,7 +932,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"})",
@@ -982,7 +994,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"})",
@@ -996,7 +1008,19 @@
               "sort": 2
            },
            "type": "timeseries"
-         },
+         }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": true,
+      "title": "Backend path",
+      "titleSize": "h6"
+   },
+   {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
          {
             "datasource": "$datasource",
             "fieldConfig": {
@@ -1032,7 +1056,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -1079,7 +1103,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n",
@@ -1126,7 +1150,7 @@
                "sort": "none"
             }
          },
-         "span": 2,
+         "span": 4,
          "targets": [
            {
               "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-backend.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-backend.*\"})",
@@ -1143,7 +1167,7 @@
       "repeatIteration": null,
       "repeatRowId": null,
       "showTitle": true,
-      "title": "Backend path",
+      "title": "",
       "titleSize": "h6"
    }
 ],
diff --git a/production/loki-mixin-compiled/alerts.yaml b/production/loki-mixin-compiled/alerts.yaml
index 7c0825d8580d6..09b9b6f543412 100644
--- a/production/loki-mixin-compiled/alerts.yaml
+++ b/production/loki-mixin-compiled/alerts.yaml
@@ -4,12 +4,12 @@ groups:
   - alert: LokiRequestErrors
     annotations:
       description: |
-        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+        {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
       summary: Loki request error rate is high.
     expr: |
-      100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
+      100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route)
         /
-      sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route)
+      sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route)
         > 10
     for: 15m
     labels:
@@ -17,16 +17,16 @@ groups:
   - alert: LokiRequestPanics
     annotations:
      description: |
-        {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+        {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
       summary: Loki requests are causing code panics.
     expr: |
-      sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
+      sum(increase(loki_panic_total[10m])) by (cluster, namespace, job) > 0
     labels:
       severity: critical
   - alert: LokiRequestLatency
     annotations:
       description: |
-        {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+        {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
       summary: Loki request error latency is high.
     expr: |
       cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1
@@ -39,7 +39,7 @@ groups:
        {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
       summary: Loki deployment is running more than one compactor.
     expr: |
-      sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1
+      sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1
     for: 5m
     labels:
       severity: warning
diff --git a/production/loki-mixin/alerts.libsonnet b/production/loki-mixin/alerts.libsonnet
index 5bff18e72c6e5..9261dbccecf99 100644
--- a/production/loki-mixin/alerts.libsonnet
+++ b/production/loki-mixin/alerts.libsonnet
@@ -6,36 +6,36 @@
         rules: [
           {
             alert: 'LokiRequestErrors',
-            expr: |||
-              100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route)
+            expr: std.strReplace(|||
+              100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route)
                 /
-              sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route)
+              sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route)
                 > 10
-            |||,
+            |||, 'cluster', $._config.per_cluster_label),
             'for': '15m',
             labels: {
               severity: 'critical',
             },
             annotations: {
               summary: 'Loki request error rate is high.',
-              description: |||
-                {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
-              |||,
+              description: std.strReplace(|||
+                {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
+              |||, 'cluster', $._config.per_cluster_label),
             },
           },
           {
             alert: 'LokiRequestPanics',
             expr: |||
-              sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
-            |||,
+              sum(increase(loki_panic_total[10m])) by (%s, namespace, job) > 0
+            ||| % $._config.per_cluster_label,
             labels: {
               severity: 'critical',
             },
             annotations: {
               summary: 'Loki requests are causing code panics.',
-              description: |||
-                {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
-              |||,
+              description: std.strReplace(|||
+                {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
+              |||, 'cluster', $._config.per_cluster_label),
             },
           },
           {
@@ -49,15 +49,15 @@
             },
             annotations: {
               summary: 'Loki request error latency is high.',
-              description: |||
-                {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
-              |||,
+              description: std.strReplace(|||
+                {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
+              |||, 'cluster', $._config.per_cluster_label),
             },
           },
           {
             alert: 'LokiTooManyCompactorsRunning',
             expr: |||
-              sum(loki_boltdb_shipper_compactor_running) by (namespace, %s) > 1
+              sum(loki_boltdb_shipper_compactor_running) by (%s, namespace) > 1
             ||| % $._config.per_cluster_label,
             'for': '5m',
             labels: {
diff --git a/production/loki-mixin/dashboards/loki-resources-overview.libsonnet b/production/loki-mixin/dashboards/loki-resources-overview.libsonnet
index 76c77a1453446..a93df5d42e41d 100644
--- a/production/loki-mixin/dashboards/loki-resources-overview.libsonnet
+++ b/production/loki-mixin/dashboards/loki-resources-overview.libsonnet
@@ -41,7 +41,7 @@
         ) +
         {
           tooltip: { sort: 2 },  // Sort descending.
-        },
+        }
       )
       .addPanel(
         $.CPUUsagePanel('CPU', write_pod_matcher),
@@ -52,6 +52,9 @@
       .addPanel(
         $.goHeapInUsePanel('Memory (go heap inuse)', write_job_matcher),
       )
+    )
+    .addRow(
+      $.row('')
       .addPanel(
         $.newQueryPanel('Disk Writes', 'Bps') +
         $.queryPanel(
@@ -83,6 +86,9 @@
       .addPanel(
         $.goHeapInUsePanel('Memory (go heap inuse)', backend_job_matcher),
       )
+    )
+    .addRow(
+      $.row('')
       .addPanel(
         $.newQueryPanel('Disk Writes', 'Bps') +
         $.queryPanel(
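In the libsonnet source, both the alert expressions and the annotation text are now templated through $._config.per_cluster_label (via std.strReplace and %s), so a consumer of the mixin can rename the cluster label consistently in one place. A minimal sketch of such an override follows; the import path and the replacement label value are illustrative assumptions, not part of this patch:

// Hypothetical mixin consumer: every per-cluster 'cluster' reference in the
// generated alerts is rewritten to 'k8s_cluster' by the config override.
(import 'mixin.libsonnet') + {
  _config+:: {
    per_cluster_label: 'k8s_cluster',
  },
}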