Skip to content

Commit

Permalink
Deploy postgres-exporter (#74)
Browse files Browse the repository at this point in the history
Enables deploying postgres-exporter to collect PostgreSQL statistics.
This is done by setting the environment variable PSQL_EXPORT to true; if the value is not set,
PSQL_EXPORT defaults to false.
postgres-exporter is installed using the Helm chart prometheus-community/prometheus-postgres-exporter.
  • Loading branch information
yogananth-subramanian authored Sep 27, 2024
1 parent 7334f51 commit a3bf22d
Show file tree
Hide file tree
Showing 6 changed files with 561 additions and 0 deletions.
37 changes: 37 additions & 0 deletions ci-scripts/rhdh-setup/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export ENABLE_PROFILING="${ENABLE_PROFILING:-false}"

export PSQL_LOG="${PSQL_LOG:-true}"
export RHDH_METRIC="${RHDH_METRIC:-true}"
export PSQL_EXPORT="${PSQL_EXPORT:-false}"
export LOG_MIN_DURATION_STATEMENT="${LOG_MIN_DURATION_STATEMENT:-65}"
export LOG_MIN_DURATION_SAMPLE="${LOG_MIN_DURATION_SAMPLE:-50}"
export LOG_STATEMENT_SAMPLE_RATE="${LOG_STATEMENT_SAMPLE_RATE:-0.7}"
Expand Down Expand Up @@ -346,13 +347,49 @@ psql_debug() {
$clin exec "${psql_db}" -- sh -c "sed -i "s/^\s*#log_min_duration_sample.*/log_min_duration_sample=${LOG_MIN_DURATION_SAMPLE}/" /var/lib/pgsql/data/userdata/postgresql.conf "
$clin exec "${psql_db}" -- sh -c "sed -i "s/^\s*#log_statement_sample_rate.*/log_statement_sample_rate=${LOG_STATEMENT_SAMPLE_RATE}/" /var/lib/pgsql/data/userdata/postgresql.conf "
fi
if ${PSQL_EXPORT}; then
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_io_timing.*/track_io_timing = on/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_wal_io_timing.*/track_wal_io_timing = on/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_functions.*/track_functions = all/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#stats_fetch_consistency.*/stats_fetch_consistency = cache/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c "echo shared_preload_libraries = \'pgaudit,auto_explain,pg_stat_statements\' >> /var/lib/pgsql/data/userdata/postgresql.conf"
fi
echo "Restarting RHDH DB..."
$clin rollout restart statefulset/"$psql_db_ss"
wait_to_start statefulset "$psql_db_ss" 300 300

if ${PSQL_EXPORT}; then
$clin exec "${psql_db}" -- sh -c 'psql -c "CREATE EXTENSION pg_stat_statements;"'
uid=$(oc get namespace "${RHDH_NAMESPACE}" -o go-template='{{ index .metadata.annotations "openshift.io/sa.scc.supplemental-groups" }}'| cut -d '/' -f 1)
pg_pass=$(${clin} get secret rhdh-postgresql -o jsonpath='{.data.postgres-password}'|base64 -d)
plugins=("backstage_plugin_permission" "backstage_plugin_auth" "backstage_plugin_catalog" "backstage_plugin_scaffolder" "backstage_plugin_search" "backstage_plugin_app")
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
cp template/postgres-exporter/chart-values.yaml "$TMP_DIR/pg-exporter.yaml"
sed -i "s/uid/$uid/g" "$TMP_DIR/pg-exporter.yaml"
sed -i "s/pg_password/'$pg_pass'/g" "$TMP_DIR/pg-exporter.yaml"
helm install pg-exporter prometheus-community/prometheus-postgres-exporter -n "${RHDH_NAMESPACE}" -f "$TMP_DIR/pg-exporter.yaml"
for plugin in "${plugins[@]}"; do
cp template/postgres-exporter/values-template.yaml "${TMP_DIR}/${plugin}.yaml"
sed -i "s/'dbname'/'$plugin'/" "${TMP_DIR}/${plugin}.yaml"
sed -i "s/uid/$uid/g" "${TMP_DIR}/${plugin}.yaml"
sed -i "s/pg_password/'$pg_pass'/g" "${TMP_DIR}/${plugin}.yaml"
helm_name=${plugin//_/-}
helm install "${helm_name}" prometheus-community/prometheus-postgres-exporter -n "${RHDH_NAMESPACE}" -f "${TMP_DIR}/${plugin}.yaml"
done
fi

echo "Restarting RHDH..."
$clin rollout restart deployment/"$rhdh_deployment"
wait_to_start deployment "$rhdh_deployment" 300 300
if ${PSQL_EXPORT}; then
plugins=("pg-exporter" "backstage-plugin-permission" "backstage-plugin-auth" "backstage-plugin-catalog" "backstage-plugin-scaffolder" "backstage-plugin-search" "backstage-plugin-app")
for plugin in "${plugins[@]}"; do
cp template/postgres-exporter/service-monitor-template.yaml "${TMP_DIR}/${plugin}-monitor.yaml"
sed -i "s/pglabel/$plugin/" "${TMP_DIR}/${plugin}-monitor.yaml"
sed -i "s/pgnamespace/$RHDH_NAMESPACE/g" "${TMP_DIR}/${plugin}-monitor.yaml"
$clin create -f "${TMP_DIR}/${plugin}-monitor.yaml"
done
fi
}
setup_monitoring() {
echo "Enabling user workload monitoring"
Expand Down
25 changes: 25 additions & 0 deletions ci-scripts/rhdh-setup/template/postgres-exporter/chart-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Helm values for the postgres-exporter release that deploy.sh installs as "pg-exporter".
# deploy.sh copies this file into its temp dir and rewrites the placeholder tokens with sed
# before running `helm install`.
podSecurityContext:
# Placeholder: deploy.sh substitutes the first '/'-separated field of the namespace's
# "openshift.io/sa.scc.supplemental-groups" annotation for both runAs* values.
runAsGroup: uid
runAsUser: uid
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
securityContext: {}
config:
logLevel: 'debug'
# Extra command-line flags for the exporter: additional collectors plus database auto-discovery.
extraArgs:
- --collector.long_running_transactions
- --collector.process_idle
- --collector.stat_activity_autovacuum
- --collector.stat_user_tables
- --collector.statio_user_indexes
- --collector.statio_user_tables
- --collector.postmaster
- --collector.stat_statements
- --auto-discover-databases
datasource:
host: 'rhdh-postgresql-primary'
user: 'postgres'
# Placeholder: deploy.sh substitutes the single-quoted postgres-password value
# it reads from the rhdh-postgresql secret.
password: pg_password
port: "5432"
# Left empty here; no specific database is named for this release.
database: ''
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ServiceMonitor template. deploy.sh instantiates one copy per postgres-exporter Helm release
# and creates it in the cluster. The label token and the namespace token below are
# placeholders that deploy.sh rewrites with sed (Helm release name and RHDH_NAMESPACE).
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
release: pglabel
name: prometheus-pglabel
spec:
# Scrape the exporter's "http" port at /metrics every 30 seconds.
endpoints:
- interval: 30s
port: http
scheme: http
path: /metrics
namespaceSelector:
matchNames:
- pgnamespace
# Select the services carrying the same release label that is set in metadata above.
selector:
matchLabels:
release: pglabel
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Helm values template for the per-database postgres-exporter releases. deploy.sh copies it
# once per backstage plugin database and rewrites the placeholder tokens with sed before
# running `helm install`.
podSecurityContext:
# Placeholder: deploy.sh substitutes the first '/'-separated field of the namespace's
# "openshift.io/sa.scc.supplemental-groups" annotation for both runAs* values.
runAsGroup: uid
runAsUser: uid
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
securityContext: {}
config:
logLevel: 'debug'
# The --no-collector.* flags switch collectors off; only the three user table/index
# collectors at the end of the list are switched on.
extraArgs:
- --no-collector.database
- --no-collector.database_wraparound
- --no-collector.locks
- --no-collector.long_running_transactions
- --no-collector.postmaster
- --no-collector.process_idle
- --no-collector.replication
- --no-collector.replication_slot
- --no-collector.stat_activity_autovacuum
- --no-collector.stat_bgwriter
- --no-collector.stat_database
- --no-collector.stat_statements
- --no-collector.stat_wal_receiver
- --no-collector.wal
- --no-collector.xlog_location
- --collector.stat_user_tables
- --collector.statio_user_indexes
- --collector.statio_user_tables
datasource:
host: 'rhdh-postgresql-primary'
user: 'postgres'
# Placeholder: deploy.sh substitutes the single-quoted postgres-password value
# it reads from the rhdh-postgresql secret.
password: pg_password
port: "5432"
# Placeholder: deploy.sh substitutes the plugin database name. Its sed pattern matches the
# token including the single quotes, so the quoting on the next line must stay as is.
database: 'dbname'
223 changes: 223 additions & 0 deletions config/cluster_read_config.populate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,226 @@ value }}
- name: measurements.nodejs.populate.nodejs_gc_duration_seconds_major_average
monitoring_query: sum(rate(nodejs_gc_duration_seconds_sum{kind="major",job="rhdh-metrics"}[5m]))/sum(rate(nodejs_gc_duration_seconds_count{kind="major",job="rhdh-metrics"}[5m]))
monitoring_step: 15


{# pg_query_sum(alias, query): emits one measurement that sums `query` over all series whose
   `service` label is "<alias>-prometheus-postgres-exporter".
   The loops below instantiate it for every user table/index metric and every exporter release. -#}
{% macro pg_query_sum(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: sum({{ query }}{service='{{ alias }}-prometheus-postgres-exporter'})
  monitoring_step: 15
{%- endmacro %}

{% set user_table_metrics = [
  'pg_statio_user_indexes_idx_blks_hit_total',
  'pg_statio_user_indexes_idx_blks_read_total',
  'pg_statio_user_tables_heap_blocks_hit',
  'pg_statio_user_tables_heap_blocks_read',
  'pg_statio_user_tables_idx_blocks_hit',
  'pg_statio_user_tables_idx_blocks_read',
  'pg_statio_user_tables_tidx_blocks_hit',
  'pg_statio_user_tables_tidx_blocks_read',
  'pg_statio_user_tables_toast_blocks_hit',
  'pg_statio_user_tables_toast_blocks_read',
  'pg_stat_user_tables_vacuum_count',
  'pg_stat_user_tables_size_bytes',
  'pg_stat_user_tables_seq_tup_read',
  'pg_stat_user_tables_seq_scan',
  'pg_stat_user_tables_n_tup_upd',
  'pg_stat_user_tables_n_tup_ins',
  'pg_stat_user_tables_n_tup_hot_upd',
  'pg_stat_user_tables_n_tup_del',
  'pg_stat_user_tables_n_mod_since_analyze',
  'pg_stat_user_tables_n_live_tup',
  'pg_stat_user_tables_n_dead_tup',
  'pg_stat_user_tables_last_vacuum',
  'pg_stat_user_tables_last_autovacuum',
  'pg_stat_user_tables_last_autoanalyze',
  'pg_stat_user_tables_last_analyze',
  'pg_stat_user_tables_idx_tup_fetch',
  'pg_stat_user_tables_idx_scan',
  'pg_stat_user_tables_autovacuum_count',
  'pg_stat_user_tables_autoanalyze_count',
  'pg_stat_user_tables_analyze_count'
] -%}
{% set exporter_releases = [
  'backstage-plugin-permission',
  'backstage-plugin-auth',
  'backstage-plugin-catalog',
  'backstage-plugin-scaffolder',
  'backstage-plugin-search',
  'backstage-plugin-app'
] -%}
{% for metric in user_table_metrics %}
{% for release in exporter_releases %}
{{ pg_query_sum(release, metric) }}
{% endfor %}
{% endfor %}

{# pg_query(alias, query): emits one measurement that reads `query` for the series whose
   `datname` label equals `alias` (no aggregation is applied).
   The loops below instantiate it for every database-level metric and every plugin database. -#}
{% macro pg_query(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: {{ query }}{datname="{{ alias }}"}
  monitoring_step: 15
{%- endmacro %}

{% set database_metrics = [
  'pg_stat_database_blk_read_time',
  'pg_stat_database_blk_write_time',
  'pg_stat_database_blks_hit',
  'pg_stat_database_blks_read',
  'pg_stat_database_conflicts',
  'pg_stat_database_conflicts_confl_bufferpin',
  'pg_stat_database_conflicts_confl_deadlock',
  'pg_stat_database_conflicts_confl_lock',
  'pg_stat_database_conflicts_confl_snapshot',
  'pg_stat_database_conflicts_confl_tablespace',
  'pg_stat_database_deadlocks',
  'pg_stat_database_numbackends',
  'pg_stat_database_temp_bytes',
  'pg_stat_database_temp_files',
  'pg_stat_database_tup_deleted',
  'pg_stat_database_tup_fetched',
  'pg_stat_database_tup_inserted',
  'pg_stat_database_tup_returned',
  'pg_stat_database_tup_updated',
  'pg_stat_database_xact_commit',
  'pg_stat_database_xact_rollback',
  'pg_database_size_bytes'
] -%}
{% set plugin_databases = [
  'backstage_plugin_permission',
  'backstage_plugin_auth',
  'backstage_plugin_catalog',
  'backstage_plugin_scaffolder',
  'backstage_plugin_search',
  'backstage_plugin_app'
] -%}
{% for metric in database_metrics %}
{% for database in plugin_databases %}
{{ pg_query(database, metric) }}
{% endfor %}
{% endfor %}

{# pg_stat_statements_sum(alias, query): emits one measurement that sums `query` over all
   series whose `datname` label equals `alias`.
   The loops below instantiate it for the statement/lock metrics and every plugin database. -#}
{% macro pg_stat_statements_sum(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: sum({{ query }}{datname='{{ alias }}'})
  monitoring_step: 15
{%- endmacro %}

{% set statement_metrics = [
  'pg_stat_statements_block_read_seconds_total',
  'pg_stat_statements_block_write_seconds_total',
  'pg_stat_statements_calls_total',
  'pg_stat_statements_rows_total',
  'pg_stat_statements_seconds_total',
  'pg_locks_count'
] -%}
{% set statement_databases = [
  'backstage_plugin_permission',
  'backstage_plugin_auth',
  'backstage_plugin_catalog',
  'backstage_plugin_scaffolder',
  'backstage_plugin_search',
  'backstage_plugin_app'
] -%}
{% for metric in statement_metrics %}
{% for database in statement_databases %}
{{ pg_stat_statements_sum(database, metric) }}
{% endfor %}
{% endfor %}


{# pg_settings(query): emits one measurement that reads `query` from the series whose
   `service` label is "pg-exporter-prometheus-postgres-exporter".
   The macro takes only the metric name, so its rendered comment must not reference a
   database alias. The loop below instantiates it for every server-level metric. -#}
{% macro pg_settings(query) -%}
# Gather server-level monitoring data: {{ query }}
- name: measurements.postgresql.populate.{{ query }}
  monitoring_query: {{ query }}{service="pg-exporter-prometheus-postgres-exporter"}
  monitoring_step: 30
{%- endmacro %}

{% for query in [
  'pg_settings_max_connections',
  'pg_settings_superuser_reserved_connections',
  'pg_settings_shared_buffers_bytes',
  'pg_settings_work_mem_bytes',
  'pg_settings_maintenance_work_mem_bytes',
  'pg_settings_shared_memory_size_in_huge_pages',
  'pg_settings_effective_cache_size_bytes',
  'pg_settings_effective_io_concurrency',
  'pg_settings_random_page_cost',
  'pg_settings_track_io_timing',
  'pg_settings_max_wal_senders',
  'pg_settings_checkpoint_timeout_seconds',
  'pg_settings_checkpoint_completion_target',
  'pg_settings_max_wal_size_bytes',
  'pg_settings_min_wal_size_bytes',
  'pg_settings_wal_buffers_bytes',
  'pg_settings_wal_writer_delay_seconds',
  'pg_settings_wal_writer_flush_after_bytes',
  'pg_settings_bgwriter_delay_seconds',
  'pg_settings_bgwriter_lru_maxpages',
  'pg_settings_bgwriter_lru_multiplier',
  'pg_settings_bgwriter_flush_after_bytes',
  'pg_settings_max_worker_processes',
  'pg_settings_max_parallel_workers_per_gather',
  'pg_settings_max_parallel_maintenance_workers',
  'pg_settings_max_parallel_workers',
  'pg_settings_parallel_leader_participation',
  'pg_settings_enable_partitionwise_join',
  'pg_settings_enable_partitionwise_aggregate',
  'pg_settings_jit',
  'pg_settings_max_slot_wal_keep_size_bytes',
  'pg_settings_track_wal_io_timing',
  'pg_settings_maintenance_io_concurrency',
  'pg_settings_wal_recycle',
  'pg_process_idle_seconds_sum',
  'pg_process_idle_seconds_count',
  'pg_stat_bgwriter_buffers_alloc_total',
  'pg_stat_bgwriter_buffers_backend_fsync_total',
  'pg_stat_bgwriter_buffers_backend_total',
  'pg_stat_bgwriter_buffers_checkpoint_total',
  'pg_stat_bgwriter_buffers_clean_total',
  'pg_stat_bgwriter_checkpoint_sync_time_total',
  'pg_stat_bgwriter_checkpoint_write_time_total',
  'pg_stat_bgwriter_checkpoints_req_total',
  'pg_stat_bgwriter_checkpoints_timed_total',
  'pg_stat_bgwriter_maxwritten_clean_total',
  'pg_stat_archiver_archived_count',
  'pg_stat_archiver_failed_count',
  'pg_long_running_transactions',
  'pg_long_running_transactions_oldest_timestamp_seconds',
  'pg_wal_segments',
  'pg_wal_size_bytes',
  'process_cpu_seconds_total',
  'process_max_fds',
  'process_open_fds',
  'process_resident_memory_bytes',
  'process_virtual_memory_bytes',
  'process_virtual_memory_max_bytes'
] %}
{{ pg_settings( query ) }}
{% endfor %}

{# pg_stat_activity(alias, query, state): emits one measurement that sums `query` over the
   series of the "pg-exporter-prometheus-postgres-exporter" service whose `datname` label
   equals `alias` and whose `state` label matches `state`.
   `state` is the short key used in the measurement name. PostgreSQL reports the fastpath
   state as "fastpath function call", so that key is translated to the real label value
   before it is put into the selector; every other key is used verbatim. -#}
{% macro pg_stat_activity(alias, query, state) -%}
{% set state_labels = {'fastpath': 'fastpath function call'} -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}.{{ state }}
  monitoring_query: sum({{ query }}{datname='{{ alias }}',state='{{ state_labels.get(state, state) }}',service="pg-exporter-prometheus-postgres-exporter"})
  monitoring_step: 15
{%- endmacro %}

{% for query in [
  'pg_stat_activity_count',
  'pg_stat_activity_max_tx_duration'
] %}
{% for db in [
  'backstage_plugin_permission',
  'backstage_plugin_auth',
  'backstage_plugin_catalog',
  'backstage_plugin_scaffolder',
  'backstage_plugin_search',
  'backstage_plugin_app'
] %}
{% for state in [
  'active',
  'disabled',
  'fastpath',
  'idle'
] %}
{{ pg_stat_activity(db, query, state ) }}
{% endfor %}
{% endfor %}
{% endfor %}
Loading

0 comments on commit a3bf22d

Please sign in to comment.