From f3659160d86d1e940d2e4540d60cefd56d52c71d Mon Sep 17 00:00:00 2001 From: Nicola Tarocco Date: Tue, 31 Oct 2023 17:26:48 +0100 Subject: [PATCH] pbbouncer: add pgbouncer config and docker service --- docker-compose.yml | 6 +- docker-services.yml | 10 +- docker/pgbouncer/pgbouncer.ini | 310 +++++++++++++++++++++++++++++++++ docker/pgbouncer/userlist.txt | 1 + invenio.cfg | 18 +- 5 files changed, 341 insertions(+), 4 deletions(-) create mode 100644 docker/pgbouncer/pgbouncer.ini create mode 100644 docker/pgbouncer/userlist.txt diff --git a/docker-compose.yml b/docker-compose.yml index d0ce411c..23557c73 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,10 @@ services: extends: file: docker-services.yml service: db + pgbouncer: + extends: + file: docker-services.yml + service: pgbouncer mq: extends: file: docker-services.yml @@ -23,4 +27,4 @@ services: pgadmin: extends: file: docker-services.yml - service: pgadmin \ No newline at end of file + service: pgadmin diff --git a/docker-services.yml b/docker-services.yml index ffedad36..40bb275c 100644 --- a/docker-services.yml +++ b/docker-services.yml @@ -16,7 +16,7 @@ services: - "INVENIO_CELERY_RESULT_BACKEND=redis://cache:6379/2" - "INVENIO_SEARCH_HOSTS=['search:9200']" - "INVENIO_SECRET_KEY=CHANGE_ME" - - "INVENIO_SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://zenodo:zenodo@db/zenodo" + - "INVENIO_SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://zenodo:zenodo@pgbouncer:6432/zenodo" - "INVENIO_WSGI_PROXIES=2" - "INVENIO_RATELIMIT_STORAGE_URL=redis://cache:6379/3" - "INVENIO_SITE_UI_URL=https://127.0.0.1" @@ -54,6 +54,14 @@ services: PGADMIN_DEFAULT_PASSWORD: "zenodo" volumes: - ./docker/pgadmin/servers.json:/pgadmin4/servers.json + pgbouncer: + image: registry.cern.ch/pgbouncer/pgbouncer:1.19.1 + restart: "unless-stopped" + ports: + - "6432:6432" + volumes: + - ./docker/pgbouncer/pgbouncer.ini:/etc/pgbouncer/pgbouncer.ini + - ./docker/pgbouncer/userlist.txt:/etc/pgbouncer/userlist.txt mq: image: registry.cern.ch/docker.io/library/rabbitmq:3-management restart: "unless-stopped" diff --git a/docker/pgbouncer/pgbouncer.ini b/docker/pgbouncer/pgbouncer.ini new file mode 100644 index 00000000..1bc98429 --- /dev/null +++ b/docker/pgbouncer/pgbouncer.ini @@ -0,0 +1,310 @@ +;;; +;;; PgBouncer configuration file +;;; + +[databases] +zenodo = host=db port=5432 auth_user=zenodo dbname=zenodo + +;; User-specific configuration +[users] + +;user1 = pool_mode=transaction max_user_connections=10 + +;; Configuration section +[pgbouncer] + +;;; +;;; Administrative settings +;;; + +logfile = /dev/stdout +; pidfile = /var/run/pgbouncer/pgbouncer.pid + +;;; +;;; Where to wait for clients +;;; + +;; IP address or * which means all IPs +listen_addr = 0.0.0.0 +listen_port = 6432 + +;; Unix socket is also used for -R. +;; On Debian it should be /var/run/postgresql +;unix_socket_dir = /tmp +;unix_socket_mode = 0777 +;unix_socket_group = + +;; The peer id used to identify this pgbouncer process in a group of pgbouncer +;; processes that are peered together. When set to 0 pgbouncer peering is disabled +;peer_id = 0 + +;;; +;;; TLS settings for accepting clients +;;; + +;; disable, allow, require, verify-ca, verify-full +;client_tls_sslmode = disable + +;; Path to file that contains trusted CA certs +;client_tls_ca_file = + +;; Private key and cert to present to clients. +;; Required for accepting TLS connections from clients. +;client_tls_key_file = +;client_tls_cert_file = + +;; default, secure, fast, normal, +;client_tls_ciphers = default + +;; all, secure, tlsv1.0, tlsv1.1, tlsv1.2, tlsv1.3 +;client_tls_protocols = secure + +;; none, auto, legacy +;client_tls_dheparams = auto + +;; none, auto, +;client_tls_ecdhcurve = auto + +;;; +;;; TLS settings for connecting to backend databases +;;; + +;; disable, allow, require, verify-ca, verify-full +;server_tls_sslmode = disable + +;; Path to that contains trusted CA certs +;server_tls_ca_file = + +;; Private key and cert to present to backend. +;; Needed only if backend server require client cert. +;server_tls_key_file = +;server_tls_cert_file = + +;; all, secure, tlsv1.0, tlsv1.1, tlsv1.2, tlsv1.3 +;server_tls_protocols = secure + +;; default, secure, fast, normal, +;server_tls_ciphers = default + +;;; +;;; Authentication settings +;;; + + +;; any, trust, plain, md5, cert, hba, pam +auth_type = md5 +auth_file = /etc/pgbouncer/userlist.txt + +;; Path to HBA-style auth config +;auth_hba_file = + +;; Query to use to fetch password from database. Result +;; must have 2 columns - username and password hash. +;auth_query = SELECT usename, passwd FROM pg_shadow WHERE usename=$1 + + +;; Authentication database that can be set globally to run "auth_query". +;auth_dbname = + +;;; +;;; Users allowed into database 'pgbouncer' +;;; + +;; comma-separated list of users who are allowed to change settings +admin_users = zenodo + +;; comma-separated list of users who are just allowed to use SHOW command +stats_users = zenodo + +;;; +;;; Pooler personality questions +;;; + +;; When server connection is released back to pool: +;; session - after client disconnects (default) +;; transaction - after transaction finishes +;; statement - after statement finishes +pool_mode = transaction + +;; Default PostgreSQL instance max_connections = 100. + +;; Maximum number of client connections allowed (2-3x of max_connections). +max_client_conn = 300 + +;; How many server connections to allow per user/database pair. (70-80% of max_connections) +default_pool_size = 70 + +;; Add more server connections to pool if below this number (20-30% of max_connections). +;; Improves behavior when the normal load suddenly comes back after a period of total inactivity. +;; The value is effectively capped at the pool size. +min_pool_size = 20 + +;; How many additional connections to allow to a pool (5-10% of max_connections) +reserve_pool_size = 10 + +;; If a client has not been serviced in this time, use additional connections from the reserve pool +reserve_pool_timeout = 5 + +;; Do not allow more than this many server connections per database (regardless of user). +;; This considers the PgBouncer database that the client has connected to, not the PostgreSQL database of the outgoing connection. +max_db_connections = 100 + +;; do not allow more than this many server connections per user (regardless of database). +;; This considers the PgBouncer user that is associated with a pool, which is either the user specified for the server connection or in absence of that the user the client has connected as. +max_user_connections = 100 + +;;; +;;; Logging +;;; + +;; Syslog settings +;syslog = 0 +;syslog_facility = daemon +;syslog_ident = pgbouncer + +;; log if client connects or server connection is made +log_connections = 1 + +;; log if and why connection was closed +log_disconnections = 1 + +;; log error messages pooler sends to clients +log_pooler_errors = 1 + +;; write aggregated stats into log +log_stats = 1 + +;; Logging verbosity. Same as -v switch on command line. +verbose = 0 + +;;; +;;; Timeouts +;;; + +;; Close server connection if its been connected longer. +;server_lifetime = 3600 + +;; Close server connection if its not been used in this time. Allows +;; to clean unnecessary connections from pool after peak. +;server_idle_timeout = 600 + +;; Cancel connection attempt if server does not answer takes longer. +;server_connect_timeout = 15 + +;; If server login failed (server_connect_timeout or auth failure) +;; then wait this many second before trying again. +;server_login_retry = 15 + +;; Dangerous. Server connection is closed if query does not return in +;; this time. Should be used to survive network problems, _not_ as +;; statement_timeout. (default: 0) +;query_timeout = 0 + +;; Dangerous. Client connection is closed if the query is not +;; assigned to a server in this time. Should be used to limit the +;; number of queued queries in case of a database or network +;; failure. (default: 120) +;query_wait_timeout = 120 + +;; Dangerous. Client connection is closed if the cancellation request +;; is not assigned to a server in this time. Should be used to limit +;; the time a client application blocks on a queued cancel request in +;; case of a database or network failure. (default: 10) +;cancel_wait_timeout = 10 + +;; Dangerous. Client connection is closed if no activity in this +;; time. Should be used to survive network problems. (default: 0) +;client_idle_timeout = 0 + +;; Disconnect clients who have not managed to log in after connecting +;; in this many seconds. +;client_login_timeout = 60 + +;; Clean automatically created database entries (via "*") if they stay +;; unused in this many seconds. +;autodb_idle_timeout = 3600 + +;; Close connections which are in "IDLE in transaction" state longer +;; than this many seconds. +;idle_transaction_timeout = 0 + +;; How long SUSPEND/-R waits for buffer flush before closing +;; connection. +;suspend_timeout = 10 + +;;; +;;; Low-level tuning options +;;; + +;; buffer for streaming packets +;pkt_buf = 4096 + +;; man 2 listen +;listen_backlog = 128 + +;; Max number pkt_buf to process in one event loop. +;sbuf_loopcnt = 5 + +;; Maximum PostgreSQL protocol packet size. +;max_packet_size = 2147483647 + +;; Set SO_REUSEPORT socket option +;so_reuseport = 0 + +;; networking options, for info: man 7 tcp + +;; Linux: Notify program about new connection only if there is also +;; data received. (Seconds to wait.) On Linux the default is 45, on +;; other OS'es 0. +;tcp_defer_accept = 0 + +;; In-kernel buffer size (Linux default: 4096) +;tcp_socket_buffer = 0 + +;; whether tcp keepalive should be turned on (0/1) +;tcp_keepalive = 1 + +;; The following options are Linux-specific. They also require +;; tcp_keepalive=1. + +;; Count of keepalive packets +;tcp_keepcnt = 0 + +;; How long the connection can be idle before sending keepalive +;; packets +;tcp_keepidle = 0 + +;; The time between individual keepalive probes +;tcp_keepintvl = 0 + +;; How long may transmitted data remain unacknowledged before TCP +;; connection is closed (in milliseconds) +;tcp_user_timeout = 0 + +;; DNS lookup caching time +;dns_max_ttl = 15 + +;; DNS zone SOA lookup period +;dns_zone_check_period = 0 + +;; DNS negative result caching time +;dns_nxdomain_ttl = 15 + +;; Custom resolv.conf file, to set custom DNS servers or other options +;; (default: empty = use OS settings) +;resolv_conf = /etc/pgbouncer/resolv.conf + +;;; +;;; Random stuff +;;; + +;; Hackish security feature. Helps against SQL injection: when PQexec +;; is disabled, multi-statement cannot be made. +;disable_pqexec = 0 + +;; Config file to use for next RELOAD/SIGHUP +;; By default contains config file from command line. +;conffile + +;; Read additional config from other file +;%include /etc/pgbouncer/pgbouncer-other.ini diff --git a/docker/pgbouncer/userlist.txt b/docker/pgbouncer/userlist.txt new file mode 100644 index 00000000..3e4339ac --- /dev/null +++ b/docker/pgbouncer/userlist.txt @@ -0,0 +1 @@ +"zenodo" "md538915355fbe7028464f6a1e36870f98e" diff --git a/invenio.cfg b/invenio.cfg index 50691758..c133b7d4 100644 --- a/invenio.cfg +++ b/invenio.cfg @@ -12,6 +12,7 @@ Only configuration created via cookiecutter or very likely to be edited by installer are included here. """ +import os from celery.schedules import crontab from copy import deepcopy @@ -68,8 +69,21 @@ APP_RDM_ROUTES["index"] = ("/", frontpage_view_function) # Flask-SQLAlchemy # ================ # See https://flask-sqlalchemy.palletsprojects.com/en/2.x/config/ - -SQLALCHEMY_DATABASE_URI="postgresql+psycopg2://zenodo:zenodo@localhost/zenodo" +def _parse_env_bool(var_name, default=None): + if str(os.environ.get(var_name)).lower() == "true": + return True + elif str(os.environ.get(var_name)).lower() == "false": + return False + return default + +SQLALCHEMY_ENGINE_OPTIONS = { + "application_name": socket.gethostname(), +} +if _parse_env_bool("INVENIO_PGBOUNCER_ENABLED", False): + from sqlalchemy.pool import NullPool + SQLALCHEMY_ENGINE_OPTIONS["poolclass"] = NullPool +else: + SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://zenodo:zenodo@localhost/zenodo" # Invenio-App # ===========