PostgreSQL Clustering the hard way
Today is the day that you start running PostgreSQL in a cluster! In this tutorial we’ll be setting up a production-ready Postgres cluster that’s highly available and fault tolerant using PostgreSQL, etcd, Patroni, HA Proxy, and keepalived. This resilient combination will ensure that you can always reach your database even when a node in the cluster goes down!
Nodes
# ha proxy
192.168.60.100 # haproxy-01
192.168.60.101 # haproxy-02
192.168.60.102 # haproxy-03
# postgres
192.168.60.103 # postgres-01
192.168.60.104 # postgres-02
192.168.60.105 # postgres-03
PostgreSQL
On the postgres nodes, install the latest postgres.
Install updated postgres repositories.
sudo apt update
sudo apt install -y postgresql-common
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
We’ll configure postgres later. For now, stop and disable the service, because Patroni will handle the lifecycle of postgres.
sudo systemctl stop postgresql
sudo systemctl disable postgresql
etcd
Install
This should be installed on the postgres servers.
Make sure you have wget and curl
sudo apt update
sudo apt-get install -y wget curl
Find the latest release: https://github.com/etcd-io/etcd/releases
wget https://github.com/etcd-io/etcd/releases/download/v3.5.17/etcd-v3.5.17-linux-amd64.tar.gz
Uncompress and rename.
tar xvf etcd-v3.5.17-linux-amd64.tar.gz
mv etcd-v3.5.17-linux-amd64 etcd
Move all binaries into /usr/local/bin/ for later use.
sudo mv etcd/etcd* /usr/local/bin/
Check etcd version
etcd --version
etcdctl version
Let’s create a user for the etcd service.
sudo useradd --system --home /var/lib/etcd --shell /bin/false etcd
Before configuring etcd, we need to repeat all of these steps for the other 2 nodes.
Let’s configure etcd.
Make dir and edit file.
sudo mkdir -p /etc/etcd
sudo mkdir -p /etc/etcd/ssl
Linux
sudo apt install openssl
Verify it’s installed and working
openssl version
Should see something like:
OpenSSL 3.4.0 22 Oct 2024 (Library: OpenSSL 3.4.0 22 Oct 2024)
Now let’s generate and configure certs
mkdir certs
cd certs
Generate cert authority
openssl genrsa -out ca.key 2048
openssl req -x509 -new -nodes -key ca.key -subj "/CN=etcd-ca" -days 7300 -out ca.crt
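You can inspect the CA you just created to confirm the subject and validity window:
openssl x509 -in ca.crt -noout -subject -dates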
Generate a certificate for each node. Note: pay attention to the SANs; I am using IPs, so update them with your IP and/or DNS hostname.
node1
# Generate a private key
openssl genrsa -out etcd-node1.key 2048

# Create temp file for config
cat > temp.cnf <<EOF
[ req ]
distinguished_name = req_distinguished_name
req_extensions = v3_req
[ req_distinguished_name ]
[ v3_req ]
subjectAltName = @alt_names
[ alt_names ]
IP.1 = 192.168.60.103
IP.2 = 127.0.0.1
EOF

# Create a csr
openssl req -new -key etcd-node1.key -out etcd-node1.csr \
  -subj "/C=US/ST=YourState/L=YourCity/O=YourOrganization/OU=YourUnit/CN=etcd-node1" \
  -config temp.cnf

# Sign the cert
openssl x509 -req -in etcd-node1.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
  -out etcd-node1.crt -days 7300 -sha256 -extensions v3_req -extfile temp.cnf

# Verify the cert and be sure you see Subject Alternative Name
openssl x509 -in etcd-node1.crt -text -noout | grep -A1 "Subject Alternative Name"

# Remove temp file
rm temp.cnf
node2

# Generate a private key
openssl genrsa -out etcd-node2.key 2048

# Create temp file for config
cat > temp.cnf <<EOF
[ req ]
distinguished_name = req_distinguished_name
req_extensions = v3_req
[ req_distinguished_name ]
[ v3_req ]
subjectAltName = @alt_names
[ alt_names ]
IP.1 = 192.168.60.104
IP.2 = 127.0.0.1
EOF

# Create a csr
openssl req -new -key etcd-node2.key -out etcd-node2.csr \
  -subj "/C=US/ST=YourState/L=YourCity/O=YourOrganization/OU=YourUnit/CN=etcd-node2" \
  -config temp.cnf

# Sign the cert
openssl x509 -req -in etcd-node2.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
  -out etcd-node2.crt -days 7300 -sha256 -extensions v3_req -extfile temp.cnf

# Verify the cert and be sure you see Subject Alternative Name
openssl x509 -in etcd-node2.crt -text -noout | grep -A1 "Subject Alternative Name"

# Remove temp file
rm temp.cnf
node3

# Generate a private key
openssl genrsa -out etcd-node3.key 2048

# Create temp file for config
cat > temp.cnf <<EOF
[ req ]
distinguished_name = req_distinguished_name
req_extensions = v3_req
[ req_distinguished_name ]
[ v3_req ]
subjectAltName = @alt_names
[ alt_names ]
IP.1 = 192.168.60.105
IP.2 = 127.0.0.1
EOF

# Create a csr
openssl req -new -key etcd-node3.key -out etcd-node3.csr \
  -subj "/C=US/ST=YourState/L=YourCity/O=YourOrganization/OU=YourUnit/CN=etcd-node3" \
  -config temp.cnf

# Sign the cert
openssl x509 -req -in etcd-node3.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
  -out etcd-node3.crt -days 7300 -sha256 -extensions v3_req -extfile temp.cnf

# Verify the cert and be sure you see Subject Alternative Name
openssl x509 -in etcd-node3.crt -text -noout | grep -A1 "Subject Alternative Name"

# Remove temp file
rm temp.cnf
List all files
ls
Should see:
ca.crt ca.key ca.srl etcd-node1.crt etcd-node1.csr etcd-node1.key etcd-node2.crt etcd-node2.csr etcd-node2.key etcd-node3.crt etcd-node3.csr etcd-node3.key
Secure copy (scp) the certs to each node:
scp ca.crt etcd-node1.crt etcd-node1.key [email protected]:/tmp/
scp ca.crt etcd-node2.crt etcd-node2.key [email protected]:/tmp/
scp ca.crt etcd-node3.crt etcd-node3.key [email protected]:/tmp/
ssh into each node
We need to move certs from /tmp to ssl location (/etc/etcd/ssl)
sudo mkdir -p /etc/etcd/ssl
sudo mv /tmp/etcd-node*.crt /etc/etcd/ssl/
sudo mv /tmp/etcd-node*.key /etc/etcd/ssl/
sudo mv /tmp/ca.crt /etc/etcd/ssl/
sudo chown -R etcd:etcd /etc/etcd/
sudo chmod 600 /etc/etcd/ssl/etcd-node*.key
sudo chmod 644 /etc/etcd/ssl/etcd-node*.crt /etc/etcd/ssl/ca.crt
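A quick sanity check that ownership and permissions landed as intended:
ls -l /etc/etcd/ssl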
Configure
Create our config:
sudo nano /etc/etcd/etcd.env
node1
ETCD_NAME="postgresql-01" ETCD_DATA_DIR="/var/lib/etcd" ETCD_INITIAL_CLUSTER="postgresql-01=https://192.168.60.103:2380,postgresql-02=https://192.168.60.104:2380,postgresql-03=https://192.168.60.105:2380" ETCD_INITIAL_CLUSTER_STATE="new" ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster" ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.60.103:2380" ETCD_LISTEN_PEER_URLS="https://0.0.0.0:2380" ETCD_LISTEN_CLIENT_URLS="https://0.0.0.0:2379" ETCD_ADVERTISE_CLIENT_URLS="https://192.168.60.103:2379" ETCD_CLIENT_CERT_AUTH="true" ETCD_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_CERT_FILE="/etc/etcd/ssl/etcd-node1.crt" ETCD_KEY_FILE="/etc/etcd/ssl/etcd-node1.key" ETCD_PEER_CLIENT_CERT_AUTH="true" ETCD_PEER_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_PEER_CERT_FILE="/etc/etcd/ssl/etcd-node1.crt" ETCD_PEER_KEY_FILE="/etc/etcd/ssl/etcd-node1.key"
node2
ETCD_NAME="postgresql-02" ETCD_DATA_DIR="/var/lib/etcd" ETCD_INITIAL_CLUSTER="postgresql-01=https://192.168.60.103:2380,postgresql-02=https://192.168.60.104:2380,postgresql-03=https://192.168.60.105:2380" ETCD_INITIAL_CLUSTER_STATE="new" ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster" ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.60.104:2380" ETCD_LISTEN_PEER_URLS="https://0.0.0.0:2380" ETCD_LISTEN_CLIENT_URLS="https://0.0.0.0:2379" ETCD_ADVERTISE_CLIENT_URLS="https://192.168.60.104:2379" ETCD_CLIENT_CERT_AUTH="true" ETCD_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_CERT_FILE="/etc/etcd/ssl/etcd-node2.crt" ETCD_KEY_FILE="/etc/etcd/ssl/etcd-node2.key" ETCD_PEER_CLIENT_CERT_AUTH="true" ETCD_PEER_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_PEER_CERT_FILE="/etc/etcd/ssl/etcd-node2.crt" ETCD_PEER_KEY_FILE="/etc/etcd/ssl/etcd-node2.key"
node3
ETCD_NAME="postgresql-03" ETCD_DATA_DIR="/var/lib/etcd" ETCD_INITIAL_CLUSTER="postgresql-01=https://192.168.60.103:2380,postgresql-02=https://192.168.60.104:2380,postgresql-03=https://192.168.60.105:2380" ETCD_INITIAL_CLUSTER_STATE="new" ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster" ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.60.105:2380" ETCD_LISTEN_PEER_URLS="https://0.0.0.0:2380" ETCD_LISTEN_CLIENT_URLS="https://0.0.0.0:2379" ETCD_ADVERTISE_CLIENT_URLS="https://192.168.60.105:2379" ETCD_CLIENT_CERT_AUTH="true" ETCD_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_CERT_FILE="/etc/etcd/ssl/etcd-node3.crt" ETCD_KEY_FILE="/etc/etcd/ssl/etcd-node3.key" ETCD_PEER_CLIENT_CERT_AUTH="true" ETCD_PEER_TRUSTED_CA_FILE="/etc/etcd/ssl/ca.crt" ETCD_PEER_CERT_FILE="/etc/etcd/ssl/etcd-node3.crt" ETCD_PEER_KEY_FILE="/etc/etcd/ssl/etcd-node3.key"
Now let’s create a service for etcd on all 3 nodes
sudo nano /etc/systemd/system/etcd.service
Contents of service file, same for all 3 nodes
[Unit]
Description=etcd key-value store
Documentation=https://github.com/etcd-io/etcd
After=network-online.target
Wants=network-online.target

[Service]
Type=notify
WorkingDirectory=/var/lib/etcd
EnvironmentFile=/etc/etcd/etcd.env
ExecStart=/usr/local/bin/etcd
Restart=always
RestartSec=10s
LimitNOFILE=40000
User=etcd
Group=etcd

[Install]
WantedBy=multi-user.target
We need to create the directory for etcd's data (ETCD_DATA_DIR, set in the env file and used as WorkingDirectory in the service file).
sudo mkdir -p /var/lib/etcd
sudo chown -R etcd:etcd /var/lib/etcd
Running etcd
Reload the daemon and enable the service:
sudo systemctl daemon-reload
sudo systemctl enable etcd
Start etcd and check its status. This will look like it’s hanging on the first node, but once you start another node it will complete.
sudo systemctl start etcd
sudo systemctl status etcd
Should see something like:
● etcd.service - etcd key-value store
Loaded: loaded (/etc/systemd/system/etcd.service; enabled; preset: enabled)
Active: active (running) since Mon 2024-12-02 14:09:30 CST; 2s ago
Docs: https://github.com/etcd-io/etcd
Main PID: 7266 (etcd)
Tasks: 9 (limit: 4612)
Memory: 29.3M (peak: 30.0M)
CPU: 246ms
CGroup: /system.slice/etcd.service
└─7266 /usr/local/bin/etcd
You can also check the logs for etcd by running:
journalctl -xeu etcd.service
Should see something like:
░░ Subject: A start job for unit etcd.service has finished successfully
░░ Defined-By: systemd
░░ Support: http://www.ubuntu.com/support
░░
░░ A start job for unit etcd.service has finished successfully.
░░
░░ The job identifier is 5757.
Dec 02 14:09:30 postgres-01 etcd[7266]: {"level":"info","ts":"2024-12-02T14:09:30.862122-0600","caller":"v3rpc/health.go:61","msg":"grpc service status changed","service":"","status":"SERVING"}
Dec 02 14:09:30 postgres-01 etcd[7266]: {"level":"info","ts":"2024-12-02T14:09:30.862347-0600","caller":"embed/serve.go:187","msg":"serving client traffic insecurely; this is strongly discouraged!","traffic":"grpc+http","address":"[::]:2379"}
Verification
Once the cluster is running, we should verify it’s working on each node by running:
etcdctl endpoint health
etcdctl member list
Should see something like on node1:
127.0.0.1:2379 is healthy: successfully committed proposal: took = 1.786976ms
eb8ee14ab5150b4, started, postgresql-01, http://192.168.60.103:2380, http://192.168.60.103:2379, false
34e89b244664f02d, started, postgresql-02, http://192.168.60.104:2380, http://192.168.60.104:2379, false
8ee2a9473a41c400, started, postgresql-03, http://192.168.60.105:2380, http://192.168.60.105:2379, false
Do this for all 3 nodes
Now we should edit the variables since the cluster is bootstrapped.
Then restart etcd and verify the cluster is still working
sudo systemctl restart etcd
If you want to run the following commands without sudo, you can run:
sudo usermod -aG etcd $USER
sudo etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ssl/ca.crt \
  --cert=/etc/etcd/ssl/etcd-node1.crt \
  --key=/etc/etcd/ssl/etcd-node1.key \
  endpoint health
sudo etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/etcd/ssl/ca.crt \
  --cert=/etc/etcd/ssl/etcd-node1.crt \
  --key=/etc/etcd/ssl/etcd-node1.key \
  member list
Should see something like
https://127.0.0.1:2379 is healthy: successfully committed proposal: took = 4.611121ms
59afb19d7cb2565d, started, postgresql-01, https://192.168.60.103:2380, https://192.168.60.103:2379, false
6338565ebcb76aa2, started, postgresql-02, https://192.168.60.104:2380, https://192.168.60.104:2379, false
9d74b3125c745c74, started, postgresql-03, https://192.168.60.105:2380, https://192.168.60.105:2379, false
You can check to see who the leader is by running:
sudo etcdctl \
  --endpoints=https://192.168.60.103:2379,https://192.168.60.104:2379,https://192.168.60.105:2379 \
  --cacert=/etc/etcd/ssl/ca.crt \
  --cert=/etc/etcd/ssl/etcd-node1.crt \
  --key=/etc/etcd/ssl/etcd-node1.key \
  endpoint status --write-out=table
You should see something like this; specifically, one of the nodes should show IS LEADER = true:
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|          ENDPOINT           |        ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.168.60.103:2379 | 59afb19d7cb2565d |  3.5.17 |   20 kB |      true |      false |         2 |         12 |                 12 |        |
| https://192.168.60.104:2379 | 6338565ebcb76aa2 |  3.5.17 |   20 kB |     false |      false |         2 |         12 |                 12 |        |
| https://192.168.60.105:2379 | 9d74b3125c745c74 |  3.5.17 |   20 kB |     false |      false |         2 |         12 |                 12 |        |
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
PostgreSQL and Patroni
PostgreSQL Certificates
Once this is all set up and working, we can now configure postgres and patroni.
We need to create some dirs for postgres
sudo mkdir -p /var/lib/postgresql/data
sudo mkdir -p /var/lib/postgresql/ssl
Notice we are using SSL, so we need to generate a certificate.
Generate a self-signed certificate
(this will last 20 years)
openssl genrsa -out server.key 2048 # private key
openssl req -new -key server.key -out server.req # csr
openssl req -x509 -key server.key -in server.req -out server.crt -days 7300 # generate cert, valid for 20 years
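Note: the second command prompts for subject fields interactively. If you want to script it, you can pass a subject inline (the CN below is just an example):
openssl req -new -key server.key -out server.req -subj "/CN=postgres"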
Update permissions
chmod 600 server.key
Move files to cert location
sudo mv server.crt server.key server.req /var/lib/postgresql/ssl
After doing this, we need to copy these files to our other servers.
From your local machine, scp the files to node1, node2, and node3:
scp server.crt server.key server.req [email protected]:/tmp
scp server.crt server.key server.req [email protected]:/tmp
scp server.crt server.key server.req [email protected]:/tmp
On the servers (not your local machine) move files
cd /tmp
sudo mv server.crt server.key server.req /var/lib/postgresql/ssl
Update permissions on certificate
sudo chmod 600 /var/lib/postgresql/ssl/server.key
sudo chmod 644 /var/lib/postgresql/ssl/server.crt
sudo chmod 600 /var/lib/postgresql/ssl/server.req
sudo chown postgres:postgres /var/lib/postgresql/data
sudo chown postgres:postgres /var/lib/postgresql/ssl/server.*
We will need to give the postgres user read access to the etcd certificates using ACLs.
sudo apt update
sudo apt install -y acl
node1
sudo setfacl -m u:postgres:r /etc/etcd/ssl/ca.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node1.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node1.key
node2
sudo setfacl -m u:postgres:r /etc/etcd/ssl/ca.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node2.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node2.key
node3
sudo setfacl -m u:postgres:r /etc/etcd/ssl/ca.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node3.crt
sudo setfacl -m u:postgres:r /etc/etcd/ssl/etcd-node3.key
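To confirm the ACLs took effect, you can inspect them and try a read as the postgres user (getfacl ships with the acl package; adjust the filename per node):
getfacl /etc/etcd/ssl/etcd-node3.key
sudo -u postgres cat /etc/etcd/ssl/ca.crt > /dev/null && echo "postgres can read"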
Now that the postgres certs are configured, it’s time to configure patroni to drive postgres.
Patroni
Installing Patroni
sudo apt install -y patroni
ssh into node1
Make a dir for patroni
sudo mkdir -p /etc/patroni/
Configuring Patroni
Create a config file and edit it:
sudo nano /etc/patroni/config.yml
node1
scope: postgresql-cluster
namespace: /service/
name: postgresql-01 # node1

etcd3:
  hosts: 192.168.60.103:2379,192.168.60.104:2379,192.168.60.105:2379 # etcd cluster nodes
  protocol: https
  cacert: /etc/etcd/ssl/ca.crt
  cert: /etc/etcd/ssl/etcd-node1.crt # node1's etcd certificate
  key: /etc/etcd/ssl/etcd-node1.key # node1's etcd key

restapi:
  listen: 0.0.0.0:8008
  connect_address: 192.168.60.103:8008 # IP for node1's REST API
  certfile: /var/lib/postgresql/ssl/server.pem

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576 # Failover parameters
    postgresql:
      parameters:
        ssl: 'on' # Enable SSL
        ssl_cert_file: /var/lib/postgresql/ssl/server.crt # PostgreSQL server certificate
        ssl_key_file: /var/lib/postgresql/ssl/server.key # PostgreSQL server key
      pg_hba: # Access rules
        - hostssl replication replicator 127.0.0.1/32 md5
        - hostssl replication replicator 192.168.60.103/32 md5
        - hostssl replication replicator 192.168.60.104/32 md5
        - hostssl replication replicator 192.168.60.105/32 md5
        - hostssl all all 127.0.0.1/32 md5
        - hostssl all all 0.0.0.0/0 md5
  initdb:
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 0.0.0.0:5432
  connect_address: 192.168.60.103:5432 # IP for node1's PostgreSQL
  data_dir: /var/lib/postgresql/data
  bin_dir: /usr/lib/postgresql/17/bin # Binary directory for PostgreSQL 17
  authentication:
    superuser:
      username: postgres
      password: cnV2abjbDpbh64e12987wR4mj5kQ3456Y0Qf # Superuser password - be sure to change
    replication:
      username: replicator
      password: sad9a23jga8jsuedrwtsskj74567suiuwe23 # Replication password - be sure to change
  parameters:
    max_connections: 100
    shared_buffers: 256MB

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
node2
scope: postgresql-cluster
namespace: /service/
name: postgresql-02 # Unique name for Node 2

etcd3:
  hosts: 192.168.60.103:2379,192.168.60.104:2379,192.168.60.105:2379 # etcd cluster nodes
  protocol: https
  cacert: /etc/etcd/ssl/ca.crt
  cert: /etc/etcd/ssl/etcd-node2.crt # Node 2's etcd certificate
  key: /etc/etcd/ssl/etcd-node2.key # Node 2's etcd key

restapi:
  listen: 0.0.0.0:8008
  connect_address: 192.168.60.104:8008 # IP for Node 2's REST API
  certfile: /var/lib/postgresql/ssl/server.pem

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      parameters:
        ssl: 'on'
        ssl_cert_file: /var/lib/postgresql/ssl/server.crt
        ssl_key_file: /var/lib/postgresql/ssl/server.key
      pg_hba:
        - hostssl replication replicator 127.0.0.1/32 md5
        - hostssl replication replicator 192.168.60.103/32 md5
        - hostssl replication replicator 192.168.60.104/32 md5
        - hostssl replication replicator 192.168.60.105/32 md5
        - hostssl all all 127.0.0.1/32 md5
        - hostssl all all 0.0.0.0/0 md5
  initdb:
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 0.0.0.0:5432
  connect_address: 192.168.60.104:5432 # IP for Node 2's PostgreSQL
  data_dir: /var/lib/postgresql/data
  bin_dir: /usr/lib/postgresql/17/bin
  authentication:
    superuser:
      username: postgres
      password: cnV2abjbDpbh64e12987wR4mj5kQ3456Y0Qf # Superuser password (provided)
    replication:
      username: replicator
      password: sad9a23jga8jsuedrwtsskj74567suiuwe23 # Replication password (provided)
  parameters:
    max_connections: 100
    shared_buffers: 256MB

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
node3
scope: postgresql-cluster
namespace: /service/
name: postgresql-03 # Unique name for Node 3

etcd3:
  hosts: 192.168.60.103:2379,192.168.60.104:2379,192.168.60.105:2379 # etcd cluster nodes
  protocol: https
  cacert: /etc/etcd/ssl/ca.crt
  cert: /etc/etcd/ssl/etcd-node3.crt # Node 3's etcd certificate
  key: /etc/etcd/ssl/etcd-node3.key # Node 3's etcd key

restapi:
  listen: 0.0.0.0:8008
  connect_address: 192.168.60.105:8008 # IP for Node 3's REST API
  certfile: /var/lib/postgresql/ssl/server.pem

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      parameters:
        ssl: 'on'
        ssl_cert_file: /var/lib/postgresql/ssl/server.crt
        ssl_key_file: /var/lib/postgresql/ssl/server.key
      pg_hba:
        - hostssl replication replicator 127.0.0.1/32 md5
        - hostssl replication replicator 192.168.60.103/32 md5
        - hostssl replication replicator 192.168.60.104/32 md5
        - hostssl replication replicator 192.168.60.105/32 md5
        - hostssl all all 127.0.0.1/32 md5
        - hostssl all all 0.0.0.0/0 md5
  initdb:
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 0.0.0.0:5432
  connect_address: 192.168.60.105:5432 # IP for Node 3's PostgreSQL
  data_dir: /var/lib/postgresql/data
  bin_dir: /usr/lib/postgresql/17/bin
  authentication:
    superuser:
      username: postgres
      password: "cnV2abjbDpbh64e12987wR4mj5kQ3456Y0Qf" # Superuser password (provided)
    replication:
      username: replicator
      password: sad9a23jga8jsuedrwtsskj74567suiuwe23 # Replication password (provided)
  parameters:
    max_connections: 100
    shared_buffers: 256MB

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
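Before starting anything, it’s worth sanity-checking each config file. Recent Patroni versions ship a validator (flag availability depends on your version):
sudo patroni --validate-config /etc/patroni/config.yml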
Patroni Certificates
Let’s also use a certificate for the patroni REST API; this requires a pem file:
sudo sh -c 'cat /var/lib/postgresql/ssl/server.crt /var/lib/postgresql/ssl/server.key > /var/lib/postgresql/ssl/server.pem'
sudo chown postgres:postgres /var/lib/postgresql/ssl/server.pem
sudo chmod 600 /var/lib/postgresql/ssl/server.pem
We can verify with:
sudo openssl x509 -in /var/lib/postgresql/ssl/server.pem -text -noout
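If you want to be sure the bundled key actually matches the certificate, compare their moduli; the two hashes should be identical:
sudo openssl x509 -in /var/lib/postgresql/ssl/server.crt -noout -modulus | openssl md5
sudo openssl rsa -in /var/lib/postgresql/ssl/server.key -noout -modulus | openssl md5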
Starting Patroni and HA Cluster
sudo systemctl restart patroni
Check logs
journalctl -u patroni -f
Dec 03 22:16:05 postgres-01 patroni[770]: 2024-12-03 22:16:05,399 INFO: no action. I am (postgresql-01), the leader with the lock
Dec 03 22:16:15 postgres-01 patroni[770]: 2024-12-03 22:16:15,399 INFO: no action. I am (postgresql-01), the leader with the lock
and
Dec 03 22:16:21 postgres-02 patroni[768]: 2024-12-03 22:16:21,780 INFO: Lock owner: postgresql-01; I am postgresql-02
Dec 03 22:16:21 postgres-02 patroni[768]: 2024-12-03 22:16:21,823 INFO: bootstrap from leader 'postgresql-01' in progress
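You can also ask patroni directly for the cluster state; one node should show up with the Leader role and the others as Replicas:
sudo patronictl -c /etc/patroni/config.yml list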
Reconfiguring our etcd Cluster
sudo nano /etc/etcd/etcd.env
Change
ETCD_INITIAL_CLUSTER_STATE="new"
to
ETCD_INITIAL_CLUSTER_STATE="existing"
Do this on all 3 nodes!
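If you prefer a one-liner on each node (assuming the variable is written exactly as shown above):
sudo sed -i 's/ETCD_INITIAL_CLUSTER_STATE="new"/ETCD_INITIAL_CLUSTER_STATE="existing"/' /etc/etcd/etcd.env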
Verifying Our Postgres Cluster
We now have an HA postgres cluster!
However, we don’t always know which node is the leader, so we can’t just point clients at a single IP.
We can test the patroni endpoint to see who the leader is:
curl -k https://192.168.60.103:8008/primary
curl -k https://192.168.60.104:8008/primary
curl -k https://192.168.60.105:8008/primary
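The primary returns HTTP 200 on this endpoint and the replicas return 503, so a small loop makes the leader easy to spot:
for ip in 192.168.60.103 192.168.60.104 192.168.60.105; do
  echo -n "$ip: "
  curl -sk -o /dev/null -w "%{http_code}\n" https://$ip:8008/primary
done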
Editing your pg_hba after bootstrapping
If you ever want to see your global config, you can with:
sudo patronictl -c /etc/patroni/config.yml show-config
If you ever want to edit it, you can with:
sudo patronictl -c /etc/patroni/config.yml edit-config
After saving, these changes will be replicated to all nodes. Note that you might want to update your bootstrap config at some point as well.
HA Proxy
Installing HA Proxy
This is where HA Proxy comes in.
Switch to the haproxy nodes.
Install HA Proxy:
sudo apt -y install haproxy
Once installed we need to add some config
sudo nano /etc/haproxy/haproxy.cfg
frontend postgres_frontend
    bind *:5432
    mode tcp
    default_backend postgres_backend

backend postgres_backend
    mode tcp
    option tcp-check
    option httpchk GET /primary # patroni provides an endpoint to check node roles
    http-check expect status 200 # expect 200 for the primary node
    timeout connect 5s
    timeout client 30s
    timeout server 30s
    server postgresql-01 192.168.60.103:5432 port 8008 check check-ssl verify none
    server postgresql-02 192.168.60.104:5432 port 8008 check check-ssl verify none
    server postgresql-03 192.168.60.105:5432 port 8008 check check-ssl verify none
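Before reloading, you can have HAProxy validate the file; the -c flag checks the config without starting anything:
sudo haproxy -c -f /etc/haproxy/haproxy.cfg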
Do this for all 3 nodes
Starting HA Proxy
Reload the service to pick up the config:
sudo systemctl reload haproxy
check logs
sudo tail -f /var/log/syslog | grep haproxy
keepalived
Installing keepalived
Now we need to install keepalived to create a VIP
sudo apt update
sudo apt install keepalived -y
Configuring keepalived
Now we need to apply a configuration file:
sudo nano /etc/keepalived/keepalived.conf
haproxy1
global_defs {
    enable_script_security
    script_user keepalived_script
}

vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 2
    fall 3
    rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface eth0 # update with your nic
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass tDHjh7by # change
    }
    virtual_ipaddress {
        192.168.60.110
    }
    track_script {
        check_haproxy
    }
}
haproxy2
global_defs {
    enable_script_security
    script_user keepalived_script
}

vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 2
    fall 3
    rise 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface eth0 # update with your nic
    virtual_router_id 51
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass tDHjh7by # change
    }
    virtual_ipaddress {
        192.168.60.110
    }
    track_script {
        check_haproxy
    }
}
haproxy3
global_defs {
    enable_script_security
    script_user keepalived_script
}

vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 2
    fall 3
    rise 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface eth0 # update with your nic
    virtual_router_id 51
    priority 80
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass tDHjh7by # change
    }
    virtual_ipaddress {
        192.168.60.110
    }
    track_script {
        check_haproxy
    }
}
Create a check script on each
sudo nano /etc/keepalived/check_haproxy.sh
#!/bin/bash
# Define the port to check (e.g., HAProxy frontend port)
PORT=5432

# Check if HAProxy is running
if ! pidof haproxy > /dev/null; then
    echo "HAProxy is not running"
    exit 1
fi

# Check if HAProxy is listening on the expected port
if ! ss -ltn | grep -q ":${PORT}"; then
    echo "HAProxy is not listening on port ${PORT}"
    exit 2
fi

# All checks passed
exit 0
We need to add a user to execute this script:
sudo useradd -r -s /bin/false keepalived_script
sudo chmod +x /etc/keepalived/check_haproxy.sh
sudo chown keepalived_script:keepalived_script /etc/keepalived/check_haproxy.sh
sudo chmod 700 /etc/keepalived/check_haproxy.sh
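You can run the check by hand as the script user and confirm it exits 0 while HAProxy is healthy:
sudo -u keepalived_script /etc/keepalived/check_haproxy.sh
echo $? # expect 0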
Starting keepalived
sudo systemctl restart keepalived
Verifying keepalived
Check logs:
sudo journalctl -u keepalived -f
We should now be able to ping the VIP:
ping 192.168.60.110
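As a final end-to-end check, you can see which HAProxy node currently holds the VIP, and connect to postgres through it. This is a sketch: it assumes psql is installed on your client and uses the superuser password from your patroni config; pg_is_in_recovery() should return f since HAProxy routes to the primary.
ip addr show | grep 192.168.60.110 # run on the haproxy nodes; the holder shows the VIP
psql "host=192.168.60.110 port=5432 user=postgres sslmode=require" -c "SELECT pg_is_in_recovery();"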