<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="author" content="The CloudNativePG Contributors" />
<link rel="shortcut icon" href="../img/favicon.ico" />
<title>Architecture - CloudNativePG v1.25</title>
<link rel="stylesheet" href="../css/theme.css" />
<link rel="stylesheet" href="../css/theme_extra.css" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/styles/github.min.css" />
<link href="../css/override.css" rel="stylesheet" />
<script>
// Current page data
var mkdocs_page_name = "Architecture";
var mkdocs_page_input_path = "architecture.md";
var mkdocs_page_url = null;
</script>
<!--[if lt IE 9]>
<script src="../js/html5shiv.min.js"></script>
<![endif]-->
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/highlight.min.js"></script>
<script>hljs.highlightAll();</script>
</head>
<body class="wy-body-for-nav" role="document">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href=".." class="icon icon-home"> CloudNativePG v1.25
</a><div role="search">
<form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" title="Type search term here" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="..">CloudNativePG</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../before_you_start/">Before You Start</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../use_cases/">Use cases</a>
</li>
</ul>
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal current" href="#">Architecture</a>
<ul class="current">
<li class="toctree-l2"><a class="reference internal" href="#synchronizing-the-state">Synchronizing the state</a>
</li>
<li class="toctree-l2"><a class="reference internal" href="#kubernetes-architecture">Kubernetes architecture</a>
<ul>
<li class="toctree-l3"><a class="reference internal" href="#multi-availability-zone-kubernetes-clusters">Multi-availability zone Kubernetes clusters</a>
</li>
<li class="toctree-l3"><a class="reference internal" href="#single-availability-zone-kubernetes-clusters">Single availability zone Kubernetes clusters</a>
</li>
<li class="toctree-l3"><a class="reference internal" href="#reserving-nodes-for-postgresql-workloads">Reserving nodes for PostgreSQL workloads</a>
<ul>
<li class="toctree-l4"><a class="reference internal" href="#proposed-node-label">Proposed node label</a>
</li>
<li class="toctree-l4"><a class="reference internal" href="#proposed-node-taint">Proposed node taint</a>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#postgresql-architecture">PostgreSQL architecture</a>
<ul>
<li class="toctree-l3"><a class="reference internal" href="#read-write-workloads">Read-write workloads</a>
</li>
<li class="toctree-l3"><a class="reference internal" href="#read-only-workloads">Read-only workloads</a>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#deployments-across-kubernetes-clusters">Deployments across Kubernetes clusters</a>
</li>
</ul>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../installation_upgrade/">Installation and upgrades</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../quickstart/">Quickstart</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../image_catalog/">Image Catalog</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../bootstrap/">Bootstrap</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../database_import/">Importing Postgres databases</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../security/">Security</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../instance_manager/">Postgres instance manager</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../scheduling/">Scheduling</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../resource_management/">Resource management</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../failure_modes/">Failure Modes</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../rolling_update/">Rolling Updates</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../replication/">Replication</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../logical_replication/">Logical Replication</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../backup/">Backup</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../backup_barmanobjectstore/">Backup on object stores</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../wal_archiving/">WAL archiving</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../backup_volumesnapshot/">Backup on volume snapshots</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../recovery/">Recovery</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../service_management/">Service Management</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../postgresql_conf/">PostgreSQL Configuration</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../declarative_role_management/">PostgreSQL Role Management</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../declarative_database_management/">PostgreSQL Database Management</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tablespaces/">Tablespaces</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../operator_conf/">Operator configuration</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../cluster_conf/">Instance pod configuration</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../storage/">Storage</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../labels_annotations/">Labels and annotations</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../monitoring/">Monitoring</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../logging/">Logging</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../certificates/">Certificates</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../ssl_connections/">Client TLS/SSL connections</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../applications/">Connecting from an application</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../connection_pooling/">Connection pooling</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../replica_cluster/">Replica clusters</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../kubernetes_upgrade/">Kubernetes Upgrade and Maintenance</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../kubectl-plugin/">Kubectl Plugin</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../failover/">Automated failover</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../troubleshooting/">Troubleshooting</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../fencing/">Fencing</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../declarative_hibernation/">Declarative hibernation</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../postgis/">PostGIS</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../e2e/">End-to-End Tests</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../container_images/">Container Image Requirements</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../operator_capability_levels/">Operator capability levels</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../controller/">Custom Pod Controller</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../samples/">Examples</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../networking/">Networking</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../benchmarking/">Benchmarking</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../faq/">Frequently Asked Questions (FAQ)</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../cloudnative-pg.v1/">API Reference</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../supported_releases/">Supported releases</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../preview_version/">Preview Versions</a>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../release_notes/">Release notes</a>
</li>
</ul>
<p class="caption"><span class="caption-text">CNCF Projects Integrations</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../cncf-projects/external-secrets/">External Secrets</a>
</li>
<li class="toctree-l1"><a class="reference internal" href="../cncf-projects/cilium/">Cilium</a>
</li>
</ul>
<p class="caption"><span class="caption-text">Appendixes</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../appendixes/object_stores/">Appendix A - Common object stores for backups</a>
</li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" role="navigation" aria-label="Mobile navigation menu">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="..">CloudNativePG v1.25</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content"><div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href=".." class="icon icon-home" aria-label="Docs"></a></li>
<li class="breadcrumb-item active">Architecture</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div class="section" itemprop="articleBody">
<h1 id="architecture">Architecture</h1>
<!-- SPDX-License-Identifier: CC-BY-4.0 -->
<div class="admonition hint">
<p class="admonition-title">Hint</p>
<p>For a deeper understanding, we recommend reading our CNCF blog post
titled <a href="https://www.cncf.io/blog/2023/09/29/recommended-architectures-for-postgresql-in-kubernetes/">"Recommended Architectures for PostgreSQL in Kubernetes"</a>,
which provides valuable insights into best practices and design
considerations for PostgreSQL deployments in Kubernetes.</p>
</div>
<p>This documentation page provides an overview of the key architectural
considerations for implementing a robust business continuity strategy when
deploying PostgreSQL in Kubernetes. These considerations include:</p>
<ul>
<li><strong><a href="#multi-availability-zone-kubernetes-clusters">Deployments in <em>stretched</em></a>
vs. <a href="#single-availability-zone-kubernetes-clusters"><em>non-stretched</em> clusters</a></strong>:
Evaluating the differences between deploying in stretched clusters (across 3
or more availability zones) versus non-stretched clusters (within a single
availability zone).</li>
<li><a href="#reserving-nodes-for-postgresql-workloads"><strong>Reservation of <code>postgres</code> worker nodes</strong></a>: Isolating PostgreSQL workloads by
dedicating specific worker nodes to <code>postgres</code> tasks, ensuring optimal
performance and minimizing interference from other workloads.</li>
<li><a href="#postgresql-architecture"><strong>PostgreSQL architectures within a single Kubernetes cluster</strong></a>:
Designing effective PostgreSQL deployments within a single Kubernetes cluster
to meet high availability and performance requirements.</li>
<li><a href="#deployments-across-kubernetes-clusters"><strong>PostgreSQL architectures across Kubernetes clusters for disaster recovery</strong></a>:
Planning and implementing PostgreSQL architectures that span multiple
Kubernetes clusters to provide comprehensive disaster recovery capabilities.</li>
</ul>
<h2 id="synchronizing-the-state">Synchronizing the state</h2>
<p>PostgreSQL is a database management system and, as such, it needs to be treated
as a <strong>stateful workload</strong> in Kubernetes. While stateless applications
mainly use traffic redirection to achieve High Availability (HA) and
Disaster Recovery (DR), in the case of a database, state must be replicated in
multiple locations, preferably in a continuous and instantaneous way, by
adopting either of the following two strategies:</p>
<ul>
<li><em>storage-level replication</em>, normally via persistent volumes</li>
<li><em>application-level replication</em>, in this specific case PostgreSQL</li>
</ul>
<p>CloudNativePG relies on application-level replication, for a simple reason: the
PostgreSQL database management system comes with robust and reliable
built-in <strong>physical replication</strong> capabilities based on <strong>Write Ahead Log (WAL)
shipping</strong>, which have been used in production by millions of users all over
the world for over a decade.</p>
<p>PostgreSQL supports both asynchronous and synchronous streaming replication
over the network, as well as asynchronous file-based log shipping (normally
used as a fallback option, for example, to store WAL files in an object store).
Replicas are usually called <em>standby servers</em> and can also be used for
read-only workloads, thanks to the <em>Hot Standby</em> feature.</p>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p><strong>We recommend against storage-level replication with PostgreSQL</strong>, although
CloudNativePG allows you to adopt that strategy. For more information, please refer
to the talk given by Chris Milsted and Gabriele Bartolini at KubeCon NA 2022 entitled
<a href="https://www.youtube.com/watch?v=99uSJXkKpeI&ab_channel=CNCF%5BCloudNativeComputingFoundation%5D">"Data On Kubernetes, Deploying And Running PostgreSQL And Patterns For Databases In a Kubernetes Cluster"</a>
where this topic was covered in detail.</p>
</div>
<h2 id="kubernetes-architecture">Kubernetes architecture</h2>
<p>Kubernetes natively supports spanning separate physical
locations - also known as data centers, failure zones, or more frequently
<strong>availability zones</strong> - connected to each other via redundant, low-latency,
private network connectivity.</p>
<p>Because Kubernetes is a distributed system, the recommended minimum number
of availability zones for a cluster is three (3), which makes the control
plane resilient to the failure of a single zone.
For details, please refer to
<a href="https://kubernetes.io/docs/setup/best-practices/multiple-zones/">"Running in multiple zones"</a>.
In such a setup, <strong>each data center is active at any time</strong> and can run workloads
simultaneously.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Most of the public Cloud Providers' managed Kubernetes services already
provide 3 or more availability zones in each region.</p>
</div>
<h3 id="multi-availability-zone-kubernetes-clusters">Multi-availability zone Kubernetes clusters</h3>
<p>The multi-availability zone Kubernetes architecture with three (3) or more
zones is the one that we recommend for PostgreSQL usage.
This scenario is typical of Kubernetes services managed by Cloud Providers.</p>
<p><img alt="Kubernetes cluster spanning over 3 independent data centers" src="../images/k8s-architecture-3-az.png" /></p>
<p>Such an architecture enables the CloudNativePG operator to control the full
lifecycle of a <code>Cluster</code> resource across the zones within a single Kubernetes
cluster, by treating all the availability zones as active. This includes, among
other operations,
<a href="../scheduling/">scheduling</a> the workloads in a declarative manner (based on
affinity rules, tolerations, and node selectors), automated failover,
self-healing, and updates, all of which work seamlessly across the zones in a
single Kubernetes cluster.</p>
<p>Please refer to the <a href="#postgresql-architecture">"PostgreSQL architecture"</a>
section below for details on how you can design your PostgreSQL clusters within
the same Kubernetes cluster through shared-nothing deployments at the storage,
worker node, and availability zone levels.</p>
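<p>For illustration only, the following sketch shows how such a zone-level
spread can be requested through the operator's <code>affinity</code> stanza
(the cluster name and storage size are hypothetical):</p>
<pre><code class="language-yaml">apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: cluster-example
spec:
  instances: 3
  affinity:
    # Generate pod anti-affinity rules so that instances are spread
    # across availability zones instead of being packed together
    enablePodAntiAffinity: true
    topologyKey: topology.kubernetes.io/zone
    podAntiAffinityType: required
  storage:
    size: 1Gi
</code></pre>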
<p>Additionally, you can leverage <a href="#deployments-across-kubernetes-clusters">Kubernetes clusters</a>
to deploy distributed PostgreSQL topologies hosting "passive"
<a href="../replica_cluster/">PostgreSQL replica clusters</a> in different regions,
managing them via declarative configuration. This setup is ideal for disaster
recovery (DR), read-only operations, or cross-region availability.</p>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>Each operator deployment can only manage operations within its local
Kubernetes cluster. For operations across Kubernetes clusters, such as
controlled switchover or unexpected failover, coordination must be handled
manually (through GitOps, for example) or by using a higher-level cluster
management tool.</p>
</div>
<p><img alt="Example of a multiple Kubernetes cluster architecture distributed over 3 regions each with 3 independent data centers" src="../images/k8s-architecture-multi.png" /></p>
<h3 id="single-availability-zone-kubernetes-clusters">Single availability zone Kubernetes clusters</h3>
<p>If your Kubernetes cluster has only one availability zone, CloudNativePG still
provides you with a lot of features to improve HA and DR outcomes for your
PostgreSQL databases, pushing the single point of failure (SPoF) to the level
of the zone as much as possible - i.e., the entire zone must experience an
outage before your CloudNativePG clusters suffer a failure.</p>
<p>This scenario is typical of self-managed on-premise Kubernetes clusters, where
only one data center is available.</p>
<p>Single availability zone Kubernetes clusters are the only viable option when
only <strong>two data centers</strong> are available within reach of a low-latency
connection (typically in the same metropolitan area). Having only two zones
prevents the creation of a multi-availability zone Kubernetes cluster, which
requires a minimum of three zones. As a result, users must create two separate
Kubernetes clusters in an active/passive configuration, with the second cluster
primarily used for Disaster Recovery (see
the <a href="../replica_cluster/">replica cluster feature</a>).</p>
<p><img alt="Example of a Kubernetes architecture with only 2 data centers" src="../images/k8s-architecture-2-az.png" /></p>
<div class="admonition hint">
<p class="admonition-title">Hint</p>
<p>If you are at an early stage of your Kubernetes journey, please share this
document with your infrastructure team. The two-data-center setup might
simply be the result of a "lift-and-shift" transition to Kubernetes
from a traditional bare-metal or VM-based infrastructure, and the benefits
that Kubernetes offers in a 3+ zone scenario might not have been known
or addressed at the time the infrastructure architecture was designed.
Ultimately, a third physical location connected to the other two might
represent a valid option for your organization to consider, as it reduces the
overall costs of the infrastructure by moving the day-to-day complexity
from the application level down to the physical infrastructure level.</p>
</div>
<p>Please refer to the <a href="#postgresql-architecture">"PostgreSQL architecture"</a>
section below for details on how you can design your PostgreSQL clusters within
your single availability zone Kubernetes cluster through shared-nothing
deployments at the storage and worker node levels only. For HA, in such a
scenario it becomes even more important that the PostgreSQL instances be
located on different worker nodes and do not share the same storage.</p>
<p>For DR, you can push the SPoF above the single zone, by using additional
<a href="#deployments-across-kubernetes-clusters">Kubernetes clusters</a> to define a
distributed topology hosting "passive" <a href="../replica_cluster/">PostgreSQL replica clusters</a>.
As with other Kubernetes workloads in this scenario, promotion of a Kubernetes
cluster as primary must be done manually.</p>
<p>Through the <a href="../replica_cluster/">replica cluster feature</a>, you can define a
distributed PostgreSQL topology and coordinate a controlled switchover between
data centers by first demoting the primary cluster and then promoting the
replica cluster, without the need to re-clone the former primary. While these
operations are fully declarative, automated failover across Kubernetes clusters
is not within CloudNativePG's scope, as the operator can only function within a
single Kubernetes cluster.</p>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>CloudNativePG provides all the necessary primitives and probes to
coordinate PostgreSQL active/passive topologies across different Kubernetes
clusters through a higher-level operator or management tool.</p>
</div>
<h3 id="reserving-nodes-for-postgresql-workloads">Reserving nodes for PostgreSQL workloads</h3>
<p>Whether you're operating in a multi-availability zone environment or, more
critically, within a single availability zone, we strongly recommend isolating
PostgreSQL workloads by dedicating specific worker nodes exclusively to
<code>postgres</code> in production. A Kubernetes worker node dedicated to running
PostgreSQL workloads is referred to as a <strong>Postgres node</strong> or <code>postgres</code> node.
This approach ensures optimal performance and resource allocation for your
database operations.</p>
<div class="admonition hint">
<p class="admonition-title">Hint</p>
<p>As a general rule of thumb, deploy Postgres nodes in multiples of
three—ideally with one node per availability zone. Three nodes is
an optimal number because it ensures that a PostgreSQL cluster with three
instances (one primary and two standby replicas) is distributed across
different nodes, enhancing fault tolerance and availability.</p>
</div>
<p>In Kubernetes, this can be achieved using node labels and taints in a
declarative manner, aligning with Infrastructure as Code (IaC) practices:
labels ensure that a node is capable of running <code>postgres</code> workloads, while
taints help prevent any non-<code>postgres</code> workloads from being scheduled on that
node.</p>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>This methodology is the most straightforward way to ensure that PostgreSQL
workloads are isolated from other workloads in terms of both computing
resources and, when using locally attached disks, storage. While different
PostgreSQL clusters may share the same node, you can take this a step further
by using labels and taints to ensure that a node is dedicated to a single
instance of a specific <code>Cluster</code>.</p>
</div>
<h4 id="proposed-node-label">Proposed node label</h4>
<p>CloudNativePG recommends using the <code>node-role.kubernetes.io/postgres</code> label.
Since this is a reserved label (<code>*.kubernetes.io</code>), it can only be applied
after a worker node is created.</p>
<p>To assign the <code>postgres</code> label to a node, use the following command:</p>
<pre><code class="language-sh">kubectl label node <NODE-NAME> node-role.kubernetes.io/postgres=
</code></pre>
<p>To ensure that a <code>Cluster</code> resource is scheduled on a <code>postgres</code> node, you must
correctly configure the <code>.spec.affinity.nodeSelector</code> stanza in your manifests.
Here’s an example:</p>
<pre><code class="language-yaml">spec:
# <snip>
affinity:
# <snip>
nodeSelector:
node-role.kubernetes.io/postgres: ""
</code></pre>
<h4 id="proposed-node-taint">Proposed node taint</h4>
<p>CloudNativePG recommends using the <code>node-role.kubernetes.io/postgres</code> taint.</p>
<p>To assign the <code>postgres</code> taint to a node, use the following command:</p>
<pre><code class="language-sh">kubectl taint node <NODE-NAME> node-role.kubernetes.io/postgres=:NoSchedule
</code></pre>
<p>To ensure that a <code>Cluster</code> resource is scheduled on a node with a <code>postgres</code> taint, you must correctly configure the <code>.spec.affinity.tolerations</code> stanza in your manifests.
Here’s an example:</p>
<pre><code class="language-yaml">spec:
# <snip>
affinity:
# <snip>
tolerations:
- key: node-role.kubernetes.io/postgres
operator: Exists
effect: NoSchedule
</code></pre>
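<p>Putting the two together, a <code>Cluster</code> that should run exclusively on
dedicated <code>postgres</code> nodes carries both stanzas. A combined sketch of the
two examples above:</p>
<pre><code class="language-yaml">spec:
  # &lt;snip&gt;
  affinity:
    # Schedule only on nodes labeled for postgres workloads...
    nodeSelector:
      node-role.kubernetes.io/postgres: ""
    # ...and tolerate the taint that keeps other workloads away
    tolerations:
      - key: node-role.kubernetes.io/postgres
        operator: Exists
        effect: NoSchedule
</code></pre>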
<h2 id="postgresql-architecture">PostgreSQL architecture</h2>
<p>CloudNativePG supports clusters based on asynchronous and synchronous
streaming replication to manage multiple hot standby replicas within the same
Kubernetes cluster, with the following specifications:</p>
<ul>
<li>One primary, with optional multiple hot standby replicas for HA</li>
<li>
<p>Available services for applications:</p>
<ul>
<li><code>-rw</code>: applications connect only to the primary instance of the cluster</li>
<li><code>-ro</code>: applications connect only to hot standby replicas for
read-only workloads (optional)</li>
<li><code>-r</code>: applications connect to any of the instances for read-only
workloads (optional)</li>
</ul>
</li>
<li>
<p>Shared-nothing architecture recommended for better resilience of the PostgreSQL cluster:</p>
<ul>
<li>PostgreSQL instances should reside on different Kubernetes worker nodes
and share only the network - as a result, instances should not share
the storage and preferably use local volumes attached to the node they
run on</li>
<li>PostgreSQL instances should reside in different availability zones
within the same Kubernetes cluster / region</li>
</ul>
</li>
</ul>
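<p>A minimal manifest implementing the specifications above might look like the
following sketch (name and storage size are illustrative); the operator
automatically creates the <code>cluster-example-rw</code>, <code>cluster-example-ro</code>, and
<code>cluster-example-r</code> services for it:</p>
<pre><code class="language-yaml">apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: cluster-example
spec:
  # One primary and two hot standby replicas
  instances: 3
  storage:
    size: 1Gi
</code></pre>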
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>You can configure the above services through the <code>managed.services</code> section
in the <code>Cluster</code> configuration. This can be done by reducing the number of
services and selecting the type (default is <code>ClusterIP</code>). For more details,
please refer to the <a href="../service_management/">"Service Management" section</a>.</p>
</div>
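<p>For example, the sketch below disables the default <code>-ro</code> and <code>-r</code>
services and exposes the read-write workload through an additional
<code>LoadBalancer</code> service (the additional service name is hypothetical):</p>
<pre><code class="language-yaml">spec:
  # &lt;snip&gt;
  managed:
    services:
      # Keep only the default -rw service
      disabledDefaultServices: ["ro", "r"]
      additional:
        - selectorType: rw
          serviceTemplate:
            metadata:
              name: cluster-example-rw-lb
            spec:
              type: LoadBalancer
</code></pre>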
<p>The diagram below provides a simplified view of the recommended shared-nothing
architecture for a PostgreSQL cluster spanning three different availability
zones, running on separate nodes, each with dedicated local storage for
PostgreSQL data.</p>
<p><img alt="Bird-eye view of the recommended shared nothing architecture for PostgreSQL in Kubernetes" src="../images/k8s-pg-architecture.png" /></p>
<p>CloudNativePG automatically takes care of updating the above services if
the topology of the cluster changes. For example, in case of failover, it
automatically updates the <code>-rw</code> service to point to the promoted primary,
making sure that traffic from the applications is seamlessly redirected.</p>
<div class="admonition seealso">
<p class="admonition-title">Replication</p>
<p>Please refer to the <a href="../replication/">"Replication" section</a> for more
information about how CloudNativePG relies on PostgreSQL replication,
including synchronous settings.</p>
</div>
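<p>For quick reference, a sketch of a quorum-based synchronous replication
setting follows, assuming the <code>postgresql.synchronous</code> stanza covered in
the "Replication" section (values are illustrative):</p>
<pre><code class="language-yaml">spec:
  # &lt;snip&gt;
  postgresql:
    synchronous:
      # Require one synchronous standby, chosen among any of the replicas
      method: any
      number: 1
</code></pre>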
<div class="admonition seealso">
<p class="admonition-title">Connecting from an application</p>
<p>Please refer to the <a href="../applications/">"Connecting from an application" section</a> for
information about how to connect to CloudNativePG from a stateless
application within the same Kubernetes cluster.</p>
</div>
<div class="admonition seealso">
<p class="admonition-title">Connection Pooling</p>
<p>Please refer to the <a href="../connection_pooling/">"Connection Pooling" section</a> for
information about how to take advantage of PgBouncer as a connection pooler,
and create an access layer between your applications and the PostgreSQL clusters.</p>
</div>
<h3 id="read-write-workloads">Read-write workloads</h3>
<p>Applications can decide to connect to the PostgreSQL instance elected as
<em>current primary</em> by the Kubernetes operator, as depicted in the following
diagram:</p>
<p><img alt="Applications writing to the single primary" src="../images/architecture-rw.png" /></p>
<p>Applications can use the <code>-rw</code> suffix service.</p>
<p>In case of temporary or permanent unavailability of the primary, CloudNativePG
triggers a failover for High Availability purposes, pointing the <code>-rw</code>
service to another instance of the cluster.</p>
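<p>In practice, an application in the same namespace only needs to point its
connection settings at the <code>-rw</code> service name. A sketch, assuming a cluster
named <code>cluster-example</code> and an application container that honors the
standard libpq <code>PG*</code> environment variables:</p>
<pre><code class="language-yaml"># Fragment of an application Deployment pod template (illustrative)
env:
  - name: PGHOST
    value: cluster-example-rw  # always resolves to the current primary
  - name: PGPORT
    value: "5432"
  - name: PGDATABASE
    value: app
</code></pre>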
<h3 id="read-only-workloads">Read-only workloads</h3>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>Applications must be aware of the limitations that
<a href="https://www.postgresql.org/docs/current/hot-standby.html">Hot Standby</a>
presents and be familiar with the way PostgreSQL operates when dealing with
these workloads.</p>
</div>
<p>Applications can access hot standby replicas through the <code>-ro</code> service made available
by the operator. This service enables the application to offload read-only queries from the
primary node.</p>
<p>The following diagram shows the architecture:</p>
<p><img alt="Applications reading from hot standby replicas in round robin" src="../images/architecture-read-only.png" /></p>
<p>Applications can also access any PostgreSQL instance through the
<code>-r</code> service.</p>
<h2 id="deployments-across-kubernetes-clusters">Deployments across Kubernetes clusters</h2>
<div class="admonition info">
<p class="admonition-title">Info</p>
<p>CloudNativePG supports deploying PostgreSQL across multiple Kubernetes
clusters through a feature that allows you to define a distributed PostgreSQL
topology using replica clusters, as described in this section.</p>
</div>
<p>In a distributed PostgreSQL cluster, there can only be a single PostgreSQL
instance acting as primary at any given time. This means that, at any moment,
applications can write in only one Kubernetes cluster.</p>
<p>However, for business continuity objectives it is fundamental to:</p>
<ul>
<li>reduce global <strong>recovery point objectives</strong> (<a href="../before_you_start/#rpo">RPO</a>)
by storing PostgreSQL backup data in multiple locations, regions and possibly
using different providers (Disaster Recovery)</li>
<li>reduce global <strong>recovery time objectives</strong> (<a href="../before_you_start/#rto">RTO</a>)
by taking advantage of PostgreSQL replication beyond the primary Kubernetes
cluster (High Availability)</li>
</ul>
<p>In order to address the above concerns, CloudNativePG introduces the concept of
a PostgreSQL Topology that is distributed across different Kubernetes clusters
and is made up of a primary PostgreSQL cluster and one or more PostgreSQL
replica clusters.
This feature is called <strong>distributed PostgreSQL topology with replica clusters</strong>,
and it enables multi-cluster deployments in private, public, hybrid, and
multi-cloud contexts.</p>
<p>A replica cluster is a separate <code>Cluster</code> resource that is in continuous
recovery, replicating from another source, either via WAL shipping from a WAL
archive or via streaming replication from a primary or a standby (cascading).</p>
<p>The diagram below depicts a PostgreSQL cluster spanning over two different
Kubernetes clusters, where the primary cluster is in the first Kubernetes
cluster and the replica cluster is in the second. The second Kubernetes cluster
acts as the company's disaster recovery cluster, ready to be activated in case
of disaster and unavailability of the first one.</p>
<p><img alt="An example of multi-cluster deployment with a primary and a replica cluster" src="../images/multi-cluster.png" /></p>
<p>A replica cluster can have the same architecture as the primary cluster.
Instead of a primary instance, a replica cluster has a <strong>designated primary</strong>
instance, which is a standby server with an arbitrary number of cascading
standby servers in streaming replication (symmetric architecture).</p>
<p>The designated primary can be promoted at any time, transforming the replica
cluster into a primary cluster capable of accepting write connections.
This is typically triggered by:</p>
<ul>
<li><strong>Human decision:</strong> You choose to make the other PostgreSQL cluster (or the
entire Kubernetes cluster) the primary. To avoid data loss and ensure that
the former primary can follow without being re-cloned (especially with large
data sets), you first demote the current primary, then promote the designated
primary using the API provided by CloudNativePG.</li>
<li><strong>Unexpected failure:</strong> If the entire Kubernetes cluster fails, you might
experience data loss, but you need to fail over to the other Kubernetes
cluster by promoting the PostgreSQL replica cluster.</li>
</ul>
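<p>With the distributed topology API, the controlled switchover described in the
first point above can be expressed as a configuration change. A sketch, assuming
the <code>replica.primary</code> and <code>replica.self</code> fields and two clusters named
<code>cluster-dc-a</code> and <code>cluster-dc-b</code>:</p>
<pre><code class="language-yaml"># Fragment of the replica stanza in the manifest of cluster-dc-b;
# switching primary from cluster-dc-a to cluster-dc-b in both manifests
# demotes the former primary and promotes the designated primary
replica:
  self: cluster-dc-b    # name of this cluster within the topology
  primary: cluster-dc-b # was cluster-dc-a before the switchover
  source: cluster-dc-a  # upstream to follow whenever not primary
</code></pre>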
<div class="admonition warning">
<p class="admonition-title">Warning</p>
<p>CloudNativePG cannot perform any cross-cluster automated failover, as it
does not have authority beyond a single Kubernetes cluster. Such operations
must be performed manually or delegated to a multi-cluster/federated
cluster-aware authority.</p>
</div>
<div class="admonition important">
<p class="admonition-title">Important</p>
<p>CloudNativePG allows you to control the distributed topology via
declarative configuration, enabling you to automate these procedures as part of
your Infrastructure as Code (IaC) process, including GitOps.</p>
</div>
<p>The designated primary in the above example is fed via WAL streaming
(<code>primary_conninfo</code>), with a fallback option of file-based WAL shipping through
the <code>restore_command</code> and <code>barman-cloud-wal-restore</code>.</p>
<p>CloudNativePG allows you to define topologies with multiple replica clusters.
You can also define replica clusters with a lower number of replicas, and then
increase this number when the cluster is promoted to primary.</p>
<div class="admonition seealso">
<p class="admonition-title">Replica clusters</p>
<p>Please refer to the <a href="../replica_cluster/">"Replica Clusters" section</a> for
more detailed information on how physical replica clusters operate and how to
define a distributed topology with read-only clusters across different
Kubernetes clusters. This approach can significantly enhance your global
disaster recovery and high availability (HA) strategy.</p>
</div>
</div>
</div><footer>
<div class="rst-footer-buttons" role="navigation" aria-label="Footer Navigation">
<a href="../use_cases/" class="btn btn-neutral float-left" title="Use cases"><span class="icon icon-circle-arrow-left"></span> Previous</a>
<a href="../installation_upgrade/" class="btn btn-neutral float-right" title="Installation and upgrades">Next <span class="icon icon-circle-arrow-right"></span></a>
</div>
<hr/>
<div role="contentinfo">
<!-- Copyright etc -->
<p>Copyright © CloudNativePG a Series of LF Projects, LLC</p>
</div>
Built with <a href="https://www.mkdocs.org/">MkDocs</a> using a <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<div class="rst-versions" role="note" aria-label="Versions">
<span class="rst-current-version" data-toggle="rst-current-version">
<span><a href="../use_cases/" style="color: #fcfcfc">« Previous</a></span>
<span><a href="../installation_upgrade/" style="color: #fcfcfc">Next »</a></span>
</span>
</div>
<script src="../js/jquery-3.6.0.min.js"></script>
<script>var base_url = "..";</script>
<script src="../js/theme_extra.js"></script>
<script src="../js/theme.js"></script>
<script src="../search/main.js"></script>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>