From 8dee32ce87e51828faacb7ff907fe0efb78c77fb Mon Sep 17 00:00:00 2001
From: Liwei Jiang <wrlcke@outlook.com>
Date: Wed, 25 Feb 2026 19:27:29 +0800
Subject: [PATCH 1/3] doc for aggregate function entropy

---
 .../aggregate-functions/entropy.md            | 101 +++++++++++++++++
 .../aggregate-functions/entropy.md            | 103 ++++++++++++++++++
 sidebars.ts                                   |   1 +
 3 files changed, 205 insertions(+)
 create mode 100644 docs/sql-manual/sql-functions/aggregate-functions/entropy.md
 create mode 100644 i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/entropy.md b/docs/sql-manual/sql-functions/aggregate-functions/entropy.md
new file mode 100644
index 0000000000000..04f4d297dc3bc
--- /dev/null
+++ b/docs/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -0,0 +1,101 @@
+---
+{
+    "title": "ENTROPY",
+    "language": "en",
+    "description": "Calculate the Shannon entropy of all non-null values in the specified column or expression."
+}
+---
+
+## Description
+
+Computes the Shannon entropy of all non-null values in the specified column or expression.
+
+Entropy measures the uncertainty or randomness of a distribution. This function builds an empirical frequency map of the input values and computes entropy in bits using the base-2 logarithm.
+
+The Shannon entropy is defined as:
+
+$
+Entropy(X) = -\sum_{i=1}^{k} p_i \log_2(p_i)
+$
+
+Where:
+
+- $k$ is the number of distinct non-null values  
+- $p_i = \frac{\text{count}(x_i)}{\text{total non-null count}}$
+
+## Syntax
+
+```sql
+ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
+```
+
+## Parameters
+
+| Parameter | Description |
+|----------|-------------|
+| `<expr1> [, <expr2>, ...]` | One or more expressions or columns. Supported types: TinyInt, SmallInt, Integer, BigInt, LargeInt, Float, Double, Decimal, String, IPv4/IPv6, Array, Map,Struct. When multiple expressions are provided, their values are serialized together to form a single composite key, and entropy is computed over the frequency distribution of these composite keys. |
+
+## Return Value
+
+Returns a DOUBLE representing the Shannon entropy in bits.
+
+- Returns NULL if all values are NULL or the input is empty.
+- Ignores NULL values during computation. When multiple expressions are given, a row is skipped if any of its arguments is NULL.
+
+## Examples
+
+```sql
+CREATE TABLE t1 (
+    id INT,
+    v  INT
+) DISTRIBUTED BY HASH(id) BUCKETS 1
+PROPERTIES ("replication_num"="1");
+
+INSERT INTO t1 VALUES
+    (1, 1),
+    (2, 2),
+    (3, 2),
+    (4, NULL);
+```
+
+```sql
+SELECT entropy(v) FROM t1;
+```
+
+Distribution: `{1:1, 2:2}`      $H = -\left(\frac{1}{3}\log_2\frac{1}{3} + \frac{2}{3}\log_2\frac{2}{3}\right)=0.9183$
+
+```text
++--------------------+
+| entropy(x)         |
++--------------------+
+| 0.9182958340544896 |
++--------------------+
+```
+
+```sql
+SELECT entropy(1);
+```
+
+Only one distinct value → entropy = 0
+
+```text
++------------+
+| entropy(1) |
++------------+
+|          0 |
++------------+
+```
+
+```sql
+SELECT entropy(NULL) FROM t1;
+```
+
+Returns NULL if all values are NULL or the input is empty.
+
+```sql
++---------------+
+| entropy(NULL) |
++---------------+
+|          NULL |
++---------------+
+```
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
new file mode 100644
index 0000000000000..0e952764f3698
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -0,0 +1,103 @@
+---
+{
+    "title": "ENTROPY",
+    "language": "zh-CN",
+    "description": "计算指定列或表达式中所有非 NULL 值的香农熵（Shannon Entropy）。"
+}
+---
+
+## 描述
+
+计算指定列或表达式中所有非 NULL 值的香农熵（Shannon Entropy）。
+
+熵用于衡量分布的不确定性或随机性。该函数会基于输入值构建经验频率分布，并使用以 2 为底的对数计算熵，单位为比特（bits）。
+
+香农熵的定义如下：
+
+$
+Entropy(X) = -\sum_{i=1}^{k} p_i \log_2(p_i)
+$
+
+其中：
+
+- $k$ 为非 NULL 的不同值的数量  
+- $p_i = \frac{x_i \text{ 的数量}}{\text{所有非 NULL 值的数量}}$
+
+## 语法
+
+```sql
+ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
+```
+
+## 参数
+
+| 参数 | 说明 |
+|------|------|
+| `<expr1> [, <expr2>, ...]` | 一个或多个表达式或列。支持的类型包括：TinyInt、SmallInt、Integer、BigInt、LargeInt、Float、Double、Decimal、String、IPv4/IPv6、Array、Map、Struct 等。当提供多列时，每行的多个值会被序列化为一个复合键，并基于复合键的频率分布计算熵。 |
+
+## 返回值
+
+返回一个 DOUBLE，表示以比特为单位的香农熵。
+
+- 如果所有值均为 NULL 或输入为空，则返回 NULL。
+- 计算过程中会忽略 NULL 值。传入多个表达式时，只要某行的任一参数为 NULL，该行就会被忽略。
+
+## 举例
+
+```sql
+CREATE TABLE t1 (
+    id INT,
+    v  INT
+) DISTRIBUTED BY HASH(id) BUCKETS 1
+PROPERTIES ("replication_num"="1");
+
+INSERT INTO t1 VALUES
+    (1, 1),
+    (2, 2),
+    (3, 2),
+    (4, NULL);
+```
+
+```sql
+SELECT entropy(v) FROM t1;
+```
+
+频率分布：`{1:1, 2:2}`  
+
+熵的计算：  $H = -\left(\frac{1}{3}\log_2\frac{1}{3} + \frac{2}{3}\log_2\frac{2}{3}\right)=0.9183$
+
+```text
++--------------------+
+| entropy(x)         |
++--------------------+
+| 0.9182958340544896 |
++--------------------+
+```
+
+```sql
+SELECT entropy(1);
+```
+
+只有一个唯一值 → 熵 = 0
+
+```text
++------------+
+| entropy(1) |
++------------+
+|          0 |
++------------+
+```
+
+```sql
+SELECT entropy(NULL) FROM t1;
+```
+
+当所有值均为 NULL 或输入为空时返回 NULL。
+
+```text
++---------------+
+| entropy(NULL) |
++---------------+
+|          NULL |
++---------------+
+```
diff --git a/sidebars.ts b/sidebars.ts
index 718e735d560fa..16e1bda661392 100644
--- a/sidebars.ts
+++ b/sidebars.ts
@@ -1895,6 +1895,7 @@ const sidebars: SidebarsConfig = {
                                 'sql-manual/sql-functions/aggregate-functions/count-by-enum',
                                 'sql-manual/sql-functions/aggregate-functions/covar',
                                 'sql-manual/sql-functions/aggregate-functions/covar-samp',
+                                'sql-manual/sql-functions/aggregate-functions/entropy',
                                 'sql-manual/sql-functions/aggregate-functions/group-array-intersect',
                                 'sql-manual/sql-functions/aggregate-functions/group-array-union',
                                 'sql-manual/sql-functions/aggregate-functions/group-bit-and',

From 5a0792f3517a3d9a4466f4241b6d06914606fb8a Mon Sep 17 00:00:00 2001
From: Liwei Jiang <wrlcke@outlook.com>
Date: Sat, 14 Mar 2026 03:11:13 +0800
Subject: [PATCH 2/3] fix and add multi column example

---
 .../aggregate-functions/entropy.md            | 42 ++++++++++++++-----
 .../aggregate-functions/entropy.md            | 38 ++++++++++++-----
 2 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/docs/sql-manual/sql-functions/aggregate-functions/entropy.md b/docs/sql-manual/sql-functions/aggregate-functions/entropy.md
index 04f4d297dc3bc..d06b732f593b0 100644
--- a/docs/sql-manual/sql-functions/aggregate-functions/entropy.md
+++ b/docs/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -33,7 +33,7 @@ ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
 
 | Parameter | Description |
 |----------|-------------|
-| `<expr1> [, <expr2>, ...]` | One or more expressions or columns. Supported types: TinyInt, SmallInt, Integer, BigInt, LargeInt, Float, Double, Decimal, String, IPv4/IPv6, Array, Map,Struct. When multiple expressions are provided, their values are serialized together to form a single composite key, and entropy is computed over the frequency distribution of these composite keys. |
+| `<expr1> [, <expr2>, ...]` | One or more expressions or columns. Supported types: TinyInt, SmallInt, Integer, BigInt, LargeInt, Float, Double, Decimal, String, IPv4/IPv6, Array, Map, Struct. When multiple expressions are provided, their values are serialized together to form a single composite key, and entropy is computed over the frequency distribution of these composite keys. |
 
 ## Return Value
 
@@ -47,31 +47,51 @@ Returns a DOUBLE representing the Shannon entropy in bits.
 ```sql
 CREATE TABLE t1 (
     id INT,
-    v  INT
+    c1 INT,
+    c2 STRING
 ) DISTRIBUTED BY HASH(id) BUCKETS 1
 PROPERTIES ("replication_num"="1");
 
 INSERT INTO t1 VALUES
-    (1, 1),
-    (2, 2),
-    (3, 2),
-    (4, NULL);
+    (1, 1, "a"),
+    (2, 1, "a"),
+    (3, 1, "b"),
+    (4, 2, "a"),
+    (5, NULL, "a");
 ```
 
 ```sql
-SELECT entropy(v) FROM t1;
+SELECT entropy(c1) FROM t1;
 ```
 
-Distribution: `{1:1, 2:2}`      $H = -\left(\frac{1}{3}\log_2\frac{1}{3} + \frac{2}{3}\log_2\frac{2}{3}\right)=0.9183$
+Distribution: 1 → 3, 2 → 1
+
+$H = -\left(\frac{3}{4}\log_2\frac{3}{4} + \frac{1}{4}\log_2\frac{1}{4}\right) \approx 0.811$
 
 ```text
 +--------------------+
-| entropy(x)         |
+| entropy(c1)        |
 +--------------------+
-| 0.9182958340544896 |
+| 0.8112781244591328 |
 +--------------------+
 ```
 
+```sql
+SELECT entropy(c1, c2) FROM t1;
+```
+
+Distribution: (1, "a") → 2, (1, "b") → 1, (2, "a") → 1
+
+$H = -\left(\frac{1}{4}\log_2\frac{1}{4} + \frac{2}{4}\log_2\frac{2}{4}+ \frac{1}{4}\log_2\frac{1}{4}\right)=1.5$
+
+```text
++-----------------+
+| entropy(c1, c2) |
++-----------------+
+|             1.5 |
++-----------------+
+```
+
 ```sql
 SELECT entropy(1);
 ```
@@ -92,7 +112,7 @@ SELECT entropy(NULL) FROM t1;
 
 Returns NULL if all values are NULL or the input is empty.
 
-```sql
+```text
 +---------------+
 | entropy(NULL) |
 +---------------+
diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
index 0e952764f3698..2b58b697d4d35 100644
--- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -47,33 +47,51 @@ ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
 ```sql
 CREATE TABLE t1 (
     id INT,
-    v  INT
+    c1 INT,
+    c2 STRING
 ) DISTRIBUTED BY HASH(id) BUCKETS 1
 PROPERTIES ("replication_num"="1");
 
 INSERT INTO t1 VALUES
-    (1, 1),
-    (2, 2),
-    (3, 2),
-    (4, NULL);
+    (1, 1, "a"),
+    (2, 1, "a"),
+    (3, 1, "b"),
+    (4, 2, "a"),
+    (5, NULL, "a");
 ```
 
 ```sql
-SELECT entropy(v) FROM t1;
+SELECT entropy(c1) FROM t1;
 ```
 
-频率分布：`{1:1, 2:2}`  
+频率分布：1 → 3, 2 → 1
 
-熵的计算：  $H = -\left(\frac{1}{3}\log_2\frac{1}{3} + \frac{2}{3}\log_2\frac{2}{3}\right)=0.9183$
+熵的计算：$H = -\left(\frac{3}{4}\log_2\frac{3}{4} + \frac{1}{4}\log_2\frac{1}{4}\right) \approx 0.811$
 
 ```text
 +--------------------+
-| entropy(x)         |
+| entropy(c1)        |
 +--------------------+
-| 0.9182958340544896 |
+| 0.8112781244591328 |
 +--------------------+
 ```
 
+```sql
+SELECT entropy(c1, c2) FROM t1;
+```
+
+频率分布：(1, "a") → 2, (1, "b") → 1, (2, "a") → 1
+
+熵的计算：$H = -\left(\frac{1}{4}\log_2\frac{1}{4} + \frac{2}{4}\log_2\frac{2}{4}+ \frac{1}{4}\log_2\frac{1}{4}\right)=1.5$
+
+```text
++-----------------+
+| entropy(c1, c2) |
++-----------------+
+|             1.5 |
++-----------------+
+```
+
 ```sql
 SELECT entropy(1);
 ```

From e69f2261e2132d356494438ea8c5da4e81c50d71 Mon Sep 17 00:00:00 2001
From: Liwei Jiang <wrlcke@outlook.com>
Date: Sun, 15 Mar 2026 06:42:13 +0800
Subject: [PATCH 3/3] add doc for version 4.x

---
 .../aggregate-functions/entropy.md            | 125 ++++++++++++++++++
 .../aggregate-functions/entropy.md            | 125 ++++++++++++++++++
 versioned_sidebars/version-4.x-sidebars.json  |   1 +
 3 files changed, 251 insertions(+)
 create mode 100644 i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md
 create mode 100644 versioned_docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md

diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md
new file mode 100644
index 0000000000000..ce54acc363f98
--- /dev/null
+++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -0,0 +1,125 @@
+---
+{
+    "title": "ENTROPY",
+    "language": "zh-CN",
+    "description": "计算指定列或表达式中所有非 NULL 值的香农熵（Shannon Entropy）。"
+}
+---
+
+## 描述
+
+计算指定列或表达式中所有非 NULL 值的香农熵（Shannon Entropy）。
+
+熵用于衡量分布的不确定性或随机性。该函数会基于输入值构建经验频率分布，并使用以 2 为底的对数计算熵，单位为比特（bits）。
+
+香农熵的定义如下：
+
+$
+Entropy(X) = -\sum_{i=1}^{k} p_i \log_2(p_i)
+$
+
+其中：
+
+- $k$ 为非 NULL 的不同值的数量  
+- $p_i = \frac{x_i \text{ 的数量}}{\text{所有非 NULL 值的数量}}$
+
+:::info 备注
+从 Apache Doris 4.1.0 开始支持该函数
+:::
+
+## 语法
+
+```sql
+ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
+```
+
+## 参数
+
+| 参数 | 说明 |
+|------|------|
+| `<expr1> [, <expr2>, ...]` | 一个或多个表达式或列。支持的类型包括：TinyInt、SmallInt、Integer、BigInt、LargeInt、Float、Double、Decimal、String、IPv4/IPv6、Array、Map、Struct 等。当提供多列时，每行的多个值会被序列化为一个复合键，并基于复合键的频率分布计算熵。 |
+
+## 返回值
+
+返回一个 DOUBLE，表示以比特为单位的香农熵。
+
+- 如果所有值均为 NULL 或输入为空，则返回 NULL。
+- 计算过程中会忽略 NULL 值。传入多个表达式时，只要某行的任一参数为 NULL，该行就会被忽略。
+
+## 举例
+
+```sql
+CREATE TABLE t1 (
+    id INT,
+    c1 INT,
+    c2 STRING
+) DISTRIBUTED BY HASH(id) BUCKETS 1
+PROPERTIES ("replication_num"="1");
+
+INSERT INTO t1 VALUES
+    (1, 1, "a"),
+    (2, 1, "a"),
+    (3, 1, "b"),
+    (4, 2, "a"),
+    (5, NULL, "a");
+```
+
+```sql
+SELECT entropy(c1) FROM t1;
+```
+
+频率分布：1 → 3, 2 → 1
+
+熵的计算：$H = -\left(\frac{3}{4}\log_2\frac{3}{4} + \frac{1}{4}\log_2\frac{1}{4}\right) \approx 0.811$
+
+```text
++--------------------+
+| entropy(c1)        |
++--------------------+
+| 0.8112781244591328 |
++--------------------+
+```
+
+```sql
+SELECT entropy(c1, c2) FROM t1;
+```
+
+频率分布：(1, "a") → 2, (1, "b") → 1, (2, "a") → 1
+
+熵的计算：$H = -\left(\frac{1}{4}\log_2\frac{1}{4} + \frac{2}{4}\log_2\frac{2}{4}+ \frac{1}{4}\log_2\frac{1}{4}\right)=1.5$
+
+```text
++-----------------+
+| entropy(c1, c2) |
++-----------------+
+|             1.5 |
++-----------------+
+```
+
+```sql
+SELECT entropy(1);
+```
+
+只有一个唯一值 → 熵 = 0
+
+```text
++------------+
+| entropy(1) |
++------------+
+|          0 |
++------------+
+```
+
+```sql
+SELECT entropy(NULL) FROM t1;
+```
+
+当所有值均为 NULL 或输入为空时返回 NULL。
+
+```text
++---------------+
+| entropy(NULL) |
++---------------+
+|          NULL |
++---------------+
+```
diff --git a/versioned_docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md b/versioned_docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md
new file mode 100644
index 0000000000000..85e707f20ec9b
--- /dev/null
+++ b/versioned_docs/version-4.x/sql-manual/sql-functions/aggregate-functions/entropy.md
@@ -0,0 +1,125 @@
+---
+{
+    "title": "ENTROPY",
+    "language": "en",
+    "description": "Calculate the Shannon entropy of all non-null values in the specified column or expression."
+}
+---
+
+## Description
+
+Computes the Shannon entropy of all non-null values in the specified column or expression.
+
+Entropy measures the uncertainty or randomness of a distribution. This function builds an empirical frequency map of the input values and computes entropy in bits using the base-2 logarithm.
+
+The Shannon entropy is defined as:
+
+$
+Entropy(X) = -\sum_{i=1}^{k} p_i \log_2(p_i)
+$
+
+Where:
+
+- $k$ is the number of distinct non-null values  
+- $p_i = \frac{\text{count}(x_i)}{\text{total non-null count}}$
+
+:::info Note
+Supported since Apache Doris 4.1.0
+:::
+
+## Syntax
+
+```sql
+ENTROPY(<expr1> [, <expr2>, ... , <exprN>])
+```
+
+## Parameters
+
+| Parameter | Description |
+|----------|-------------|
+| `<expr1> [, <expr2>, ...]` | One or more expressions or columns. Supported types: TinyInt, SmallInt, Integer, BigInt, LargeInt, Float, Double, Decimal, String, IPv4/IPv6, Array, Map, Struct. When multiple expressions are provided, their values are serialized together to form a single composite key, and entropy is computed over the frequency distribution of these composite keys. |
+
+## Return Value
+
+Returns a DOUBLE representing the Shannon entropy in bits.
+
+- Returns NULL if all values are NULL or the input is empty.
+- Ignores NULL values during computation. When multiple expressions are given, a row is skipped if any of its arguments is NULL.
+
+## Examples
+
+```sql
+CREATE TABLE t1 (
+    id INT,
+    c1 INT,
+    c2 STRING
+) DISTRIBUTED BY HASH(id) BUCKETS 1
+PROPERTIES ("replication_num"="1");
+
+INSERT INTO t1 VALUES
+    (1, 1, "a"),
+    (2, 1, "a"),
+    (3, 1, "b"),
+    (4, 2, "a"),
+    (5, NULL, "a");
+```
+
+```sql
+SELECT entropy(c1) FROM t1;
+```
+
+Distribution: 1 → 3, 2 → 1
+
+$H = -\left(\frac{3}{4}\log_2\frac{3}{4} + \frac{1}{4}\log_2\frac{1}{4}\right) \approx 0.811$
+
+```text
++--------------------+
+| entropy(c1)        |
++--------------------+
+| 0.8112781244591328 |
++--------------------+
+```
+
+```sql
+SELECT entropy(c1, c2) FROM t1;
+```
+
+Distribution: (1, "a") → 2, (1, "b") → 1, (2, "a") → 1
+
+$H = -\left(\frac{1}{4}\log_2\frac{1}{4} + \frac{2}{4}\log_2\frac{2}{4}+ \frac{1}{4}\log_2\frac{1}{4}\right)=1.5$
+
+```text
++-----------------+
+| entropy(c1, c2) |
++-----------------+
+|             1.5 |
++-----------------+
+```
+
+```sql
+SELECT entropy(1);
+```
+
+Only one distinct value → entropy = 0
+
+```text
++------------+
+| entropy(1) |
++------------+
+|          0 |
++------------+
+```
+
+```sql
+SELECT entropy(NULL) FROM t1;
+```
+
+Returns NULL if all values are NULL or the input is empty.
+
+```text
++---------------+
+| entropy(NULL) |
++---------------+
+|          NULL |
++---------------+
+```
diff --git a/versioned_sidebars/version-4.x-sidebars.json b/versioned_sidebars/version-4.x-sidebars.json
index a61946af0f0f6..0fd7743e0c7d1 100644
--- a/versioned_sidebars/version-4.x-sidebars.json
+++ b/versioned_sidebars/version-4.x-sidebars.json
@@ -1933,6 +1933,7 @@
                                 "sql-manual/sql-functions/aggregate-functions/count-by-enum",
                                 "sql-manual/sql-functions/aggregate-functions/covar",
                                 "sql-manual/sql-functions/aggregate-functions/covar-samp",
+                                "sql-manual/sql-functions/aggregate-functions/entropy",
                                 "sql-manual/sql-functions/aggregate-functions/group-array-intersect",
                                 "sql-manual/sql-functions/aggregate-functions/group-array-union",
                                 "sql-manual/sql-functions/aggregate-functions/group-bit-and",