Skip to content

Commit c6d04c1

Browse files
authored
Merge branch 'master' into fix-doris-25672
2 parents 92520ac + 2eff9ea commit c6d04c1

508 files changed

Lines changed: 21339 additions & 4176 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.claude/skills/be-code-style/SKILL.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ Fix C++ code formatting issues in the BE and Cloud modules using the project's c
1414
- When CI reports clang-format failures
1515
- When you need to check or fix C++ code style
1616

17+
## Prerequisites
18+
19+
Confirm that the clang-format being invoked has major version 16. If the current environment's default does not meet this requirement, try the following in order:
20+
21+
1. If `.vscode/settings.json` exists, use the clang-format.executable item in it.
22+
2. If it is a worktree directory, use the `.vscode/settings.json` from the main directory.
23+
3. Check the path to the compiler toolchain by trying to find it from the `PATH` environment variable, the current directory, and the main directory's `custom_env.sh`. Look for a clang-format with the major version number 16 in that path and its parent directory.
24+
1725
## Procedure
1826

1927
### Step 1: Auto-fix formatting
@@ -53,6 +61,7 @@ After running `clang-format.sh`, review the changes with `git diff` to verify on
5361
## Excluded Directories
5462

5563
The following are excluded from formatting (see `.clang-format-ignore`):
64+
5665
- `be/src/apache-orc/*`, `be/src/clucene/*`, `be/src/gutil/*`
5766
- `be/src/glibc-compatibility/*`
5867
- Specific third-party vendored files (mustache, sse2neon, utf8_check)

.github/workflows/comment-to-trigger-teamcity.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ jobs:
133133
reg="run (buildall|compile|p0|p1|feut|beut|cloudut|external|clickbench|cloud_p0|cloud_p1|vault_p0|nonConcurrent|performance|check_coverage)( [1-9]*[0-9]+)*"
134134
COMMENT_TRIGGER_TYPE="$(echo -e "${COMMENT_BODY}" | xargs | grep -E "${reg}" | awk -F' ' '{print $2}' | sed -n 1p | sed 's/\r//g')"
135135
COMMENT_REPEAT_TIMES="$(echo -e "${COMMENT_BODY}" | xargs | grep -E "${reg}" | awk -F' ' '{print $3}' | sed -n 1p | sed 's/\r//g')"
136+
if [[ -n "${COMMENT_REPEAT_TIMES}" && ! "${COMMENT_REPEAT_TIMES}" =~ ^[0-9]+$ ]]; then
137+
echo "COMMENT_REPEAT_TIMES '${COMMENT_REPEAT_TIMES}' is not a valid number, ignoring."
138+
COMMENT_REPEAT_TIMES=""
139+
fi
136140
echo "COMMENT_TRIGGER_TYPE=${COMMENT_TRIGGER_TYPE}" | tee -a "$GITHUB_OUTPUT"
137141
echo "COMMENT_REPEAT_TIMES=${COMMENT_REPEAT_TIMES}" | tee -a "$GITHUB_OUTPUT"
138142

.github/workflows/license-eyes.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ jobs:
4747
uses: actions/checkout@v3
4848
with:
4949
ref: ${{ github.event.pull_request.head.sha }}
50+
persist-credentials: false
5051

5152
- name: Get changed files
5253
if: github.event_name == 'pull_request_target'
@@ -90,6 +91,9 @@ jobs:
9091
CHANGED_FILES: ${{ steps.changed-files.outputs.added_modified }}
9192
run: |
9293
python3 - <<'EOF'
94+
import sys
95+
# Prevent fork-supplied files from shadowing stdlib modules
96+
sys.path = [p for p in sys.path if p not in ('', '.')]
9397
import yaml, os
9498
9599
with open('.licenserc.yaml') as f:

.github/workflows/opencode-review-runner.yml

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ jobs:
3232
with:
3333
ref: ${{ inputs.head_sha }}
3434

35-
- name: Install ripgrep
35+
- name: Install runner utilities
3636
run: |
3737
sudo apt-get update
38-
sudo apt-get install -y ripgrep
38+
sudo apt-get install -y ripgrep unzip
3939
4040
- name: Install OpenCode
4141
run: |
@@ -50,24 +50,26 @@ jobs:
5050
echo "All install attempts failed"
5151
exit 1
5252
53+
- name: Install ossutil
54+
run: |
55+
tmp_dir="$(mktemp -d)"
56+
trap 'rm -rf "$tmp_dir"' EXIT
57+
curl -fsSL -o "$tmp_dir/ossutil.zip" https://gosspublic.alicdn.com/ossutil/1.7.19/ossutil-v1.7.19-linux-amd64.zip
58+
unzip -q "$tmp_dir/ossutil.zip" -d "$tmp_dir"
59+
sudo install -m 0755 "$tmp_dir/ossutil-v1.7.19-linux-amd64/ossutil" /usr/local/bin/ossutil
60+
5361
- name: Configure OpenCode auth
62+
id: configure-auth
63+
env:
64+
OSS_AK: ${{ secrets.OSS_AK }}
65+
OSS_SK: ${{ secrets.OSS_SK }}
66+
OSS_ENDPOINT: oss-cn-hongkong.aliyuncs.com
67+
OSS_AUTH_OBJECT: oss://doris-community-ci/auth.json
5468
run: |
5569
mkdir -p ~/.local/share/opencode
56-
cat > ~/.local/share/opencode/auth.json <<EOF
57-
{
58-
"openai": {
59-
"type": "oauth",
60-
"access": "${CODE_REVIEW_ZCLLL_OPENAI_ACCESS_KEY}",
61-
"refresh": "${CODE_REVIEW_ZCLLL_OPENAI_REFRESH_KEY}",
62-
"expires": 1779122093655,
63-
"accountId": "${CODE_REVIEW_ZCLLL_OPENAI_ACCOUNT_ID}"
64-
}
65-
}
66-
EOF
67-
env:
68-
CODE_REVIEW_ZCLLL_OPENAI_ACCESS_KEY: ${{ secrets.CODE_REVIEW_ZCLLL_OPENAI_ACCESS_KEY }}
69-
CODE_REVIEW_ZCLLL_OPENAI_REFRESH_KEY: ${{ secrets.CODE_REVIEW_ZCLLL_OPENAI_REFRESH_KEY }}
70-
CODE_REVIEW_ZCLLL_OPENAI_ACCOUNT_ID: ${{ secrets.CODE_REVIEW_ZCLLL_OPENAI_ACCOUNT_ID }}
70+
ossutil -i "$OSS_AK" -k "$OSS_SK" -e "$OSS_ENDPOINT" cp -f "$OSS_AUTH_OBJECT" ~/.local/share/opencode/auth.json
71+
chmod 600 ~/.local/share/opencode/auth.json
72+
test -s ~/.local/share/opencode/auth.json
7173
7274
- name: Prepare review context directory
7375
run: |
@@ -209,7 +211,10 @@ jobs:
209211
status=${PIPESTATUS[0]}
210212
set -e
211213
212-
last_log_line=$(awk 'NF { line = $0 } END { print line }' "$REVIEW_CONTEXT_DIR/opencode-review.log")
214+
last_log_line=$(
215+
awk 'NF { line = $0 } END { print line }' "$REVIEW_CONTEXT_DIR/opencode-review.log" \
216+
| perl -pe 's/\e\[[0-9;?]*[ -\/]*[@-~]//g'
217+
)
213218
214219
failure_reason=""
215220
if printf '%s\n' "$last_log_line" | rg -q -i '^Error:|SSE read timed out'; then
@@ -227,6 +232,23 @@ jobs:
227232
exit 1
228233
fi
229234
235+
- name: Persist OpenCode auth
236+
if: ${{ always() && steps.configure-auth.outcome == 'success' }}
237+
env:
238+
OSS_AK: ${{ secrets.OSS_AK }}
239+
OSS_SK: ${{ secrets.OSS_SK }}
240+
OSS_ENDPOINT: oss-cn-hongkong.aliyuncs.com
241+
OSS_AUTH_OBJECT: oss://doris-community-ci/auth.json
242+
run: |
243+
if [ ! -s ~/.local/share/opencode/auth.json ]; then
244+
echo "::warning::OpenCode auth file is missing or empty; skip OSS auth persistence."
245+
exit 0
246+
fi
247+
248+
if ! ossutil -i "$OSS_AK" -k "$OSS_SK" -e "$OSS_ENDPOINT" cp -f ~/.local/share/opencode/auth.json "$OSS_AUTH_OBJECT"; then
249+
echo "::warning::Failed to persist OpenCode auth to OSS; continue because review already finished."
250+
fi
251+
230252
- name: Comment PR on review failure
231253
if: ${{ always() && steps.review.outcome != 'success' }}
232254
env:

AGENTS.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
This is the codebase for Apache Doris, an MPP OLAP database. It primarily consists of the Backend module BE (`be/`, execution and storage engine), the Frontend module FE (`fe/`, optimizer and transaction core), and the Cloud module (`cloud/`, storage-compute separation). Your basic development workflow is: modify code, build using standard procedures, add and run tests, and submit relevant changes.
44

5+
## Security Threat Model
6+
7+
For security scans, vulnerability triage, security reviews, and changes involving authentication, authorization, network boundaries, external catalogs, cloud tenancy, or other security-sensitive behavior, read `threat-model.md` first. Use it to determine in-scope components, trust boundaries, attacker roles, explicit non-goals, and triage classification. Findings that are out of model or by design under `threat-model.md` should be reported as such, not treated as Doris vulnerabilities.
8+
59
## When running in a WORKTREE directory
610

711
To ensure smooth test execution without interference between worktrees, the first thing to do upon entering a worktree directory is to check if `.worktree_initialized` exists. If not, execute `hooks/setup_worktree.sh`, setting `$ROOT_WORKSPACE_PATH` to the base directory (typically `${DORIS_REPO}`) beforehand. After successful execution, verify that `.worktree_initialized` has been touched and that `thirdparty/installed` dependencies exist correctly. Also check if submodules have been properly initialized; if not, do so manually.
@@ -12,6 +16,8 @@ When working in worktree mode, all operations must be confined to the current wo
1216

1317
Assert correctness only—never use defensive programming with `if` or similar constructs. Any `if` check for errors must have a clearly known inevitable failure path (not speculation). If no such scenario is found, strictly avoid using `if(valid)` checks. However, you may use the `DORIS_CHECK` macro for precondition assertions (if inside performance-sensitive areas like loops, it can only be `DCHECK`). For example, if logically A=true should always imply B=true, then strictly avoid `if (A && B)` and instead use `if (A) { DORIS_CHECK(B); ... }`. In short, the principle is: upon discovering errors or unexpected situations, report errors or crash—never allow the process to continue.
1418

19+
For `PaddedPODArray` and the types that wrap it, such as certain Column types, negative alignment allows the use of -1 as a valid index. No additional special handling is needed when the index may be -1.
20+
1521
When adding code, strictly follow existing similar code in similar contexts, including interface usage, error handling, and locking patterns. When adding any code, first try to reference existing functionality. Second, examine the relevant context paragraphs to fully understand the logic.
1622

1723
After adding code, conduct self-review and refactoring attempts to ensure good abstraction and reuse as much as possible.
@@ -35,7 +41,7 @@ When conducting code review (including self-review and review tasks), complete t
3541
Always use only the `build.sh` script with its correct parameters to build Doris BE and FE. For example, the simplest BE+FE build command is `./build.sh --be --fe`.
3642
Build type can be set via `BUILD_TYPE` in `custom_env.sh`, but only set it to `RELEASE` when explicitly required for performance testing; otherwise, keep it as `ASAN`.
3743
You may modify BE and FE ports and network settings in `conf/` before compilation to ensure correctness and avoid conflicts.
38-
Build artifacts are in the current directory's `output/`. If starting the service, ensure all process artifacts have their conf set with appropriate non-conflicting ports and `priority_networks = 10.16.10.3/24`. Use `--daemon` when starting. Cluster startup is slow; wait at least 30s for success. If still not ready after waiting, continue waiting. If not ready after a long time, check BE and FE logs to investigate.
44+
Build artifacts are in the current directory's `output/`. If starting the service, ensure all process artifacts have their conf set with appropriate non-conflicting ports and the correct `priority_networks` that match the current network environment, for example `priority_networks = 10.16.10.3/24`. Use `--daemon` when starting. Cluster startup is slow; wait at least 30s for success. If still not ready after waiting, continue waiting. If not ready after a long time, check BE and FE logs to investigate.
3945
For first-time cluster startup, you may need to manually add the backend.
4046

4147
## Testing Standards
@@ -47,11 +53,13 @@ You must use the preset scripts in the codebase with their correct parameters to
4753
Key utility functions in BE code, as well as the core logic of complete features, must have corresponding unit tests. If it is inconvenient to add unit tests, revisit the module design and function decomposition to ensure high cohesion and low coupling.
4854

4955
Added regression tests must comply with the following standards:
56+
5057
1. Use `order_qt` prefix or manually add `order by` to ensure ordered results
5158
2. For cases expected to error, use the `test{sql,exception}` pattern
5259
3. After completing tests, do not drop tables; instead drop tables before using them in tests, to preserve the environment for debugging
5360
4. For ordinary single test tables, do not use `def tableName` form; instead hardcode your table name in all SQL
5461
5. Except for variables you explicitly need to adjust for testing current functionality, other variables do not need extra setup before testing. For example, nereids optimizer and pipeline engine settings can use default states
62+
6. For deterministic expected results, do not use methods like `assert` in Groovy test files; instead, generate the `.out` file using `qt_sql` and similar methods.
5563

5664
## Commit Standards
5765

@@ -83,10 +91,12 @@ Problem Summary: <Describe the problem this commit addresses>
8391
```
8492

8593
Key rules for commit messages:
94+
8695
1. The title must follow the `[type](module)` format validated by the PR title checker (`.github/workflows/title-checker.yml`). Common types include: `fix`, `feature`, `improvement`, `refactor`, `chore`, `test`, `doc`. Common modules include: `fe`, `be`, `cloud`, `regression`, `build`
8796
2. The short summary must be concise and written in imperative mood (e.g., `[fix](fe) Fix null pointer in scan node` not `[fix](fe) Fixed null pointer`)
8897
3. The `Issue Number` field must reference the corresponding GitHub Issue with `close #xxx` syntax when applicable
8998
4. The `Release note` section must be filled in for any user-visible behavior or feature change; write "None" for internal refactoring or test-only changes
90-
5. The test section must honestly reflect the testing performed; do not claim tests that were not actually run
99+
5. The `Problem Summary` section should cover the following content when available: problem reproduction method, root cause in code, end-to-end results/phenomena before and after repair, and the fix. If it's a refactoring, explain the reason. If it's a performance improvement, specify the case and the exact improvement amount. DO NOT mention any specific JIRA numbers. The background of the problem should be fully understandable through this section alone.
100+
6. The test section must honestly reflect the testing performed; do not claim tests that were not actually run
91101

92102
Files in a git commit should only be related to the current modification task. Environment modifications for running (for example `conf/`, `AGENTS.md`, `hooks/`) must not be `git add`ed. When delivering the final task, ensure all actual code modifications have been committed.

be/benchmark/benchmark_zone_map_index.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434

3535
#include "core/data_type/data_type_factory.hpp"
3636
#include "core/string_ref.h"
37-
#include "storage/field.h"
3837
#include "storage/index/zone_map/zone_map_index.h"
3938
#include "storage/tablet/tablet_schema.h"
4039
#include "util/slice.h"
@@ -116,9 +115,8 @@ std::unique_ptr<ZoneMapIndexWriter> make_writer() {
116115
col = make_column(FieldType::OLAP_FIELD_TYPE_VARCHAR, 64, 1);
117116
dtype = DataTypeFactory::instance().create_data_type(TYPE_VARCHAR, false, 0, 0, 64);
118117
}
119-
std::unique_ptr<StorageField> field(StorageFieldFactory::create(*col));
120118
std::unique_ptr<ZoneMapIndexWriter> w;
121-
(void)ZoneMapIndexWriter::create(dtype, field.get(), w);
119+
(void)ZoneMapIndexWriter::create(dtype, col.get(), w);
122120
return w;
123121
}
124122

be/src/cloud/cloud_rowset_writer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Status CloudRowsetWriter::init(const RowsetWriterContext& rowset_writer_context)
5454
_rowset_meta->set_rowset_id(_context.rowset_id);
5555
_rowset_meta->set_partition_id(_context.partition_id);
5656
_rowset_meta->set_tablet_id(_context.tablet_id);
57+
_rowset_meta->set_db_id(_context.db_id);
58+
_rowset_meta->set_table_id(_context.table_id);
5759
_rowset_meta->set_index_id(_context.index_id);
5860
_rowset_meta->set_tablet_schema_hash(_context.tablet_schema_hash);
5961
_rowset_meta->set_rowset_type(_context.rowset_type);

be/src/cloud/cloud_schema_change_job.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,10 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam
573573
}
574574
}
575575
_new_tablet->add_rowsets(std::move(_output_rowsets), true, wlock, false);
576+
// Ensure the real new tablet has a continuous local version graph before it becomes
577+
// visible. Later RUNNING-tablet delete bitmap sync depends on capturing all old versions.
578+
RETURN_IF_ERROR(_cloud_storage_engine.meta_mgr().fill_version_holes(
579+
_new_tablet.get(), _new_tablet->max_version_unlocked(), wlock));
576580
_new_tablet->set_cumulative_layer_point(_output_cumulative_point);
577581
_new_tablet->reset_approximate_stats(stats.num_rowsets(), stats.num_segments(),
578582
stats.num_rows(), stats.data_size());

be/src/cloud/cloud_tablets_channel.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "cloud/config.h"
2626
#include "load/channel/tablets_channel.h"
2727
#include "load/delta_writer/delta_writer.h"
28+
#include "storage/tablet_info.h"
2829

2930
namespace doris {
3031

@@ -42,6 +43,9 @@ std::unique_ptr<BaseDeltaWriter> CloudTabletsChannel::create_delta_writer(
4243

4344
Status CloudTabletsChannel::add_batch(const PTabletWriterAddBlockRequest& request,
4445
PTabletWriterAddBlockResult* response) {
46+
if (_schema != nullptr && _schema->row_binlog_index_schema() != nullptr) {
47+
return Status::NotSupported("cloud mode does not support binlog<row> now");
48+
}
4549
// FIXME(plat1ko): Too many duplicate code with `TabletsChannel`
4650
SCOPED_TIMER(_add_batch_timer);
4751
int64_t cur_seq = 0;

be/src/cloud/pb_convert.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,17 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, const RowsetMetaPB& in)
118118
out->set_job_id(in.job_id());
119119
}
120120
if (in.has_commit_tso()) {
121-
out->set_commit_tso(in.commit_tso());
121+
out->mutable_commit_tso()->CopyFrom(in.commit_tso());
122122
}
123123
if (in.has_is_row_binlog()) {
124124
out->set_is_row_binlog(in.is_row_binlog());
125125
}
126+
if (in.has_db_id()) {
127+
out->set_db_id(in.db_id());
128+
}
129+
if (in.has_table_id()) {
130+
out->set_table_id(in.table_id());
131+
}
126132
}
127133

128134
void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, RowsetMetaPB&& in) {
@@ -208,11 +214,17 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, RowsetMetaPB&& in) {
208214
out->set_job_id(in.job_id());
209215
}
210216
if (in.has_commit_tso()) {
211-
out->set_commit_tso(in.commit_tso());
217+
out->mutable_commit_tso()->CopyFrom(in.commit_tso());
212218
}
213219
if (in.has_is_row_binlog()) {
214220
out->set_is_row_binlog(in.is_row_binlog());
215221
}
222+
if (in.has_db_id()) {
223+
out->set_db_id(in.db_id());
224+
}
225+
if (in.has_table_id()) {
226+
out->set_table_id(in.table_id());
227+
}
216228
}
217229

218230
RowsetMetaPB cloud_rowset_meta_to_doris(const RowsetMetaCloudPB& in) {
@@ -308,11 +320,17 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in)
308320
out->set_job_id(in.job_id());
309321
}
310322
if (in.has_commit_tso()) {
311-
out->set_commit_tso(in.commit_tso());
323+
out->mutable_commit_tso()->CopyFrom(in.commit_tso());
312324
}
313325
if (in.has_is_row_binlog()) {
314326
out->set_is_row_binlog(in.is_row_binlog());
315327
}
328+
if (in.has_db_id()) {
329+
out->set_db_id(in.db_id());
330+
}
331+
if (in.has_table_id()) {
332+
out->set_table_id(in.table_id());
333+
}
316334
}
317335

318336
void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in) {
@@ -397,11 +415,17 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in) {
397415
out->set_job_id(in.job_id());
398416
}
399417
if (in.has_commit_tso()) {
400-
out->set_commit_tso(in.commit_tso());
418+
out->mutable_commit_tso()->CopyFrom(in.commit_tso());
401419
}
402420
if (in.has_is_row_binlog()) {
403421
out->set_is_row_binlog(in.is_row_binlog());
404422
}
423+
if (in.has_db_id()) {
424+
out->set_db_id(in.db_id());
425+
}
426+
if (in.has_table_id()) {
427+
out->set_table_id(in.table_id());
428+
}
405429
}
406430

407431
TabletSchemaCloudPB doris_tablet_schema_to_cloud(const TabletSchemaPB& in) {

0 commit comments

Comments
 (0)