Skip to content

Commit 69630db

Browse files
authored
Merge branch 'master' into functions/entropy
2 parents 33d8b77 + 687f90f commit 69630db

File tree

1,539 files changed

+60015
-18041
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,539 files changed

+60015
-18041
lines changed

.claude/skills/code-review/SKILL.md

Lines changed: 841 additions & 0 deletions
Large diffs are not rendered by default.

.github/CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@ fe/fe-core/src/main/java/org/apache/doris/fs @CalvinKirs
3939
fe/fe-core/src/main/java/org/apache/doris/fsv2 @CalvinKirs
4040
be/src/vec/functions @zclllyybb
4141
be/**/CMakeLists.txt @zclllyybb @BiteTheDDDDt
42+
be/src/olap/rowset/segment_v2/variant @eldenmoon @csun5285
43+
.claude/ @zclllyybb
44+
AGENTS.md @zclllyybb
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
name: Code Review
2+
3+
on:
4+
issue_comment:
5+
types: [created]
6+
7+
permissions:
8+
pull-requests: write
9+
contents: read
10+
issues: write
11+
12+
jobs:
13+
code-review:
14+
runs-on: ubuntu-latest
15+
timeout-minutes: 30
16+
if: >-
17+
github.event.issue.pull_request &&
18+
contains(github.event.comment.body, '/review') &&
19+
(
20+
github.event.comment.author_association == 'MEMBER' ||
21+
github.event.comment.author_association == 'OWNER' ||
22+
github.event.comment.author_association == 'COLLABORATOR'
23+
)
24+
steps:
25+
- name: Get PR info
26+
id: pr
27+
env:
28+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
29+
run: |
30+
PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.issue.number }})
31+
HEAD_SHA=$(echo "$PR_JSON" | jq -r '.head.sha')
32+
BASE_SHA=$(echo "$PR_JSON" | jq -r '.base.sha')
33+
HEAD_REF=$(echo "$PR_JSON" | jq -r '.head.ref')
34+
BASE_REF=$(echo "$PR_JSON" | jq -r '.base.ref')
35+
echo "head_sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"
36+
echo "base_sha=$BASE_SHA" >> "$GITHUB_OUTPUT"
37+
echo "head_ref=$HEAD_REF" >> "$GITHUB_OUTPUT"
38+
echo "base_ref=$BASE_REF" >> "$GITHUB_OUTPUT"
39+
40+
- name: Checkout repository
41+
uses: actions/checkout@v4
42+
with:
43+
ref: ${{ steps.pr.outputs.head_sha }}
44+
45+
- name: Install OpenCode
46+
run: |
47+
for attempt in 1 2 3; do
48+
if curl -fsSL https://opencode.ai/install | bash; then
49+
echo "$HOME/.opencode/bin" >> $GITHUB_PATH
50+
exit 0
51+
fi
52+
echo "Install attempt $attempt failed, retrying in 10s..."
53+
sleep 10
54+
done
55+
echo "All install attempts failed"
56+
exit 1
57+
58+
- name: Configure OpenCode auth
59+
run: |
60+
mkdir -p ~/.local/share/opencode
61+
cat > ~/.local/share/opencode/auth.json <<EOF
62+
{
63+
"github-copilot": {
64+
"type": "oauth",
65+
"refresh": "${CODE_REVIEW_ZCLLL_COPILOT_OPENCODE_KEY}",
66+
"access": "${CODE_REVIEW_ZCLLL_COPILOT_OPENCODE_KEY}",
67+
"expires": 0
68+
}
69+
}
70+
EOF
71+
env:
72+
CODE_REVIEW_ZCLLL_COPILOT_OPENCODE_KEY: ${{ secrets.CODE_REVIEW_ZCLLL_COPILOT_OPENCODE_KEY }}
73+
74+
- name: Configure OpenCode permission
75+
run: |
76+
echo '{"permission":"allow"}' > opencode.json
77+
78+
- name: Prepare review prompt
79+
run: |
80+
cat > /tmp/review_prompt.txt <<'PROMPT'
81+
You are performing an automated code review inside a GitHub Actions runner. The gh CLI is available and authenticated via GH_TOKEN. You can comment on the pull request.
82+
83+
Context:
84+
- Repository: PLACEHOLDER_REPO
85+
- PR number: PLACEHOLDER_PR_NUMBER
86+
- PR Head SHA: PLACEHOLDER_HEAD_SHA
87+
- PR Base SHA: PLACEHOLDER_BASE_SHA
88+
89+
When reviewing, you must strictly follow AGENTS.md and the related skills. In addition, perform whatever review operations you need in order to inspect suspicious code and its surrounding details, so that you identify as many issues as possible.
90+
91+
## Submission
92+
- After completing the review, you MUST provide a final summary opinion based on the rules defined in AGENTS.md and the code-review skill. The summary must include conclusions for each applicable critical checkpoint.
93+
- If no issues to report, submit a short summary comment saying no issues found using: gh pr comment PLACEHOLDER_PR_NUMBER --body "<summary>"
94+
- If issues found, submit a review with inline comments plus a comprehensive summary body. Use GitHub Reviews API to ensure comments are inline:
95+
- Build a JSON array of comments like: [{ "path": "<file>", "position": <diff_position>, "body": "..." }]
96+
- Submit via: gh api repos/PLACEHOLDER_REPO/pulls/PLACEHOLDER_PR_NUMBER/reviews --input <json_file>
97+
- The JSON file should contain: {"event":"COMMENT","body":"<summary>","comments":[...]}
98+
- Do not use: gh pr review --approve or --request-changes
99+
PROMPT
100+
sed -i "s|PLACEHOLDER_REPO|${REPO}|g" /tmp/review_prompt.txt
101+
sed -i "s|PLACEHOLDER_PR_NUMBER|${PR_NUMBER}|g" /tmp/review_prompt.txt
102+
sed -i "s|PLACEHOLDER_HEAD_SHA|${HEAD_SHA}|g" /tmp/review_prompt.txt
103+
sed -i "s|PLACEHOLDER_BASE_SHA|${BASE_SHA}|g" /tmp/review_prompt.txt
104+
env:
105+
REPO: ${{ github.repository }}
106+
PR_NUMBER: ${{ github.event.issue.number }}
107+
HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
108+
BASE_SHA: ${{ steps.pr.outputs.base_sha }}
109+
110+
- name: Run automated code review
111+
env:
112+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
113+
run: |
114+
PROMPT=$(cat /tmp/review_prompt.txt)
115+
opencode run "$PROMPT" -m "github-copilot/claude-opus-4.6"

.licenserc.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ header:
9090
- "docker/thirdparties/docker-compose/kerberos/paimon_data/**"
9191
- "docker/thirdparties/docker-compose/kerberos/sql/**"
9292
- "docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl"
93+
- "docker/thirdparties/docker-compose/postgresql/certs/**"
9394
- "conf/mysql_ssl_default_certificate/*"
9495
- "conf/mysql_ssl_default_certificate/client_certificate/ca.pem"
9596
- "conf/mysql_ssl_default_certificate/client_certificate/client-cert.pem"

AGENTS.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# AGENTS.md — Apache Doris
2+
3+
This is the codebase for Apache Doris, an MPP OLAP database. It primarily consists of the Backend module BE (be/, execution and storage engine), the Frontend module FE (fe/, optimizer and transaction core), and the Cloud module (cloud/, storage-compute separation). Your basic development workflow is: modify code, build using standard procedures, add and run tests, and submit relevant changes.
4+
5+
## When running in a WORKTREE directory
6+
7+
To ensure smooth test execution without interference between worktrees, the first thing to do upon entering a worktree directory is to check if `.worktree_initialized` exists. If not, execute `hooks/setup_worktree.sh`, setting `$ROOT_WORKSPACE_PATH` to the base directory (typically `${DORIS_REPO}`) beforehand. After successful execution, verify that `.worktree_initialized` has been touched and that `thirdparty/installed` dependencies exist correctly. Also check if submodules have been properly initialized; if not, do so manually.
8+
9+
When working in worktree mode, all operations must be confined to the current worktree directory. Do not enter `${DORIS_REPO}` or use any resources there. Compilation and execution must be done within the current worktree directory. The compiled Doris cluster must use random ports not used by other worktrees (modify BE and FE conf before compilation, using a uniform offset of 707-807 from default ports without conflicting with other worktrees' ports). Run from the `output` directory within the worktree. To run regression tests, modify `regression-test/conf/regression-conf.groovy` and set the port numbers in jdbcUrl and other configuration items to your new ports so the corresponding worktree cluster can be used for regression testing.
10+
11+
## Coding Standards
12+
13+
Assert correctness only—never use defensive programming with `if` or similar constructs. Any `if` check for errors must have a clearly known inevitable failure path (not speculation). If no such scenario is found, strictly avoid using `if(valid)` checks. However, you may use the `DORIS_CHECK` macro for precondition assertions. For example, if logically A=true should always imply B=true, then strictly avoid `if(A&&B)` and instead use `if(A){DORIS_CHECK(B);...}`. In short, the principle is: upon discovering errors or unexpected situations, report errors or crash—never allow the process to continue.
14+
15+
When adding code, strictly follow existing similar code in similar contexts, including interface usage, error handling, etc., maintaining consistency. When adding any code, first try to reference existing functionality. Second, you must examine the relevant context paragraphs to fully understand the logic.
16+
17+
After adding code, you must first conduct self-review and refactoring attempts to ensure good abstraction and reuse as much as possible.
18+
19+
## Code Review
20+
21+
When conducting code review (including self-review and review tasks), you must complete the key checkpoints defined by our `code-review` skill and provide a conclusion for each applicable checkpoint as part of the final written description. Other items do not require individual responses; simply verify them during the review process.
22+
23+
## Build and Run Standards
24+
25+
Always use only the `build.sh` script with its correct parameters to build Doris BE and FE. When building, use at least `-j${DORIS_PARALLELISM}` parallelism. For example, the simplest BE+FE build command is `./build.sh --be --fe -j${DORIS_PARALLELISM}`.
26+
Build type can be set via `BUILD_TYPE` in `custom_env.sh`, but only set it to `RELEASE` when explicitly required for performance testing; otherwise, keep it as `ASAN`.
27+
You may modify BE and FE ports and network settings in `conf/` before compilation to ensure correctness and avoid conflicts.
28+
Build artifacts are in the current directory's `output/`. If starting the service, ensure all process artifacts have their conf set with appropriate non-conflicting ports and `priority_networks = 10.16.10.3/24`. Use `--daemon` when starting. Cluster startup is slow; wait at least 30s for success. If still not ready after waiting, continue waiting. If not ready after a long time, check BE and FE logs to investigate.
29+
For first-time cluster startup, you may need to manually add the backend.
30+
31+
## Testing Standards
32+
33+
All kernel features must have corresponding tests. Prioritize adding regression tests under `regression-test/`, while also having BE unit tests (`be/test/`) and FE unit tests (`fe/fe-core/src/test/`) where possible. Interface usage in test cases must first reference similar cases.
34+
35+
You must use the preset scripts in the codebase with their correct parameters to run tests (`run-regression-test.sh`, `run-be-ut.sh`, `run-fe-ut.sh`). Regression test result files must not be handwritten; they must be auto-generated via test scripts. When running regression tests, if using `-s` to specify a case, also try to use `-d` to specify the parent directory for faster execution. For example, for cases under `nereids_p0`, you can use `-d nereids_p0 -s xxx`, where `xxx` is the name from `suite("xxx")` in the groovy file.
36+
37+
BE-UT compilation must not be below `${DORIS_PARALLELISM}` parallelism.
38+
39+
Key utility functions in BE code, as well as the core logic (functions) of complete features, must have corresponding unit tests. If it's inconvenient to add unit tests, the module design and function decomposition should be reviewed again to ensure high cohesion and low coupling are properly achieved.
40+
41+
Added regression tests must comply with the following standards:
42+
1. Use `order_qt` prefix or manually add `order by` to ensure ordered results
43+
2. For cases expected to error, use the `test{sql,exception}` pattern
44+
3. After completing tests, do not drop tables; instead drop tables before using them in tests, to preserve the environment for debugging
45+
4. For ordinary single test tables, do not use `def tableName` form; instead hardcode your table name in all SQL
46+
5. Except for variables you explicitly need to adjust for testing current functionality, other variables do not need extra setup before testing. For example, nereids optimizer and pipeline engine settings can use default states
47+
48+
## Commit Standards
49+
50+
Files in git commit should only be related to the current modification task. Environment modifications for running (e.g., `conf/`, `AGENTS.md`, `hooks/`, etc.) must not be `git add`ed. When delivering the final task, you must ensure all actual code modifications have been committed.

be/src/agent/task_worker_pool.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
#include "runtime/index_policy/index_policy_mgr.h"
8989
#include "runtime/memory/global_memory_arbitrator.h"
9090
#include "runtime/snapshot_loader.h"
91+
#include "runtime/user_function_cache.h"
9192
#include "service/backend_options.h"
9293
#include "util/brpc_client_cache.h"
9394
#include "util/debug_points.h"
@@ -2396,12 +2397,17 @@ void clean_trash_callback(StorageEngine& engine, const TAgentTaskRequest& req) {
23962397
}
23972398

23982399
void clean_udf_cache_callback(const TAgentTaskRequest& req) {
2400+
const auto& clean_req = req.clean_udf_cache_req;
2401+
23992402
if (doris::config::enable_java_support) {
2400-
LOG(INFO) << "clean udf cache start: " << req.clean_udf_cache_req.function_signature;
2401-
static_cast<void>(
2402-
Jni::Util::clean_udf_class_load_cache(req.clean_udf_cache_req.function_signature));
2403-
LOG(INFO) << "clean udf cache finish: " << req.clean_udf_cache_req.function_signature;
2403+
static_cast<void>(Jni::Util::clean_udf_class_load_cache(clean_req.function_signature));
2404+
}
2405+
2406+
if (clean_req.__isset.function_id && clean_req.function_id > 0) {
2407+
UserFunctionCache::instance()->drop_function_cache(clean_req.function_id);
24042408
}
2409+
2410+
LOG(INFO) << "clean udf cache finish: function_signature=" << clean_req.function_signature;
24052411
}
24062412

24072413
void report_index_policy_callback(const ClusterInfo* cluster_info) {

0 commit comments

Comments
 (0)