Skip to content

Commit 55d0ad1

Browse files
committed
blog
2 parents 4722a79 + 3bb6f29 commit 55d0ad1

14 files changed

Lines changed: 1028 additions & 11 deletions

File tree

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ tutorial/example_math_agent/math_agent_debug.yaml
145145
saved_experiments
146146
tests/temp
147147
vsdb.py
148-
appworld_pack_v2.tar*
148+
appworld_pack_v3.tar*
149149
saved_checkpoints
150150
data
151151
datasets
@@ -160,8 +160,11 @@ tutorial/example_deep_finance/scripts/*
160160
flash_attn-2.8.*.whl
161161
tutorial/example_deep_finance/prepare_data/*
162162
tutorial/example_deep_finance/judge/analytical_sufficiency/*
163164
tutorial/example_deep_finance/output_report/*
164165
dataset_gsm8k/*
165168

166169
.dockerignore
167170
benchmark_datasets

ajet/utils/metric_helper/reward_metric_helper.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,14 @@ def compute_reward_metrics(reward_stats_list: List[Dict[str, Any]], prefix: str
8383
openjudge_graders = [
8484
"presentation_quality",
8585
"grounding",
8687
"planning",
8788
"audit",
8889
"traceability",
8990
"cgcv"
9094
]
9195

9296
for grader_name in openjudge_graders:

docs/en/example_app_world.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ export APPWORLD_PATH="${base_path}/pack_all_in_one"
2424
export APPWORLD_SCRIPT="bash EnvService/env_sandbox/appworld.sh"
2525

2626
rm -rf "${APPWORLD_PATH}"
27-
rm -f ./appworld_pack_v2.tar.gz
27+
rm -f ./appworld_pack_v3.tar.gz
2828

29-
wget -q "https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v2.tar.gz" -O appworld_pack_v2.tar.gz
30-
tar -xzf ./appworld_pack_v2.tar.gz -C "${base_path}"
29+
wget -q "https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v3.tar.gz" -O appworld_pack_v3.tar.gz
30+
tar -xzf ./appworld_pack_v3.tar.gz -C "${base_path}"
3131
```
3232

3333
Then export the environment variables (re-run in every new shell):

tests/bench/benchmark_appworld/execute_benchmark_appworld.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,25 +75,25 @@ def clear_system_processes(self):
7575

7676
def install_appworld(self):
7777
# run:
78-
# `rm -rf /tmp/pack_all_in_one & wget https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v2.tar.gz && tar -xzf ./appworld_pack_v2.tar.gz -C /tmp`
78+
# `rm -rf /tmp/pack_all_in_one && wget https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v3.tar.gz && tar -xzf ./appworld_pack_v3.tar.gz -C /tmp`
7979
self.clear_system_processes()
8080
import shutil
8181

8282
if os.path.exists("/tmp/pack_all_in_one"):
8383
shutil.rmtree("/tmp/pack_all_in_one")
84-
if os.path.exists("./appworld_pack_v2.tar.gz"):
85-
os.remove("./appworld_pack_v2.tar.gz")
84+
if os.path.exists("./appworld_pack_v3.tar.gz"):
85+
os.remove("./appworld_pack_v3.tar.gz")
8686
subprocess.run(
8787
[
8888
"wget",
89-
"https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v2.tar.gz",
89+
"https://dail-wlcb.oss-cn-wulanchabu.aliyuncs.com/astuner_archive/appworld_pack_v3.tar.gz",
9090
]
9191
)
9292
subprocess.run(
9393
[
9494
"tar",
9595
"-xzf",
96-
"./appworld_pack_v2.tar.gz",
96+
"./appworld_pack_v3.tar.gz",
9797
"-C",
9898
"/tmp",
9999
]

tests/bench/benchmark_frozenlake/benchmark_frozenlake.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
class TestProbe(BenchmarkProbe):
1010
def __init__(self):
1111
# fmt: off
12-
self.expected_train_time = 3600 * 24 # 24 hours budget for frozenlake easy benchmark
12+
self.expected_train_time = 3600 * 48 # 48 hours budget for frozenlake easy benchmark
1313
self.begin_time = time.time()
1414
self.reward_array = []
1515
self.reward_expectation_avg_window = 20

tutorial/example_deep_finance/blog.md

Lines changed: 482 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)