File tree: Expand file tree / Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ run_with_timeout() {
69 69
70 70   # Check that the response mentions the researcher agent or its tools
71 71   if [ "$code" -eq 0 ] && [ -n "$output" ]; then
72 -      if echo "$output" | grep -qi "research\|researcher\|grep\|glob\|view"; then
72 +      if echo "$output" | grep -qi "researcher\|Research"; then
73 73       echo "✅ $name passed (confirmed custom agent)"
74 74       PASS=$((PASS + 1))
75 75     else
Original file line number Diff line number Diff line change @@ -67,13 +67,17 @@ run_with_timeout() {
67 67
68 68   echo "$output"
69 69
70 -    if [ "$code" -eq 0 ] && [ -n "$output" ]; then
71 -      echo "✅ $name passed (got response)"
70 +    if [ "$code" -eq 0 ] && [ -n "$output" ] && echo "$output" | grep -qi "MCP\|mcp\|capital\|France\|Paris\|configured"; then
71 +      echo "✅ $name passed (got meaningful response)"
72 72     PASS=$((PASS + 1))
73 73   elif [ "$code" -eq 124 ]; then
74 74     echo "❌ $name failed (timed out after ${TIMEOUT}s)"
75 75     FAIL=$((FAIL + 1))
76 76     ERRORS="$ERRORS\n - $name (timeout)"
77 +    elif [ "$code" -eq 0 ]; then
78 +      echo "❌ $name failed (expected pattern not found)"
79 +      FAIL=$((FAIL + 1))
80 +      ERRORS="$ERRORS\n - $name"
77 81   else
78 82     echo "❌ $name failed (exit code $code)"
79 83     FAIL=$((FAIL + 1))
Original file line number Diff line number Diff line change @@ -30,7 +30,7 @@ You can only respond with text based on your training data.
30 30
31 31   var response = await session.SendAndWaitAsync(new MessageOptions
32 32   {
33 -        Prompt = "What tools do you have available? List them.",
33 +        Prompt = "Use the bash tool to run 'echo hello'.",
34 34   });
35 35
36 36   if (response != null)
Original file line number Diff line number Diff line change @@ -39,7 +39,7 @@ func main() {
39 39   defer session.Destroy()
40 40
41 41   response, err := session.SendAndWait(ctx, copilot.MessageOptions{
42 -        Prompt: "What tools do you have available? List them.",
42 +        Prompt: "Use the bash tool to run 'echo hello'.",
43 43   })
44 44   if err != nil {
45 45       log.Fatal(err)
Original file line number Diff line number Diff line change @@ -24,7 +24,7 @@ async def main():
24 24   )
25 25
26 26   response = await session.send_and_wait(
27 -        {"prompt": "What tools do you have available? List them."}
27 +        {"prompt": "Use the bash tool to run 'echo hello'."}
28 28   )
29 29
30 30   if response:
Original file line number Diff line number Diff line change @@ -19,7 +19,7 @@ async function main() {
19 19   });
20 20
21 21   const response = await session.sendAndWait({
22 -        prompt: "What tools do you have available? List them.",
22 +        prompt: "Use the bash tool to run 'echo hello'.",
23 23   });
24 24
25 25   if (response) {
Original file line number Diff line number Diff line change @@ -69,7 +69,7 @@ run_with_timeout() {
69 69
70 70   # Check that the response indicates no tools are available
71 71   if [ "$code" -eq 0 ] && [ -n "$output" ]; then
72 -      if echo "$output" | grep -qi "no tool\|not have\|don't have\|do not have\|no .* tools\|cannot\|not available\|none"; then
72 +      if echo "$output" | grep -qi "no tool\|can't\|cannot\|unable\|don't have\|do not have\|not available"; then
73 73       echo "✅ $name passed (confirmed no tools)"
74 74       PASS=$((PASS + 1))
75 75     else
Original file line number Diff line number Diff line change @@ -68,7 +68,7 @@ run_with_timeout() {
68 68   echo "$output"
69 69
70 70   if [ "$code" -eq 0 ] && [ -n "$output" ]; then
71 -      if echo "$output" | grep -qi "Skill directories configured\|Alice\|greeting"; then
71 +      if echo "$output" | grep -qi "skill\|Skill\|greeting\|Alice"; then
72 72       echo "✅ $name passed (confirmed skill execution)"
73 73       PASS=$((PASS + 1))
74 74     else
Original file line number Diff line number Diff line change @@ -75,7 +75,7 @@ run_with_timeout() {
75 75   if echo "$output" | grep -qi "grep\|glob\|view"; then
76 76     has_whitelisted=true
77 77   fi
78 -    if echo "$output" | grep -qi "bash\|edit\|create_file"; then
78 +    if echo "$output" | grep -qiw "bash\|edit\|create_file"; then
79 79     has_blacklisted=true
80 80   fi
81 81
Original file line number Diff line number Diff line change @@ -68,10 +68,10 @@ run_with_timeout() {
68 68   echo "$output"
69 69
70 70   if [ "$code" -eq 0 ] && [ -n "$output" ]; then
71 -      if echo "$output" | grep -qi "Virtual filesystem contents"; then
71 +      if echo "$output" | grep -qi "Virtual filesystem contents" && echo "$output" | grep -qi "plan\.md"; then
72 72       echo "✅ $name passed (virtual FS operations confirmed)"
73 73       PASS=$((PASS + 1))
74 -      elif [ "$code" -eq 0 ] && [ -n "$output" ]; then
74 +      else
75 75       echo "❌ $name failed (expected pattern not found)"
76 76       FAIL=$((FAIL + 1))
77 77       ERRORS="$ERRORS\n - $name"
You can’t perform that action at this time.
0 commit comments