@@ -10,9 +10,16 @@ import { join } from "path";
1010import { ClaudeCodeProvider } from "../providers/claude-code.js" ;
1111import { testCases } from "../fixtures/test-cases.js" ;
1212import { CostTracker } from "../utils/cost-tracker.js" ;
13- import { assertContentIncludes } from "../utils/assertions.js" ;
1413import { OutputCapture } from "../utils/output-capture.js" ;
1514import { TestReporter } from "../utils/test-reporter.js" ;
15+ import {
16+ shouldSkipTest ,
17+ executeTestWithMetrics ,
18+ handleTestFailure ,
19+ assertSkillTriggering ,
20+ assertExpectedContent ,
21+ } from "../utils/test-helpers.js" ;
22+ import { TestLogger } from "../utils/test-logger.js" ;
1623
1724// Test context type
1825interface TestContext {
@@ -46,10 +53,10 @@ test.before(async (t) => {
4653
4754 pluginInstalled = existsSync ( pluginPath ) ;
4855 if ( pluginInstalled ) {
49- console . log ( `\n✅ Plugin auto-installed at: ${ pluginPath } ` ) ;
56+ TestLogger . success ( `\nPlugin auto-installed at: ${ pluginPath } ` ) ;
5057 }
5158 } catch ( error ) {
52- console . warn ( `\n⚠️ Failed to auto-install plugin: ${ error } ` ) ;
59+ TestLogger . warning ( `\nFailed to auto-install plugin: ${ error } ` ) ;
5360 }
5461 }
5562
@@ -70,17 +77,17 @@ test.before(async (t) => {
7077 } as TestContext ;
7178
7279 if ( ! claudeAvailable ) {
73- console . warn ( "\n⚠️ Claude Code CLI not available" ) ;
74- console . warn ( " Install from: https://claude.ai/code") ;
75- console . warn ( " Skipping all Claude Code integration tests\n") ;
80+ TestLogger . warning ( "\nClaude Code CLI not available" ) ;
81+ TestLogger . plain ( " Install from: https://claude.ai/code") ;
82+ TestLogger . plain ( " Skipping all Claude Code integration tests\n") ;
7683 } else if ( ! pluginInstalled ) {
77- console . warn ( "\n⚠️ ui5 plugin could not be installed" ) ;
78- console . warn ( ` Expected at: ${ pluginPath } `) ;
79- console . warn ( " Skipping all Claude Code integration tests\n") ;
84+ TestLogger . warning ( "\nui5 plugin could not be installed" ) ;
85+ TestLogger . plain ( ` Expected at: ${ pluginPath } `) ;
86+ TestLogger . plain ( " Skipping all Claude Code integration tests\n") ;
8087 } else {
81- console . log ( "\n✅ Claude Code CLI available" ) ;
82- console . log ( `✅ Plugin ready at: ${ pluginPath } `) ;
83- console . log ( "🚀 Running integration tests...\n") ;
88+ TestLogger . success ( "\nClaude Code CLI available" ) ;
89+ TestLogger . success ( ` Plugin ready at: ${ pluginPath } `) ;
90+ TestLogger . start ( " Running integration tests...\n") ;
8491 }
8592} ) ;
8693
@@ -98,14 +105,14 @@ test.after.always(async (t) => {
98105
99106 // Save JSON report
100107 const jsonPath = await reporter . saveJSON ( summary ) ;
101- console . log ( `\n📊 JSON report saved to: ${ jsonPath } ` ) ;
108+ TestLogger . metrics ( `\nJSON report saved to: ${ jsonPath } ` ) ;
102109
103110 // Save HTML dashboard
104111 const htmlPath = await reporter . saveHTML ( summary , categoryMetrics ) ;
105- console . log ( `📈 HTML dashboard saved to: ${ htmlPath } `) ;
106- console . log ( ` Open in browser: file://${ htmlPath } \n`) ;
112+ TestLogger . document ( ` HTML dashboard saved to: ${ htmlPath } `) ;
113+ TestLogger . plain ( ` Open in browser: file://${ htmlPath } \n`) ;
107114 } catch ( error ) {
108- console . error ( '⚠️ Failed to generate reports:' , error ) ;
115+ TestLogger . error ( ` Failed to generate reports: ${ error } ` ) ;
109116 }
110117} ) ;
111118
@@ -117,110 +124,34 @@ for (const testCase of testCases) {
117124 const { provider, costTracker, outputCapture, reporter, claudeAvailable, pluginInstalled } = t . context as TestContext ;
118125
119126 // Skip if Claude not available or plugin not installed
120- if ( ! claudeAvailable || ! pluginInstalled ) {
121- t . log ( "⊘ Skipped - Claude Code CLI or plugin not available" ) ;
122- t . pass ( ) ; // Mark as passed but skipped
127+ if ( shouldSkipTest ( claudeAvailable , pluginInstalled , t ) ) {
123128 return ;
124129 }
125130
126- const testStartTime = Date . now ( ) ;
127-
128- // Run the test
129- const result = await provider . runTest ( testCase . prompt , {
130- timeout : 120000 , // 120s timeout (increased from 90s)
131- maxRetries : 2 , // Retry up to 2 times for timeouts/rate limits
132- } ) ;
133-
134- const testDuration = Date . now ( ) - testStartTime ;
135-
136- // Track metrics
137- costTracker . track ( {
138- provider : provider . name ,
139- testId : testCase . id ,
140- prompt : testCase . prompt ,
141- tokensUsed : result . tokensUsed ,
142- cost : result . cost ,
143- timestamp : new Date ( ) ,
144- } ) ;
145-
146- // Add to reporter (estimate retry count from duration vs latency)
147- const estimatedRetries = Math . max ( 0 , Math . floor ( ( testDuration - result . latencyMs ) / 5000 ) ) ;
148- reporter . addResult ( {
131+ // Execute test with metrics tracking
132+ const { result } = await executeTestWithMetrics (
133+ provider ,
149134 testCase ,
150- result,
151- duration : testDuration ,
152- retryCount : estimatedRetries ,
153- timestamp : new Date ( ) . toISOString ( ) ,
154- } ) ;
135+ costTracker ,
136+ reporter
137+ ) ;
155138
156139 // Log test result
157- t . log ( `⏱️ ${ result . latencyMs } ms | 🔤 ${ result . tokensUsed } tokens` ) ;
140+ TestLogger . info ( `⏱️ ${ result . latencyMs } ms | 🔤 ${ result . tokensUsed } tokens` ) ;
158141
159142 // Assert: Test should succeed
160143 if ( ! result . success ) {
161- // Capture full output for failed test
162- const outputPath = await outputCapture . saveFailedTest ( {
163- testId : testCase . id ,
164- testName : testCase . name ,
165- prompt : testCase . prompt ,
166- response : result . responseContent ,
167- error : result . error ,
168- timestamp : new Date ( ) . toISOString ( ) ,
169- skillTriggered : result . skillTriggered ,
170- } ) ;
171- t . log ( `📄 Full response saved to: ${ outputPath } ` ) ;
144+ await handleTestFailure ( t , outputCapture , testCase , result , result . error || 'Unknown error' ) ;
172145 t . fail ( `Test execution failed: ${ result . error } ` ) ;
173146 return ;
174147 }
175148 t . true ( result . success , "Test execution should succeed" ) ;
176149
177150 // Assert: Skill triggering correctness
178- if ( testCase . expectedSkill === null ) {
179- // Negative test - should NOT trigger UI5 skill
180- if ( result . skillTriggered !== null ) {
181- t . log ( `⚠️ Unexpected skill trigger: ${ result . skillTriggered } ` ) ;
182- t . log ( `Response preview: ${ result . responseContent . substring ( 0 , 200 ) } ...` ) ;
183- }
184- t . is (
185- result . skillTriggered ,
186- null ,
187- `Should NOT trigger UI5 skill for: "${ testCase . prompt } "`
188- ) ;
189- } else {
190- // Positive test - should trigger expected skill
191- if ( result . skillTriggered !== testCase . expectedSkill ) {
192- t . log ( `❌ Expected: ${ testCase . expectedSkill } ` ) ;
193- t . log ( ` Got: ${ result . skillTriggered || "none" } ` ) ;
194- t . log ( `Response preview: ${ result . responseContent . substring ( 0 , 200 ) } ...` ) ;
195-
196- // Capture full response for skill detection failure
197- const outputPath = await outputCapture . saveFailedTest ( {
198- testId : testCase . id ,
199- testName : testCase . name ,
200- prompt : testCase . prompt ,
201- response : result . responseContent ,
202- error : `Skill detection failed: expected ${ testCase . expectedSkill } , got ${ result . skillTriggered || 'none' } ` ,
203- timestamp : new Date ( ) . toISOString ( ) ,
204- skillTriggered : result . skillTriggered ,
205- } ) ;
206- t . log ( `📄 Full response saved to: ${ outputPath } ` ) ;
207- }
208- t . is (
209- result . skillTriggered ,
210- testCase . expectedSkill ,
211- `Should trigger "${ testCase . expectedSkill } "`
212- ) ;
213- }
151+ await assertSkillTriggering ( t , result , testCase , outputCapture ) ;
214152
215153 // Assert: Expected content (if specified)
216- if ( testCase . expectedContent && result . skillTriggered !== null ) {
217- assertContentIncludes (
218- t ,
219- result . responseContent ,
220- testCase . expectedContent ,
221- testCase . name
222- ) ;
223- }
154+ assertExpectedContent ( t , result , testCase ) ;
224155 }
225156 ) ;
226157}
@@ -229,9 +160,7 @@ for (const testCase of testCases) {
229160test . serial ( "[Claude Code] Test Summary" , ( t ) => {
230161 const { provider, costTracker, claudeAvailable, pluginInstalled } = t . context as TestContext ;
231162
232- if ( ! claudeAvailable || ! pluginInstalled ) {
233- t . log ( "⊘ Skipped - Claude Code CLI or plugin not available" ) ;
234- t . pass ( ) ; // Mark as passed but skipped
163+ if ( shouldSkipTest ( claudeAvailable , pluginInstalled , t ) ) {
235164 return ;
236165 }
237166
@@ -240,16 +169,16 @@ test.serial("[Claude Code] Test Summary", (t) => {
240169 const positiveTests = testCases . filter ( tc => tc . expectedSkill !== null ) . length ;
241170 const negativeTests = testCases . filter ( tc => tc . expectedSkill === null ) . length ;
242171
243- t . log ( "\n📊 Claude Code Test Summary:" ) ;
244- t . log ( ` Total tests: ${ testCases . length } (${ positiveTests } positive, ${ negativeTests } negative)`) ;
245- t . log ( ` Tests executed: ${ entries . length } `) ;
246- t . log ( ` Total tokens (estimated): ${ totalTokens . toLocaleString ( ) } `) ;
247- t . log ( ` Provider: ${ provider . getInfo ( ) . description } `) ;
172+ TestLogger . metrics ( "\nClaude Code Test Summary:" ) ;
173+ TestLogger . plain ( ` Total tests: ${ testCases . length } (${ positiveTests } positive, ${ negativeTests } negative)`) ;
174+ TestLogger . plain ( ` Tests executed: ${ entries . length } `) ;
175+ TestLogger . plain ( ` Total tokens (estimated): ${ totalTokens . toLocaleString ( ) } `) ;
176+ TestLogger . plain ( ` Provider: ${ provider . getInfo ( ) . description } `) ;
248177
249178 // Validate actual test count vs expected
250179 const expectedExecuted = claudeAvailable && pluginInstalled ? testCases . length : 0 ;
251180 if ( entries . length !== expectedExecuted ) {
252- t . log ( `⚠️ Expected ${ expectedExecuted } tests but executed ${ entries . length } `) ;
181+ TestLogger . warning ( ` Expected ${ expectedExecuted } tests but executed ${ entries . length } `) ;
253182 }
254183
255184 // At least some tests should have run if environment is ready
0 commit comments