prestodb
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 0 deletions b/‎.gitignore‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 79 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎benchmarks/java_trino.json‎
Lines changed: 2 additions & 1 deletion b/‎benchmarks/java_trino.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎cmd/forward/main.go‎
Lines changed: 9 additions & 9 deletions b/‎cmd/forward/main.go‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎cmd/loadjson/main.go‎
Lines changed: 8 additions & 4 deletions b/‎cmd/loadjson/main.go‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎cmd/loadjson/query_info_test.go‎
Lines changed: 1 addition & 1 deletion b/‎cmd/loadjson/query_info_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmd/queryplan/main.go‎
Lines changed: 1 addition & 1 deletion b/‎cmd/queryplan/main.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmd/replay/frame_test.go‎
Lines changed: 2 additions & 2 deletions b/‎cmd/replay/frame_test.go‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎cmd/replay/main.go‎
Lines changed: 2 additions & 1 deletion b/‎cmd/replay/main.go‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎cmd/replay/query_log.go‎
Lines changed: 1 addition & 1 deletion b/‎cmd/replay/query_log.go‎
Lines changed: 1 addition & 1 deletion
@@ -35,3 +35,9 @@ pbench_*
 
 # genddl output dir
 cmd/genddl/out/
+
+# IDE
+.idea/
+
+# Python
+__pycache__/
@@ -0,0 +1,79 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Build Commands
+
+```bash
+# Build for current platform
+make
+
+# Build for all platforms (darwin/linux, amd64/arm64)
+make all
+
+# Build with InfluxDB support
+make TAGS=influx
+
+# Install locally (creates symlink to /usr/local/bin/pbench)
+make install
+
+# Generate cluster configurations from templates
+make clusters
+```
+
+## Testing
+
+```bash
+# Run all tests
+go test ./...
+
+# Run a specific package's tests
+go test ./presto
+go test ./stage
+go test ./cmd/cmp
+
+# Run a single test
+go test ./presto -run TestQuerySplitter
+```
+
+## Architecture
+
+PBench is a Presto/Trino benchmark runner built with Cobra CLI. It replaces Benchto with support for concurrent workloads, result capture, and query log collection.
+
+### Package Structure
+
+- **main.go** - Entry point, calls `cmd.Execute()`
+- **cmd/** - Cobra command definitions. Each subcommand has a wrapper file (e.g., `run.go`) and an implementation package (e.g., `cmd/run/`)
+- **presto/** - Presto/Trino HTTP client implementation with query execution, session management, and result parsing
+- **stage/** - Core benchmark execution engine. A `Stage` defines queries and settings, and can chain to other stages via the `next` field in JSON
+- **utils/** - Shared utilities including Presto flag handling and path helpers
+- **clusters/** - Cluster configuration templates and generated configs
+- **benchmarks/** - Benchmark definitions (TPC-DS, TPC-H, ClickBench, etc.) as JSON stage files and SQL queries
+
+### Key Concepts
+
+**Stages**: Benchmarks are defined as JSON files that specify queries (inline or via files), session parameters, catalog/schema, and execution settings. Stages form a DAG via the `next` field, enabling sequential/parallel execution patterns.
+
+**Stage Settings** (inherited by child stages unless overridden):
+- `catalog`, `schema`, `timezone` - Presto session settings
+- `cold_runs`, `warm_runs` - Number of runs per query
+- `save_output`, `save_json`, `save_column_metadata` - Output capture options
+- `abort_on_error` - Stop on first failure
+- `random_execution`, `randomly_execute_until` - Random query selection mode
+
+**Run Recorders**: Results can be recorded to file (default), InfluxDB (requires `TAGS=influx` build), or MySQL.
+
+### Build Tags
+
+- `influx` - Enables InfluxDB run recorder support. Without this tag, `stage/no_influx.go` provides a stub that returns an error if InfluxDB config is provided.
+
+## Commands
+
+- `run` - Execute benchmarks from stage JSON files
+- `cmp` - Compare query results between two directories
+- `genconfig` - Generate cluster configs from templates
+- `genddl` - Generate DDL scripts
+- `loadjson` - Load query JSON files into databases
+- `replay` - Replay workloads from CSV
+- `forward` - Forward queries between Presto clusters
+- `save` - Save table schema/data information
@@ -4,6 +4,7 @@
   "session_params": {
     "query_max_execution_time": "8h",
     "join_distribution_type": "AUTOMATIC",
-    "join_reordering_strategy": "AUTOMATIC"
+    "join_reordering_strategy": "AUTOMATIC",
+    "optimizer_use_histograms": null
   }
 }
@@ -7,9 +7,10 @@ import (
 	"os/signal"
 	"path/filepath"
 	"pbench/log"
-	"pbench/presto"
-	"pbench/presto/query_json"
 	"pbench/utils"
+
+	presto "github.com/ethanyzhang/presto-go"
+	"github.com/ethanyzhang/presto-go/query_json"
 	"regexp"
 	"sync"
 	"sync/atomic"
@@ -140,7 +141,7 @@ func Run(_ *cobra.Command, _ []string) {
 	// Keep running until user interrupts or quits using Ctrl + C or Ctrl + D.
 	// When the cluster is unavailable to return the running queries, wait and retry for at most 10 times before quitting.
 	for attempt := 1; ctx.Err() == nil && attempt <= maxRetry; {
-		states, _, err := sourceClient.GetQueryState(ctx, &presto.GetQueryStatsOptions{
+		states, _, err := sourceClient.GetQueryState(ctx, &presto.GetQueryStateOptions{
 			IncludeAllQueries:  &trueValue,
 			QueryTextSizeLimit: &queryTextSizeLimit,
 		})
@@ -209,21 +210,20 @@ func checkAndCancelQuery(ctx context.Context, queryState *presto.QueryStateInfo)
 
 func forwardQuery(ctx context.Context, queryState *presto.QueryStateInfo, clients []*presto.Client) {
 	defer runningTasks.Done()
-	var (
-		queryInfo    *query_json.QueryInfo
-		queryInfoErr error
-	)
+	var queryInfoErr error
+	queryInfo := new(query_json.QueryInfo)
 	for attempt := 1; attempt <= maxRetry; attempt++ {
-		queryInfo, _, queryInfoErr = clients[0].GetQueryInfo(ctx, queryState.QueryId, false, nil)
+		_, queryInfoErr = clients[0].GetQueryInfo(ctx, queryState.QueryId, queryInfo)
 		if queryInfoErr != nil {
+			queryInfo = new(query_json.QueryInfo)
 			log.Error().Str("source_query_id", queryState.QueryId).Err(queryInfoErr).
 				Msgf("failed to get query info for forwarding, attempt %d/%d", attempt, maxRetry)
 			waitForNextPoll(ctx)
 		} else {
 			break
 		}
 	}
-	if queryInfo == nil {
+	if queryInfoErr != nil {
 		log.Error().Str("source_query_id", queryState.QueryId).
 			Msgf("cannot get query info for forwarding after %d retries, skipping", maxRetry)
 		failedToForward.Add(1)
 
@@ -5,11 +5,11 @@ import (
 	"database/sql"
 	"encoding/json"
 	"fmt"
+	"github.com/ethanyzhang/presto-go/query_json"
 	"os"
 	"os/signal"
 	"path/filepath"
 	"pbench/log"
-	"pbench/presto/query_json"
 	"pbench/stage"
 	"pbench/utils"
 	"reflect"
@@ -117,7 +117,9 @@ func Run(_ *cobra.Command, args []string) {
 	pseudoStage.States.RunStartTime = runStartTime.GetTime()
 	pseudoStage.States.RunFinishTime = runEndTime.GetTime()
 	for _, r := range runRecorders {
-		r.RecordRun(utils.GetCtxWithTimeout(time.Second*5), pseudoStage, queryResults)
+		rCtx, rCancel := utils.GetCtxWithTimeout(time.Second * 5)
+		r.RecordRun(rCtx, pseudoStage, queryResults)
+		rCancel()
 	}
 
 	log.Info().Int("file_loaded", len(queryResults)).Send()
@@ -173,7 +175,7 @@ func processFile(ctx context.Context, path string) {
 	}
 	if queryInfo.ErrorCode != nil {
 		// Need to set this so the run recorders will mark this query as failed.
-		queryResult.QueryError = fmt.Errorf(*queryInfo.ErrorCode.Name)
+		queryResult.QueryError = fmt.Errorf("%s", *queryInfo.ErrorCode.Name)
 	}
 	// Unlike benchmarks run by pbench, we do not know when did the run start and finish when loading them from files.
 	// We infer that the whole run starts at min(queryStartTime) and ends at max(queryEndTime).
@@ -226,7 +228,9 @@ func processFile(ctx context.Context, path string) {
 		}
 	}
 	for _, r := range runRecorders {
-		r.RecordQuery(utils.GetCtxWithTimeout(time.Second*5), pseudoStage, queryResult)
+		rCtx, rCancel := utils.GetCtxWithTimeout(time.Second * 5)
+		r.RecordQuery(rCtx, pseudoStage, queryResult)
+		rCancel()
 	}
 	log.Info().Str("path", path).Str("query_id", queryInfo.QueryId).Msg("success")
 	resultChan <- queryResult
 
@@ -4,8 +4,8 @@ import (
 	"embed"
 	_ "embed"
 	"encoding/json"
+	"github.com/ethanyzhang/presto-go/query_json"
 	"github.com/stretchr/testify/assert"
-	"pbench/presto/query_json"
 	"testing"
 )
 
 
@@ -21,7 +21,7 @@ import (
 	"io"
 	"os"
 	"pbench/log"
-	"pbench/presto/plan_node"
+	"pbench/prestoapi/plan_node"
 
 	"github.com/spf13/cobra"
 )
 
@@ -2,9 +2,9 @@ package replay
 
 import (
 	"encoding/csv"
+	presto "github.com/ethanyzhang/presto-go"
 	"github.com/stretchr/testify/assert"
 	"io"
-	"pbench/presto"
 	"sort"
 	"strings"
 	"testing"
@@ -28,7 +28,7 @@ func TestFrame(t *testing.T) {
 		assert.Equal(t, "20240415_112042_61088_qa5fd", frame.QueryId)
 		assert.Equal(t, "2024-04-15 11:20:42.755 UTC", frame.CreateTime.Format(CreateTimeFormat))
 		assert.Equal(t, 99993, frame.WallTimeMillis)
-		client, _ := presto.NewClient("http://127.0.0.1", false)
+		client, _ := presto.NewClient("http://127.0.0.1")
 		sessionParams := strings.Split(client.GenerateSessionParamsHeaderValue(frame.ParseSessionParams()), ",")
 		sort.Strings(sessionParams)
 		assert.Equal(t, []string{
 
@@ -10,8 +10,9 @@ import (
 	"os/signal"
 	"path/filepath"
 	"pbench/log"
-	"pbench/presto"
 	"pbench/utils"
+
+	presto "github.com/ethanyzhang/presto-go"
 	"sync"
 	"syscall"
 	"time"
 
@@ -3,7 +3,7 @@ package replay
 import (
 	"encoding/json"
 	"fmt"
-	"pbench/presto/query_json"
+	"github.com/ethanyzhang/presto-go/query_json"
 	"time"
 )
Original file line number	Diff line number	Diff line change
`@@ -4,6 +4,7 @@`
`4`	`4`	`"session_params": {`
`5`	`5`	`"query_max_execution_time": "8h",`
`6`	`6`	`"join_distribution_type": "AUTOMATIC",`
`7`		`- "join_reordering_strategy": "AUTOMATIC"`
	`7`	`+ "join_reordering_strategy": "AUTOMATIC",`
	`8`	`+ "optimizer_use_histograms": null`
`8`	`9`	`}`
`9`	`10`	`}`
Original file line number	Diff line number	Diff line change
`@@ -4,8 +4,8 @@ import (`
`4`	`4`	`"embed"`
`5`	`5`	`_ "embed"`
`6`	`6`	`"encoding/json"`
	`7`	`+ "github.com/ethanyzhang/presto-go/query_json"`
`7`	`8`	`"github.com/stretchr/testify/assert"`
`8`		`- "pbench/presto/query_json"`
`9`	`9`	`"testing"`
`10`	`10`	`)`
`11`	`11`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ import (`
`21`	`21`	`"io"`
`22`	`22`	`"os"`
`23`	`23`	`"pbench/log"`
`24`		`- "pbench/presto/plan_node"`
	`24`	`+ "pbench/prestoapi/plan_node"`
`25`	`25`
`26`	`26`	`"github.com/spf13/cobra"`
`27`	`27`	`)`
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@ package replay`
`3`	`3`	`import (`
`4`	`4`	`"encoding/json"`
`5`	`5`	`"fmt"`
`6`		`- "pbench/presto/query_json"`
	`6`	`+ "github.com/ethanyzhang/presto-go/query_json"`
`7`	`7`	`"time"`
`8`	`8`	`)`
`9`	`9`