fory/benchmarks/cpp/profile.sh at main · apache/fory · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Stop at the first command that fails.
set -e

# Locate the directory containing this script and make it the working
# directory, so the relative paths used below (e.g. build/) resolve from it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
cd "$SCRIPT_DIR"

# Option defaults; the command-line parser below may override them.
OUTPUT_DIR="profile_output"
DURATION=5
DATA="" SERIALIZER="" FILTER=""

# ANSI escape sequences for colored messages (rendered by `echo -e`).
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Parse arguments
# Print the help text (options, examples, supported profilers) to stdout
# and terminate the script with status 0.
usage() {
    # Unquoted delimiter on purpose: $0 must expand to the invoked script name.
    cat <<USAGE_TEXT
Usage: $0 [OPTIONS]

Generate flamegraph/profile for C++ benchmarks

Options:
  --filter <pattern>           Custom benchmark filter (regex pattern)
  --data <struct|sample>       Filter benchmark by data type
  --serializer <fory|protobuf> Filter benchmark by serializer
  --duration <seconds>         Profiling duration (default: 5)
  --output-dir <dir>           Output directory (default: profile_output)
  --help                       Show this help message

Examples:
  $0                                    # Profile all benchmarks
  $0 --data struct --serializer fory   # Profile Fory Struct benchmarks
  $0 --serializer protobuf --duration 10
  $0 --filter BM_Fory_Struct_Serialize # Profile specific benchmark

Supported profiling tools (in order of preference):
  - samply (recommended): cargo install samply
  - perf (Linux)
  - sample (macOS built-in)
  - dtrace (macOS, may require sudo)
USAGE_TEXT
    exit 0
}

# Parse the command-line options into the FILTER, DATA, SERIALIZER, DURATION
# and OUTPUT_DIR globals.
#
# Arguments: the script's positional parameters ("$@").
# Exits:     0 after printing help for --help/-h;
#            1 (message on stderr) for an unknown option or for an option
#              that is missing its value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --filter|--data|--serializer|--duration|--output-dir)
                # Without this check `shift 2` fails when the value is missing:
                # the script would die silently under `set -e`, or loop forever
                # without it.
                if [[ $# -lt 2 ]]; then
                    echo -e "${RED}Option $1 requires a value${NC}" >&2
                    exit 1
                fi
                case "$1" in
                    --filter)     FILTER="$2" ;;
                    --data)       DATA="$2" ;;
                    --serializer) SERIALIZER="$2" ;;
                    --duration)   DURATION="$2" ;;
                    --output-dir) OUTPUT_DIR="$2" ;;
                esac
                shift 2
                ;;
            --help|-h)
                usage
                ;;
            *)
                echo -e "${RED}Unknown option: $1${NC}" >&2
                # usage terminates with status 0; run it in a subshell so the
                # help text is still shown but the script reports failure.
                (usage) >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Derive the benchmark filter from --data/--serializer unless an explicit
# --filter was given. Each value gets its first letter upper-cased and the
# result is "<Serializer>_<Data>", or just one of the two when only one is set.
if [[ -z "$FILTER" ]]; then
    [[ -z "$DATA" ]] || {
        DATA_CAP="$(printf '%s' "${DATA:0:1}" | tr '[:lower:]' '[:upper:]')${DATA:1}"
        FILTER="$DATA_CAP"
    }
    [[ -z "$SERIALIZER" ]] || {
        SER_CAP="$(printf '%s' "${SERIALIZER:0:1}" | tr '[:lower:]' '[:upper:]')${SERIALIZER:1}"
        # Append "_<data part>" only when a data part was produced above.
        FILTER="${SER_CAP}${FILTER:+_${FILTER}}"
    }
fi

# The profiler needs the benchmark binary at build/fory_benchmark (relative to
# the current directory); bail out with a hint when it is missing.
[[ -f "build/fory_benchmark" ]] || {
    echo -e "${RED}Benchmark not found. Run ./run.sh first to build.${NC}"
    exit 1
}

# Locate the FlameGraph scripts. FLAMEGRAPH_DIR ends up as a directory holding
# them, or as the literal marker "PATH" when flamegraph.pl is found on $PATH.
FLAMEGRAPH_DIR=""
for candidate in "$HOME/FlameGraph" "/usr/share/FlameGraph"; do
    if [[ -d "$candidate" ]]; then
        FLAMEGRAPH_DIR="$candidate"
        break
    fi
done

if [[ -z "$FLAMEGRAPH_DIR" ]]; then
    if command -v flamegraph.pl &> /dev/null; then
        FLAMEGRAPH_DIR="PATH"
    else
        # Nothing installed anywhere we look: fetch a shallow clone into $HOME.
        echo -e "${YELLOW}FlameGraph tools not found. Installing to ~/FlameGraph...${NC}"
        git clone --depth 1 https://github.com/brendangregg/FlameGraph.git "$HOME/FlameGraph"
        FLAMEGRAPH_DIR="$HOME/FlameGraph"
        echo -e "${GREEN}FlameGraph installed successfully.${NC}"
    fi
fi

# Make sure the output directory exists (relative paths are resolved from the
# current directory), then continue from inside build/.
mkdir -p "$OUTPUT_DIR"
cd build

# Label for this run's output files, and the kernel name used to pick a tool.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OS_TYPE="$(uname -s)"

# Benchmark command line; the filter flag is appended only when a filter is set.
BENCH_CMD="./fory_benchmark --benchmark_min_time=${DURATION}s${FILTER:+ --benchmark_filter=$FILTER}"

# Summary of the run configuration.
echo -e "${GREEN}=== Fory C++ Benchmark Profiler ===${NC}"
echo -e "Filter: ${FILTER:-all}"
echo -e "Duration: ${DURATION}s"
echo -e "Output: ${OUTPUT_DIR}"
echo ""

# Try different profiling tools, in order of preference: samply, perf, then
# the macOS `sample` command. The first one available runs the benchmark and,
# for perf/sample, the recorded stacks are rendered into a flamegraph SVG.

# Directory that receives the profile artifacts, as seen from build/ (the
# current directory at this point). A relative --output-dir was created one
# level up from here, while an absolute one must be used unchanged; prefixing
# "../" to an absolute path would point at a directory that does not exist.
if [[ "$OUTPUT_DIR" == /* ]]; then
    PROFILE_OUT_DIR="$OUTPUT_DIR"
else
    PROFILE_OUT_DIR="../${OUTPUT_DIR}"
fi

if command -v samply &> /dev/null; then
    echo -e "${YELLOW}Profiling with samply...${NC}"
    echo -e "Running: samply record $BENCH_CMD"
    # BENCH_CMD is a space-separated command line and is split on purpose.
    # shellcheck disable=SC2086
    samply record $BENCH_CMD
    echo -e "${GREEN}Done! Samply should have opened in your browser.${NC}"

elif command -v perf &> /dev/null; then
    echo -e "${YELLOW}Profiling with perf...${NC}"
    PERF_DATA="${PROFILE_OUT_DIR}/perf_${TIMESTAMP}.data"
    FLAMEGRAPH_SVG="${PROFILE_OUT_DIR}/flamegraph_${TIMESTAMP}.svg"

    echo -e "Running: perf record -g --call-graph dwarf -o $PERF_DATA $BENCH_CMD"
    # shellcheck disable=SC2086
    perf record -g --call-graph dwarf -o "$PERF_DATA" $BENCH_CMD

    echo -e "${GREEN}Profile saved to: $(realpath "${PERF_DATA}")${NC}"

    # Generate flamegraph SVG
    echo -e "${YELLOW}Generating flamegraph SVG...${NC}"
    if [[ "$FLAMEGRAPH_DIR" == "PATH" ]]; then
        perf script -i "$PERF_DATA" | stackcollapse-perf.pl | flamegraph.pl > "$FLAMEGRAPH_SVG"
    else
        perf script -i "$PERF_DATA" | "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" | "$FLAMEGRAPH_DIR/flamegraph.pl" > "$FLAMEGRAPH_SVG"
    fi
    echo -e "${GREEN}Flamegraph saved to: $(realpath "${FLAMEGRAPH_SVG}")${NC}"

elif [[ "$OS_TYPE" == "Darwin" ]]; then
    # macOS-specific profiling using sample command
    echo -e "${YELLOW}Profiling on macOS using sample command...${NC}"
    SAMPLE_OUTPUT="${PROFILE_OUT_DIR}/sample_${TIMESTAMP}.txt"
    COLLAPSED_OUTPUT="${PROFILE_OUT_DIR}/collapsed_${TIMESTAMP}.txt"
    FLAMEGRAPH_SVG="${PROFILE_OUT_DIR}/flamegraph_${TIMESTAMP}.svg"

    # Start benchmark in background
    echo -e "Starting benchmark: $BENCH_CMD"
    # shellcheck disable=SC2086
    $BENCH_CMD &
    BENCH_PID=$!

    # Wait a moment for the process to start
    sleep 0.5

    # Check if process is running
    if ! kill -0 "$BENCH_PID" 2>/dev/null; then
        echo -e "${RED}Benchmark process failed to start${NC}"
        exit 1
    fi

    echo -e "Sampling process $BENCH_PID for ${DURATION} seconds..."
    # Use sample command to profile (built-in on macOS). Best effort: a failure
    # here is detected below through the missing output file.
    sample "$BENCH_PID" "$DURATION" -file "$SAMPLE_OUTPUT" 2>/dev/null || true

    # Wait for benchmark to complete; its exit status is deliberately ignored.
    wait "$BENCH_PID" 2>/dev/null || true

    if [[ -f "$SAMPLE_OUTPUT" ]]; then
        echo -e "${GREEN}Sample output saved to: $(realpath "${SAMPLE_OUTPUT}")${NC}"

        # Convert sample output to collapsed format for flamegraph
        echo -e "${YELLOW}Converting to flamegraph format...${NC}"

        # Parse macOS sample output to collapsed stack format.
        # The sample output has a tree format with indentation and branch indicators.
        # A converter failure (e.g. python3 missing) must not abort the script under
        # `set -e`; the empty-output fallback further down reports it instead.
        python3 - "$SAMPLE_OUTPUT" "$COLLAPSED_OUTPUT" << 'PYTHON_SCRIPT' || echo -e "${YELLOW}Stack conversion failed; no flamegraph data was produced.${NC}"
import sys
import re
from collections import defaultdict


def parse_sample_output(input_file, output_file):
    """
    Parse macOS sample output tree format into collapsed stacks.

    Sample format is a tree like:
        3718 start  (in dyld) + 6076  [0x...]
          3718 main  (in fory_benchmark) + 136  [0x...]
            3718 benchmark::Run()  (in fory_benchmark) + 48  [0x...]
              + 3611 BM_Serialize()  (in fory_benchmark) + 316  [0x...]
              + ! 3301 fory::serialize()  (in fory_benchmark) + 468  [0x...]

    The number on each tree line is treated as inclusive (the frame plus
    everything below it). flamegraph.pl sums the values of all collapsed lines
    sharing a prefix, so each stack is written with its *self* count:
    the frame's count minus the counts of its direct children.

    Args:
        input_file: path of the text file written by `sample`.
        output_file: path of the collapsed-stack file to create
            (one "frame;frame;... count" line per stack).

    Returns:
        Number of distinct stacks written.
    """
    stacks = defaultdict(int)

    with open(input_file, 'r', errors='replace') as f:
        lines = f.readlines()

    # Collect the lines of the "Call graph:" section
    in_call_graph = False
    stack_lines = []

    for line in lines:
        if 'Call graph:' in line:
            in_call_graph = True
            continue
        if in_call_graph:
            # Stop at next section
            if line.strip() and not line.startswith(' ') and not any(c in line for c in ['+', '|', '!']):
                if 'Thread_' not in line and 'Total number' in line:
                    break
            stack_lines.append(line)

    # Each frame has format:
    # [indent/branch chars] COUNT FUNC_NAME (in MODULE) + OFFSET [ADDR] [FILE:LINE]
    frame_pattern = re.compile(
        r'^([\s+!:|]*)'           # Branch indicators and indentation
        r'(\d+)\s+'               # Sample count
        r'(.+?)'                  # Function name
        r'\s+\(in\s+([^)]+)\)'    # Module name
        r'(?:\s+\+\s+[\d,]+)?'    # Optional offset
        r'(?:\s+\[0x[0-9a-fA-F,]+\])?' # Optional address
        r'(?:\s+[\w./]+:\d+)?'    # Optional file:line
    )

    # Frames currently open, root first. Each entry is
    # [depth, display_name_or_None, inclusive_count, sum_of_child_counts].
    open_frames = []

    def close_frame():
        """Pop the innermost open frame and record its self count."""
        _, _, count, child_total = open_frames[-1]
        self_count = count - child_total
        if self_count > 0:
            # Unknown frames (name None) stay in the tree for the arithmetic
            # but are left out of the displayed stack.
            key = ';'.join(f[1] for f in open_frames if f[1] is not None)
            if key:
                # Accumulate: the same stack can occur more than once, e.g.
                # sibling lines for one function at different offsets.
                stacks[key] += self_count
        open_frames.pop()

    for line in stack_lines:
        match = frame_pattern.match(line)
        if not match:
            continue

        prefix = match.group(1)
        count = int(match.group(2))
        func_name = match.group(3).strip()

        # Depth from the prefix width (roughly 2 chars per level); branch
        # characters occupy the same columns as indentation.
        depth = len(prefix) // 2

        if func_name == '???' or func_name.startswith('0x'):
            name = None
        else:
            # ';' is the frame separator of the collapsed format
            name = func_name.replace(';', ':')

        # Close every frame that is not an ancestor of this line
        while open_frames and open_frames[-1][0] >= depth:
            close_frame()

        # This frame's samples are part of its parent's inclusive count
        if open_frames:
            open_frames[-1][3] += count
        open_frames.append([depth, name, count, 0])

    while open_frames:
        close_frame()

    ranked = sorted(stacks.items(), key=lambda item: -item[1])

    # Write collapsed format
    with open(output_file, 'w') as f:
        for stack, count in ranked:
            f.write(f"{stack} {count}\n")

    print(f"Extracted {len(stacks)} unique stack traces")
    if ranked:
        print("Top 5 hottest stacks:")
        for stack, count in ranked[:5]:
            # Show just the last few functions
            funcs = stack.split(';')
            short = ';'.join(funcs[-3:]) if len(funcs) > 3 else stack
            print(f"  {count}: ...{short}")

    return len(stacks)


if __name__ == '__main__':
    parse_sample_output(sys.argv[1], sys.argv[2])
PYTHON_SCRIPT

        # Generate flamegraph (only when the converter produced a non-empty file)
        if [[ -s "$COLLAPSED_OUTPUT" ]]; then
            if [[ "$FLAMEGRAPH_DIR" == "PATH" ]]; then
                flamegraph.pl "$COLLAPSED_OUTPUT" > "$FLAMEGRAPH_SVG"
            else
                "$FLAMEGRAPH_DIR/flamegraph.pl" "$COLLAPSED_OUTPUT" > "$FLAMEGRAPH_SVG"
            fi
            echo -e "${GREEN}Flamegraph saved to: $(realpath "${FLAMEGRAPH_SVG}")${NC}"

            # Try to open in browser
            if command -v open &> /dev/null; then
                echo -e "${YELLOW}Opening flamegraph in browser...${NC}"
                open "$FLAMEGRAPH_SVG"
            fi
        else
            echo -e "${YELLOW}Could not generate flamegraph from sample output.${NC}"
            echo -e "You can view the raw sample output at: $(realpath "${SAMPLE_OUTPUT}")"
            echo -e ""
            echo -e "${YELLOW}Tip: For better profiling results, consider using samply:${NC}"
            echo -e "  cargo install samply"
            echo -e "  ./profile.sh --data struct --serializer fory"
        fi
    else
        echo -e "${RED}Sample command failed to produce output${NC}"
        exit 1
    fi

else
    echo -e "${RED}No profiling tool found. Please install one of:${NC}"
    echo "  - samply: cargo install samply (recommended, cross-platform)"
    if [[ "$OS_TYPE" == "Darwin" ]]; then
        echo "  - sample: Built-in on macOS (should be available)"
    else
        echo "  - perf (Linux): apt install linux-tools-generic"
    fi
    exit 1
fi

echo ""
echo -e "${GREEN}=== Profiling complete! ===${NC}"