# Tools/inspection/benchmark_external_inspection.py

import _remote_debugging
import time
import subprocess
import sys
import contextlib
import tempfile
import os
import argparse
from _colorize import get_colors, can_colorize

# Source of the "basic" target script. It is never imported here; it is
# written to a temporary file and run in a child interpreter. The script loops
# forever, picking a workload by iteration number (recursive fibonacci every
# 50th, nested math calls every 10th, string operations every 5th, a short
# summing loop otherwise) and sleeping 1 ms between iterations.
CODE = '''\
import time
import os
import sys
import math

def slow_fibonacci(n):
    """Intentionally slow recursive fibonacci - should show up prominently in profiler"""
    if n <= 1:
        return n
    return slow_fibonacci(n-1) + slow_fibonacci(n-2)

def medium_computation():
    """Medium complexity function"""
    result = 0
    for i in range(1000):
        result += math.sqrt(i) * math.sin(i)
    return result

def fast_loop():
    """Fast simple loop"""
    total = 0
    for i in range(100):
        total += i
    return total

def string_operations():
    """String manipulation that should be visible in profiler"""
    text = "hello world " * 100
    words = text.split()
    return " ".join(reversed(words))

def nested_calls():
    """Nested function calls to test call stack depth"""
    def level1():
        def level2():
            def level3():
                return medium_computation()
            return level3()
        return level2()
    return level1()

def main_loop():
    """Main computation loop with different execution paths"""
    iteration = 0

    while True:
        iteration += 1

        # Different execution paths with different frequencies
        if iteration % 50 == 0:
            # Expensive operation - should show high per-call time
            result = slow_fibonacci(20)

        elif iteration % 10 == 0:
            # Medium operation
            result = nested_calls()

        elif iteration % 5 == 0:
            # String operations
            result = string_operations()

        else:
            # Fast operation - most common
            result = fast_loop()

        # Small delay to make sampling more interesting
        time.sleep(0.001)

if __name__ == "__main__":
    main_loop()
'''

# Source of the "deep_static" target script: recurses through factorial(900)
# and then sleeps for 10000 seconds at the base case, presumably so the deep
# call stack stays in place for the whole benchmark run.
DEEP_STATIC_CODE = """\
import time
def factorial(n):
    if n <= 1:
        time.sleep(10000)
        return 1
    return n * factorial(n-1)

factorial(900)
"""

# Source of the "threads" target script: starts 5 daemon threads that loop
# forever over a mix of CPU work and 0.1 s sleeps, while the main thread does
# CPU work and randomly spawns further threads.
# NOTE(review): the "remove" branch only drops list references and decrements
# thread_count; mixed_workload() never returns, so no thread actually stops
# and the real thread count can grow past the `thread_count < 100` cap. The
# inline comment also says "Remove 1-3 threads" while the code draws
# randint(1, 5). Confirm whether this is intended before relying on it.
CODE_WITH_TONS_OF_THREADS = '''\
import time
import threading
import random
import math

def cpu_intensive_work():
    """Do some CPU intensive calculations"""
    result = 0
    for _ in range(10000):
        result += math.sin(random.random()) * math.cos(random.random())
    return result

def io_intensive_work():
    """Simulate IO intensive work with sleeps"""
    time.sleep(0.1)

def mixed_workload():
    """Mix of CPU and IO work"""
    while True:
        if random.random() < 0.3:
            cpu_intensive_work()
        else:
            io_intensive_work()

def create_threads(n):
    """Create n threads doing mixed workloads"""
    threads = []
    for _ in range(n):
        t = threading.Thread(target=mixed_workload, daemon=True)
        t.start()
        threads.append(t)
    return threads

# Start with 5 threads
active_threads = create_threads(5)
thread_count = 5

# Main thread manages threads and does work
while True:
    # Randomly add or remove threads
    if random.random() < 0.1:  # 10% chance each iteration
        if random.random() < 0.5 and thread_count < 100:
            # Add 1-5 new threads
            new_count = random.randint(1, 5)
            new_threads = create_threads(new_count)
            active_threads.extend(new_threads)
            thread_count += new_count
        elif thread_count > 10:
            # Remove 1-3 threads
            remove_count = random.randint(1, 5)
            # The threads will terminate naturally since they're daemons
            active_threads = active_threads[remove_count:]
            thread_count -= remove_count

    cpu_intensive_work()
    time.sleep(0.05)
'''

# Registry of the selectable target scripts. Each key is a valid value for the
# --code command line option; each entry holds the script source ("code") and
# the one-line summary ("description") shown in --help and in the run banner.
CODE_EXAMPLES = {
    "basic": {
        "code": CODE,
        "description": "Mixed workload with fibonacci, computations, and string operations",
    },
    "deep_static": {
        "code": DEEP_STATIC_CODE,
        "description": "Deep recursive call stack with 900+ frames (factorial)",
    },
    "threads": {
        "code": CODE_WITH_TONS_OF_THREADS,
        "description": "Tons of threads doing mixed CPU/IO work",
    },
}


def benchmark(unwinder, duration_seconds=10):
    """Benchmark mode - measure raw sampling speed for specified duration"""
    sample_count = 0
    fail_count = 0
    total_work_time = 0.0
    start_time = time.perf_counter()
    end_time = start_time + duration_seconds

    colors = get_colors(can_colorize())

    print(
        f"{colors.BOLD_BLUE}Benchmarking sampling speed for {duration_seconds} seconds...{colors.RESET}"
    )

    try:
        while time.perf_counter() < end_time:
            work_start = time.perf_counter()
            try:
                stack_trace = unwinder.get_stack_trace()
                if stack_trace:
                    sample_count += 1
            except (OSError, RuntimeError, UnicodeDecodeError) as e:
                fail_count += 1

            work_end = time.perf_counter()
            total_work_time += work_end - work_start

            total_attempts = sample_count + fail_count
            if total_attempts % 10000 == 0:
                avg_work_time_us = (total_work_time / total_attempts) * 1e6
                work_rate = (
                    total_attempts / total_work_time if total_work_time > 0 else 0
                )
                success_rate = (sample_count / total_attempts) * 100

                # Color code the success rate
                if success_rate >= 95:
                    success_color = colors.GREEN
                elif success_rate >= 80:
                    success_color = colors.YELLOW
                else:
                    success_color = colors.RED

                print(
                    f"{colors.CYAN}Attempts:{colors.RESET} {total_attempts} | "
                    f"{colors.CYAN}Success:{colors.RESET} {success_color}{success_rate:.1f}%{colors.RESET} | "
                    f"{colors.CYAN}Rate:{colors.RESET} {colors.MAGENTA}{work_rate:.1f}Hz{colors.RESET} | "
                    f"{colors.CYAN}Avg:{colors.RESET} {colors.YELLOW}{avg_work_time_us:.2f}µs{colors.RESET}"
                )
    except KeyboardInterrupt:
        print(f"\n{colors.YELLOW}Benchmark interrupted by user{colors.RESET}")

    actual_end_time = time.perf_counter()
    wall_time = actual_end_time - start_time
    total_attempts = sample_count + fail_count

    # Return final statistics
    return {
        "wall_time": wall_time,
        "total_attempts": total_attempts,
        "sample_count": sample_count,
        "fail_count": fail_count,
        "success_rate": (
            (sample_count / total_attempts) * 100 if total_attempts > 0 else 0
        ),
        "total_work_time": total_work_time,
        "avg_work_time_us": (
            (total_work_time / total_attempts) * 1e6 if total_attempts > 0 else 0
        ),
        "work_rate_hz": total_attempts / total_work_time if total_work_time > 0 else 0,
        "samples_per_sec": sample_count / wall_time if wall_time > 0 else 0,
    }


def print_benchmark_results(results):
    """Print comprehensive benchmark results"""
    colors = get_colors(can_colorize())

    print(f"\n{colors.BOLD_GREEN}{'='*60}{colors.RESET}")
    print(f"{colors.BOLD_GREEN}get_stack_trace() Benchmark Results{colors.RESET}")
    print(f"{colors.BOLD_GREEN}{'='*60}{colors.RESET}")

    # Basic statistics
    print(f"\n{colors.BOLD_CYAN}Basic Statistics:{colors.RESET}")
    print(
        f"  {colors.CYAN}Wall time:{colors.RESET}           {colors.YELLOW}{results['wall_time']:.3f}{colors.RESET} seconds"
    )
    print(
        f"  {colors.CYAN}Total attempts:{colors.RESET}      {colors.MAGENTA}{results['total_attempts']:,}{colors.RESET}"
    )
    print(
        f"  {colors.CYAN}Successful samples:{colors.RESET}  {colors.GREEN}{results['sample_count']:,}{colors.RESET}"
    )
    print(
        f"  {colors.CYAN}Failed samples:{colors.RESET}      {colors.RED}{results['fail_count']:,}{colors.RESET}"
    )

    # Color code the success rate
    success_rate = results["success_rate"]
    if success_rate >= 95:
        success_color = colors.BOLD_GREEN
    elif success_rate >= 80:
        success_color = colors.BOLD_YELLOW
    else:
        success_color = colors.BOLD_RED

    print(
        f"  {colors.CYAN}Success rate:{colors.RESET}        {success_color}{success_rate:.2f}%{colors.RESET}"
    )

    # Performance metrics
    print(f"\n{colors.BOLD_CYAN}Performance Metrics:{colors.RESET}")
    print(
        f"  {colors.CYAN}Average call time:{colors.RESET}   {colors.YELLOW}{results['avg_work_time_us']:.2f}{colors.RESET} µs"
    )
    print(
        f"  {colors.CYAN}Work rate:{colors.RESET}           {colors.MAGENTA}{results['work_rate_hz']:.1f}{colors.RESET} calls/sec"
    )
    print(
        f"  {colors.CYAN}Sample rate:{colors.RESET}         {colors.MAGENTA}{results['samples_per_sec']:.1f}{colors.RESET} samples/sec"
    )
    print(
        f"  {colors.CYAN}Total work time:{colors.RESET}     {colors.YELLOW}{results['total_work_time']:.3f}{colors.RESET} seconds"
    )

    # Color code work efficiency
    efficiency = (results["total_work_time"] / results["wall_time"]) * 100
    if efficiency >= 80:
        efficiency_color = colors.GREEN
    elif efficiency >= 50:
        efficiency_color = colors.YELLOW
    else:
        efficiency_color = colors.RED

    print(
        f"  {colors.CYAN}Work efficiency:{colors.RESET}     {efficiency_color}{efficiency:.1f}%{colors.RESET}"
    )


def parse_arguments():
    """Build the command line parser and return the parsed options.

    Returns:
        The ``argparse.Namespace`` from ``parse_args()``, carrying
        ``duration`` (int seconds, default 10) and ``code`` (one of the
        ``CODE_EXAMPLES`` keys, default ``"basic"``).
    """
    # One "  name: description" line per registered example, for the epilog.
    example_lines = []
    for name, info in CODE_EXAMPLES.items():
        example_lines.append(f"  {name}: {info['description']}")
    examples_desc = "\n".join(example_lines)

    # RawDescriptionHelpFormatter keeps the hand-made layout below intact.
    epilog_text = f"""
Examples:
  %(prog)s                           # Run basic benchmark for 10 seconds (default)
  %(prog)s --duration 30             # Run basic benchmark for 30 seconds
  %(prog)s -d 60                     # Run basic benchmark for 60 seconds
  %(prog)s --code deep_static        # Run deep static call stack benchmark
  %(prog)s --code deep_static -d 30  # Run deep static benchmark for 30 seconds

Available code examples:
{examples_desc}
        """

    cli = argparse.ArgumentParser(
        description="Benchmark get_stack_trace() performance",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
        color=True,
    )
    cli.add_argument(
        "--duration", "-d",
        type=int,
        default=10,
        help="Benchmark duration in seconds (default: 10)",
    )
    cli.add_argument(
        "--code", "-c",
        choices=list(CODE_EXAMPLES),
        default="basic",
        help="Code example to benchmark (default: basic)",
    )
    return cli.parse_args()


def create_target_process(temp_file, code_example="basic"):
    """Write the chosen example script to *temp_file* and launch it.

    Args:
        temp_file: Open, writable text file object with a ``name`` attribute;
            the script source is written to it and flushed.
        code_example: Key into ``CODE_EXAMPLES``; an unknown key falls back
            to the basic ``CODE`` script.

    Returns:
        A ``(process, path)`` tuple: the running ``subprocess.Popen`` object
        and ``temp_file.name``.

    Raises:
        RuntimeError: If the child has already exited one second after being
            started; the message carries its captured stdout and stderr.
    """
    fallback = {"code": CODE}
    script_source = CODE_EXAMPLES.get(code_example, fallback)["code"]
    temp_file.write(script_source)
    temp_file.flush()  # the child reads the file by name, so push it to disk

    command = [sys.executable, temp_file.name]
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    # Give it time to start
    time.sleep(1.0)

    # Still alive: hand it to the caller.
    if process.poll() is None:
        return process, temp_file.name

    stdout, stderr = process.communicate()
    raise RuntimeError(
        f"Target process exited unexpectedly:\nSTDOUT: {stdout.decode()}\nSTDERR: {stderr.decode()}"
    )


def cleanup_process(process, temp_file_path):
    """Stop the target process if it is still running (best effort).

    Sends ``terminate()`` and waits up to 5 seconds; if the process has not
    exited by then it is killed and waited for. Any exception raised along
    the way is suppressed.

    Args:
        process: The ``subprocess.Popen``-like object to stop.
        temp_file_path: Accepted but not used by this function.
    """
    with contextlib.suppress(Exception):
        # Already exited: nothing to do.
        if process.poll() is not None:
            return

        process.terminate()
        try:
            process.wait(timeout=5.0)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long; force it.
            process.kill()
            process.wait()

def main():
    """Main benchmark function

    Parses the command line, writes the selected example to a temporary
    ``.py`` file, starts it as a child process, attaches a
    ``_remote_debugging.RemoteUnwinder`` to it, samples for the requested
    duration, stops the child and prints the results.

    Returns:
        0 on success, 1 when a PermissionError is raised along the way.

    Raises:
        Exception: Any other failure is reported (together with whatever the
            child wrote to stdout/stderr, when available) and re-raised.
    """
    colors = get_colors(can_colorize())
    args = parse_arguments()

    print(f"{colors.BOLD_MAGENTA}External Inspection Benchmark Tool{colors.RESET}")
    print(f"{colors.BOLD_MAGENTA}{'=' * 34}{colors.RESET}")

    example_info = CODE_EXAMPLES.get(args.code, {"description": "Unknown"})
    print(
        f"\n{colors.CYAN}Code Example:{colors.RESET} {colors.GREEN}{args.code}{colors.RESET}"
    )
    print(f"{colors.CYAN}Description:{colors.RESET} {example_info['description']}")
    print(
        f"{colors.CYAN}Benchmark Duration:{colors.RESET} {colors.YELLOW}{args.duration}{colors.RESET} seconds"
    )

    process = None
    temp_file_path = None
    # (stdout, stderr) of the child, captured on failure while its pipes are
    # still open; stays None when nothing could be captured.
    child_output = None

    try:
        # Create target process
        print(f"\n{colors.BLUE}Creating and starting target process...{colors.RESET}")
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py") as temp_file:
            process, temp_file_path = create_target_process(temp_file, args.code)
            print(
                f"{colors.GREEN}Target process started with PID: {colors.BOLD_WHITE}{process.pid}{colors.RESET}"
            )

            # Run benchmark with specified duration
            with process:
                # Create unwinder and run benchmark
                print(f"{colors.BLUE}Initializing unwinder...{colors.RESET}")
                try:
                    unwinder = _remote_debugging.RemoteUnwinder(
                        process.pid, all_threads=True
                    )
                    results = benchmark(unwinder, duration_seconds=args.duration)
                except Exception:
                    # Leaving `with process:` closes the child's pipes, so its
                    # output must be collected here. Stop the child first so
                    # communicate() sees end-of-file instead of timing out.
                    cleanup_process(process, temp_file_path)
                    with contextlib.suppress(Exception):
                        child_output = process.communicate(timeout=1)
                    raise
                finally:
                    # No-op when the child has already been stopped above.
                    cleanup_process(process, temp_file_path)

            # Print results
            print_benchmark_results(results)

    except PermissionError as e:
        print(
            f"{colors.BOLD_RED}Error: Insufficient permissions to read stack trace: {e}{colors.RESET}"
        )
        print(
            f"{colors.YELLOW}Try running with appropriate privileges (e.g., sudo){colors.RESET}"
        )
        return 1
    except Exception as e:
        print(f"{colors.BOLD_RED}Error during benchmarking: {e}{colors.RESET}")
        if child_output is not None:
            stdout, stderr = child_output
            if stdout:
                print(
                    f"{colors.CYAN}Process STDOUT:{colors.RESET} {stdout.decode()}"
                )
            if stderr:
                print(
                    f"{colors.RED}Process STDERR:{colors.RESET} {stderr.decode()}"
                )
        raise

    return 0


if __name__ == "__main__":
    sys.exit(main())