From f0f57060a54b5c57608f945fd8f66b030779fd31 Mon Sep 17 00:00:00 2001
From: diogo464
Date: Fri, 11 Jul 2025 20:14:42 +0100
Subject: feat: add benchmark startup analysis tools and improve demo.sh

- Add generate-schedule.sh script to create container schedules from addresses.txt
- Add benchmark-startup Python script for analyzing container startup times
- Update demo.sh to print timestamps and wait for start signal at /oar-p2p/start
- Add comprehensive statistics including startup, start signal, and waiting times
- Support for synchronized container coordination via start signal file
---
 scripts/benchmark-startup    | 204 +++++++++++++++++++++++++++++++++++++++++++
 scripts/benchmark.sh         |   2 +
 scripts/generate-schedule.sh |  49 +++++++++++
 3 files changed, 255 insertions(+)
 create mode 100755 scripts/benchmark-startup
 create mode 100644 scripts/benchmark.sh
 create mode 100755 scripts/generate-schedule.sh
(limited to 'scripts')

diff --git a/scripts/benchmark-startup b/scripts/benchmark-startup
new file mode 100755
index 0000000..f38d031
--- /dev/null
+++ b/scripts/benchmark-startup
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+
+import os
+import glob
+import sys
+from datetime import datetime
+import statistics
+
+def parse_timestamp(timestamp_str):
+    """Parse ISO timestamp from date -Iseconds"""
+    if 'T' in timestamp_str:
+        # Handle timezone formats
+        if timestamp_str.endswith('Z'):
+            return datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
+        elif '+' in timestamp_str or '-' in timestamp_str[-6:]:
+            return datetime.fromisoformat(timestamp_str)
+        else:
+            # Add timezone if missing
+            return datetime.fromisoformat(timestamp_str + '+00:00')
+    else:
+        # Try other formats
+        return datetime.fromisoformat(timestamp_str)
+
+def analyze_startup_times():
+    """Analyze startup times from log files"""
+    print("Analyzing startup times...")
+
+    log_files = glob.glob("benchmark-logs/*.stdout")
+    if not log_files:
+        print("No log files found in benchmark-logs/")
+        return
+
+    startup_times = []
+    start_signal_times = []
+    earliest_startup_time = None
+    earliest_start_signal_time = None
+
+    for log_file in log_files:
+        try:
+            with open(log_file, 'r') as f:
+                lines = f.readlines()
+
+            # Parse first line (startup time)
+            if len(lines) > 0:
+                first_line = lines[0].strip()
+                if first_line:
+                    try:
+                        timestamp = parse_timestamp(first_line)
+                        startup_times.append((log_file, timestamp))
+
+                        if earliest_startup_time is None or timestamp < earliest_startup_time:
+                            earliest_startup_time = timestamp
+
+                    except ValueError as e:
+                        print(f"Could not parse startup timestamp in {log_file}: '{first_line}' - {e}")
+
+            # Look for second timestamp (start signal time)
+            for i, line in enumerate(lines):
+                line = line.strip()
+                if 'T' in line and i > 0:  # Skip first line, look for second timestamp
+                    try:
+                        timestamp = parse_timestamp(line)
+                        start_signal_times.append((log_file, timestamp))
+
+                        if earliest_start_signal_time is None or timestamp < earliest_start_signal_time:
+                            earliest_start_signal_time = timestamp
+                        break  # Only take the first additional timestamp found
+
+                    except ValueError:
+                        continue  # Skip lines that aren't timestamps
+
+        except Exception as e:
+            print(f"Error reading {log_file}: {e}")
+
+    if not startup_times:
+        print("No valid startup times found")
+        return
+
+    # Calculate relative startup times
+    relative_startup_times = []
+    for log_file, timestamp in startup_times:
+        relative_time = (timestamp - earliest_startup_time).total_seconds()
+        relative_startup_times.append(relative_time)
+
+    # Print startup statistics
+    print(f"\n=== STARTUP TIME STATISTICS ===")
+    print(f"Total containers: {len(startup_times)}")
+    print(f"Earliest start time: {earliest_startup_time}")
+    print(f"Latest start time: {max(startup_times, key=lambda x: x[1])[1]}")
+    print(f"Total startup window: {max(relative_startup_times):.2f} seconds")
+
+    if len(relative_startup_times) > 1:
+        print(f"Average relative startup time: {statistics.mean(relative_startup_times):.2f} seconds")
+        print(f"Median relative startup time: {statistics.median(relative_startup_times):.2f} seconds")
+        print(f"Standard deviation: {statistics.stdev(relative_startup_times):.2f} seconds")
+        print(f"Min relative startup time: {min(relative_startup_times):.2f} seconds")
+        print(f"Max relative startup time: {max(relative_startup_times):.2f} seconds")
+
+        # Percentiles
+        sorted_times = sorted(relative_startup_times)
+        p50 = sorted_times[int(len(sorted_times) * 0.5)]
+        p90 = sorted_times[int(len(sorted_times) * 0.9)]
+        p95 = sorted_times[int(len(sorted_times) * 0.95)]
+        p99 = sorted_times[int(len(sorted_times) * 0.99)]
+
+        print(f"50th percentile: {p50:.2f} seconds")
+        print(f"90th percentile: {p90:.2f} seconds")
+        print(f"95th percentile: {p95:.2f} seconds")
+        print(f"99th percentile: {p99:.2f} seconds")
+
+    # Show startup distribution
+    print(f"\nStartup time distribution (by second):")
+    time_buckets = {}
+    for relative_time in relative_startup_times:
+        bucket = int(relative_time)
+        time_buckets[bucket] = time_buckets.get(bucket, 0) + 1
+
+    for bucket in sorted(time_buckets.keys())[:10]:  # Show first 10 buckets
+        print(f"  {bucket}s: {time_buckets[bucket]} containers")
+
+    # Print per-container details (first 10)
+    print(f"\nFirst 10 containers to start:")
+    sorted_startup_times = sorted(startup_times, key=lambda x: x[1])
+    for i, (log_file, timestamp) in enumerate(sorted_startup_times[:10]):
+        container_name = os.path.basename(log_file).replace('.stdout', '')
+        relative_time = (timestamp - earliest_startup_time).total_seconds()
+        print(f"{i+1:2d}. {container_name}: +{relative_time:.2f}s")
+
+    if len(sorted_startup_times) > 10:
+        print(f"... and {len(sorted_startup_times) - 10} more containers")
+
+    # Analyze start signal times if available
+    if start_signal_times:
+        print(f"\n=== START SIGNAL TIME STATISTICS ===")
+        print(f"Containers with start signal: {len(start_signal_times)}")
+
+        # Calculate relative start signal times
+        relative_start_signal_times = []
+        for log_file, timestamp in start_signal_times:
+            relative_time = (timestamp - earliest_start_signal_time).total_seconds()
+            relative_start_signal_times.append(relative_time)
+
+        print(f"Earliest start signal time: {earliest_start_signal_time}")
+        print(f"Latest start signal time: {max(start_signal_times, key=lambda x: x[1])[1]}")
+        print(f"Total start signal window: {max(relative_start_signal_times):.2f} seconds")
+
+        if len(relative_start_signal_times) > 1:
+            print(f"Average relative start signal time: {statistics.mean(relative_start_signal_times):.2f} seconds")
+            print(f"Median relative start signal time: {statistics.median(relative_start_signal_times):.2f} seconds")
+            print(f"Standard deviation: {statistics.stdev(relative_start_signal_times):.2f} seconds")
+
+            # Percentiles
+            sorted_signal_times = sorted(relative_start_signal_times)
+            p50 = sorted_signal_times[int(len(sorted_signal_times) * 0.5)]
+            p90 = sorted_signal_times[int(len(sorted_signal_times) * 0.9)]
+            p95 = sorted_signal_times[int(len(sorted_signal_times) * 0.95)]
+            p99 = sorted_signal_times[int(len(sorted_signal_times) * 0.99)]
+
+            print(f"50th percentile: {p50:.2f} seconds")
+            print(f"90th percentile: {p90:.2f} seconds")
+            print(f"95th percentile: {p95:.2f} seconds")
+            print(f"99th percentile: {p99:.2f} seconds")
+
+        # Show start signal distribution
+        print(f"\nStart signal time distribution (by second):")
+        signal_time_buckets = {}
+        for relative_time in relative_start_signal_times:
+            bucket = int(relative_time)
+            signal_time_buckets[bucket] = signal_time_buckets.get(bucket, 0) + 1
+
+        for bucket in sorted(signal_time_buckets.keys())[:10]:  # Show first 10 buckets
+            print(f"  {bucket}s: {signal_time_buckets[bucket]} containers")
+
+        # Calculate waiting times (time between startup and start signal)
+        waiting_times = []
+        for startup_entry in startup_times:
+            startup_log, startup_time = startup_entry
+            # Find corresponding start signal time
+            for signal_entry in start_signal_times:
+                signal_log, signal_time = signal_entry
+                if startup_log == signal_log:
+                    waiting_time = (signal_time - startup_time).total_seconds()
+                    waiting_times.append(waiting_time)
+                    break
+
+        if waiting_times:
+            print(f"\n=== WAITING TIME STATISTICS ===")
+            print(f"Average waiting time: {statistics.mean(waiting_times):.2f} seconds")
+            print(f"Median waiting time: {statistics.median(waiting_times):.2f} seconds")
+            print(f"Min waiting time: {min(waiting_times):.2f} seconds")
+            print(f"Max waiting time: {max(waiting_times):.2f} seconds")
+
+            if len(waiting_times) > 1:
+                print(f"Standard deviation: {statistics.stdev(waiting_times):.2f} seconds")
+
+    else:
+        print(f"\nNo start signal timestamps found (containers may not have reached start signal yet)")
+
+def main():
+    """Main function"""
+    analyze_startup_times()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh
new file mode 100644
index 0000000..db956bf
--- /dev/null
+++ b/scripts/benchmark.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+scripts/generate-schedule.sh | oar-p2p run --output-dir benchmark-logs/ && scripts/benchmark-startup
diff --git a/scripts/generate-schedule.sh b/scripts/generate-schedule.sh
new file mode 100755
index 0000000..8e8db4e
--- /dev/null
+++ b/scripts/generate-schedule.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Script to generate a schedule based on `cargo run -- net show` output
+# Usage: ./generate-schedule.sh [image]
+
+set -e
+
+# Default values
+IMAGE="${1:-ghcr.io/diogo464/oar-p2p/demo:latest}"
+
+# Generate JSON schedule
+echo "Getting addresses from addresses.txt..." >&2
+if [ ! -f "addresses.txt" ]; then
+    echo "Error: addresses.txt not found" >&2
+    exit 1
+fi
+addresses_output=$(cat addresses.txt)
+address_count=$(echo "$addresses_output" | wc -l)
+
+echo "Generating schedule with $address_count containers..." >&2
+echo "Using image: $IMAGE" >&2
+
+# Start JSON array
+echo "["
+
+# Process each address
+first=true
+while IFS=' ' read -r machine address; do
+    # Skip empty lines
+    if [ -z "$address" ]; then
+        continue
+    fi
+
+    # Add comma separator for all but first entry
+    if [ "$first" = true ]; then
+        first=false
+    else
+        echo ","
+    fi
+
+    # Generate container entry directly with proper escaping
+    printf '  {\n    "address": "%s",\n    "image": "%s",\n    "env": {\n      "ADDRESS": "%s",\n      "MACHINE": "%s",\n      "MESSAGE": "Container on %s with address %s"\n    }\n  }' \
+        "$address" "$IMAGE" "$address" "$machine" "$machine" "$address"
+
+done <<< "$addresses_output"
+
+# Close JSON array
+echo ""
+echo "]"
\ No newline at end of file
--
cgit
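
Note: the demo.sh changes referenced in the commit message are not shown above, since this view is limited to 'scripts'. For context, benchmark-startup assumes each benchmark-logs/<container>.stdout begins with a startup timestamp from `date -Iseconds`, followed later by a second timestamp printed once the start signal file /oar-p2p/start appears. A minimal sketch of an entrypoint with that behaviour (the polling loop and trailing workload are illustrative assumptions, not the actual demo.sh):

    #!/usr/bin/env bash
    date -Iseconds                      # first log line: container startup time

    # Block until the coordinator creates the shared start signal file.
    while [ ! -f /oar-p2p/start ]; do
        sleep 0.1
    done

    date -Iseconds                      # second log line: start signal observed
    # ... benchmark workload would run here ...

Likewise, generate-schedule.sh expects addresses.txt to hold one `<machine> <address>` pair per line. For a hypothetical line `m1 10.0.0.1` with the default image, the printf above would emit the following element of the JSON array that benchmark.sh pipes into `oar-p2p run`:

    {
      "address": "10.0.0.1",
      "image": "ghcr.io/diogo464/oar-p2p/demo:latest",
      "env": {
        "ADDRESS": "10.0.0.1",
        "MACHINE": "m1",
        "MESSAGE": "Container on m1 with address 10.0.0.1"
      }
    }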