aboutsummaryrefslogtreecommitdiff
path: root/scripts/benchmark-startup
diff options
context:
space:
mode:
authordiogo464 <[email protected]>2025-07-11 20:14:42 +0100
committerdiogo464 <[email protected]>2025-07-11 20:14:42 +0100
commitf0f57060a54b5c57608f945fd8f66b030779fd31 (patch)
treead9f341e8a91057f1df9d00027812fee53de117a /scripts/benchmark-startup
parent9c57f905f93e381cbb27147f2effc76e127cc646 (diff)
feat: add benchmark startup analysis tools and improve demo.sh
- Add generate-schedule.sh script to create container schedules from addresses.txt - Add benchmark-startup Python script for analyzing container startup times - Update demo.sh to print timestamps and wait for start signal at /oar-p2p/start - Add comprehensive statistics including startup, start signal, and waiting times - Support for synchronized container coordination via start signal file
Diffstat (limited to 'scripts/benchmark-startup')
-rwxr-xr-xscripts/benchmark-startup204
1 files changed, 204 insertions, 0 deletions
diff --git a/scripts/benchmark-startup b/scripts/benchmark-startup
new file mode 100755
index 0000000..f38d031
--- /dev/null
+++ b/scripts/benchmark-startup
@@ -0,0 +1,204 @@
1#!/usr/bin/env python3
2
3import os
4import glob
5import sys
6from datetime import datetime
7import statistics
8
9def parse_timestamp(timestamp_str):
10 """Parse ISO timestamp from date -Iseconds"""
11 if 'T' in timestamp_str:
12 # Handle timezone formats
13 if timestamp_str.endswith('Z'):
14 return datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
15 elif '+' in timestamp_str or '-' in timestamp_str[-6:]:
16 return datetime.fromisoformat(timestamp_str)
17 else:
18 # Add timezone if missing
19 return datetime.fromisoformat(timestamp_str + '+00:00')
20 else:
21 # Try other formats
22 return datetime.fromisoformat(timestamp_str)
23
24def analyze_startup_times():
25 """Analyze startup times from log files"""
26 print("Analyzing startup times...")
27
28 log_files = glob.glob("benchmark-logs/*.stdout")
29 if not log_files:
30 print("No log files found in benchmark-logs/")
31 return
32
33 startup_times = []
34 start_signal_times = []
35 earliest_startup_time = None
36 earliest_start_signal_time = None
37
38 for log_file in log_files:
39 try:
40 with open(log_file, 'r') as f:
41 lines = f.readlines()
42
43 # Parse first line (startup time)
44 if len(lines) > 0:
45 first_line = lines[0].strip()
46 if first_line:
47 try:
48 timestamp = parse_timestamp(first_line)
49 startup_times.append((log_file, timestamp))
50
51 if earliest_startup_time is None or timestamp < earliest_startup_time:
52 earliest_startup_time = timestamp
53
54 except ValueError as e:
55 print(f"Could not parse startup timestamp in {log_file}: '{first_line}' - {e}")
56
57 # Look for second timestamp (start signal time)
58 for i, line in enumerate(lines):
59 line = line.strip()
60 if 'T' in line and i > 0: # Skip first line, look for second timestamp
61 try:
62 timestamp = parse_timestamp(line)
63 start_signal_times.append((log_file, timestamp))
64
65 if earliest_start_signal_time is None or timestamp < earliest_start_signal_time:
66 earliest_start_signal_time = timestamp
67 break # Only take the first additional timestamp found
68
69 except ValueError:
70 continue # Skip lines that aren't timestamps
71
72 except Exception as e:
73 print(f"Error reading {log_file}: {e}")
74
75 if not startup_times:
76 print("No valid startup times found")
77 return
78
79 # Calculate relative startup times
80 relative_startup_times = []
81 for log_file, timestamp in startup_times:
82 relative_time = (timestamp - earliest_startup_time).total_seconds()
83 relative_startup_times.append(relative_time)
84
85 # Print startup statistics
86 print(f"\n=== STARTUP TIME STATISTICS ===")
87 print(f"Total containers: {len(startup_times)}")
88 print(f"Earliest start time: {earliest_startup_time}")
89 print(f"Latest start time: {max(startup_times, key=lambda x: x[1])[1]}")
90 print(f"Total startup window: {max(relative_startup_times):.2f} seconds")
91
92 if len(relative_startup_times) > 1:
93 print(f"Average relative startup time: {statistics.mean(relative_startup_times):.2f} seconds")
94 print(f"Median relative startup time: {statistics.median(relative_startup_times):.2f} seconds")
95 print(f"Standard deviation: {statistics.stdev(relative_startup_times):.2f} seconds")
96 print(f"Min relative startup time: {min(relative_startup_times):.2f} seconds")
97 print(f"Max relative startup time: {max(relative_startup_times):.2f} seconds")
98
99 # Percentiles
100 sorted_times = sorted(relative_startup_times)
101 p50 = sorted_times[int(len(sorted_times) * 0.5)]
102 p90 = sorted_times[int(len(sorted_times) * 0.9)]
103 p95 = sorted_times[int(len(sorted_times) * 0.95)]
104 p99 = sorted_times[int(len(sorted_times) * 0.99)]
105
106 print(f"50th percentile: {p50:.2f} seconds")
107 print(f"90th percentile: {p90:.2f} seconds")
108 print(f"95th percentile: {p95:.2f} seconds")
109 print(f"99th percentile: {p99:.2f} seconds")
110
111 # Show startup distribution
112 print(f"\nStartup time distribution (by second):")
113 time_buckets = {}
114 for relative_time in relative_startup_times:
115 bucket = int(relative_time)
116 time_buckets[bucket] = time_buckets.get(bucket, 0) + 1
117
118 for bucket in sorted(time_buckets.keys())[:10]: # Show first 10 buckets
119 print(f" {bucket}s: {time_buckets[bucket]} containers")
120
121 # Print per-container details (first 10)
122 print(f"\nFirst 10 containers to start:")
123 sorted_startup_times = sorted(startup_times, key=lambda x: x[1])
124 for i, (log_file, timestamp) in enumerate(sorted_startup_times[:10]):
125 container_name = os.path.basename(log_file).replace('.stdout', '')
126 relative_time = (timestamp - earliest_startup_time).total_seconds()
127 print(f"{i+1:2d}. {container_name}: +{relative_time:.2f}s")
128
129 if len(sorted_startup_times) > 10:
130 print(f"... and {len(sorted_startup_times) - 10} more containers")
131
132 # Analyze start signal times if available
133 if start_signal_times:
134 print(f"\n=== START SIGNAL TIME STATISTICS ===")
135 print(f"Containers with start signal: {len(start_signal_times)}")
136
137 # Calculate relative start signal times
138 relative_start_signal_times = []
139 for log_file, timestamp in start_signal_times:
140 relative_time = (timestamp - earliest_start_signal_time).total_seconds()
141 relative_start_signal_times.append(relative_time)
142
143 print(f"Earliest start signal time: {earliest_start_signal_time}")
144 print(f"Latest start signal time: {max(start_signal_times, key=lambda x: x[1])[1]}")
145 print(f"Total start signal window: {max(relative_start_signal_times):.2f} seconds")
146
147 if len(relative_start_signal_times) > 1:
148 print(f"Average relative start signal time: {statistics.mean(relative_start_signal_times):.2f} seconds")
149 print(f"Median relative start signal time: {statistics.median(relative_start_signal_times):.2f} seconds")
150 print(f"Standard deviation: {statistics.stdev(relative_start_signal_times):.2f} seconds")
151
152 # Percentiles
153 sorted_signal_times = sorted(relative_start_signal_times)
154 p50 = sorted_signal_times[int(len(sorted_signal_times) * 0.5)]
155 p90 = sorted_signal_times[int(len(sorted_signal_times) * 0.9)]
156 p95 = sorted_signal_times[int(len(sorted_signal_times) * 0.95)]
157 p99 = sorted_signal_times[int(len(sorted_signal_times) * 0.99)]
158
159 print(f"50th percentile: {p50:.2f} seconds")
160 print(f"90th percentile: {p90:.2f} seconds")
161 print(f"95th percentile: {p95:.2f} seconds")
162 print(f"99th percentile: {p99:.2f} seconds")
163
164 # Show start signal distribution
165 print(f"\nStart signal time distribution (by second):")
166 signal_time_buckets = {}
167 for relative_time in relative_start_signal_times:
168 bucket = int(relative_time)
169 signal_time_buckets[bucket] = signal_time_buckets.get(bucket, 0) + 1
170
171 for bucket in sorted(signal_time_buckets.keys())[:10]: # Show first 10 buckets
172 print(f" {bucket}s: {signal_time_buckets[bucket]} containers")
173
174 # Calculate waiting times (time between startup and start signal)
175 waiting_times = []
176 for startup_entry in startup_times:
177 startup_log, startup_time = startup_entry
178 # Find corresponding start signal time
179 for signal_entry in start_signal_times:
180 signal_log, signal_time = signal_entry
181 if startup_log == signal_log:
182 waiting_time = (signal_time - startup_time).total_seconds()
183 waiting_times.append(waiting_time)
184 break
185
186 if waiting_times:
187 print(f"\n=== WAITING TIME STATISTICS ===")
188 print(f"Average waiting time: {statistics.mean(waiting_times):.2f} seconds")
189 print(f"Median waiting time: {statistics.median(waiting_times):.2f} seconds")
190 print(f"Min waiting time: {min(waiting_times):.2f} seconds")
191 print(f"Max waiting time: {max(waiting_times):.2f} seconds")
192
193 if len(waiting_times) > 1:
194 print(f"Standard deviation: {statistics.stdev(waiting_times):.2f} seconds")
195
196 else:
197 print(f"\nNo start signal timestamps found (containers may not have reached start signal yet)")
198
199def main():
200 """Main function"""
201 analyze_startup_times()
202
203if __name__ == "__main__":
204 main() \ No newline at end of file