1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
#!/usr/bin/env python3
import os
import glob
import sys
from datetime import datetime
import statistics
def parse_timestamp(timestamp_str):
"""Parse ISO timestamp from date -Iseconds"""
if 'T' in timestamp_str:
# Handle timezone formats
if timestamp_str.endswith('Z'):
return datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
elif '+' in timestamp_str or '-' in timestamp_str[-6:]:
return datetime.fromisoformat(timestamp_str)
else:
# Add timezone if missing
return datetime.fromisoformat(timestamp_str + '+00:00')
else:
# Try other formats
return datetime.fromisoformat(timestamp_str)
def analyze_startup_times():
"""Analyze startup times from log files"""
print("Analyzing startup times...")
log_files = glob.glob("benchmark-logs/*.stdout")
if not log_files:
print("No log files found in benchmark-logs/")
return
startup_times = []
start_signal_times = []
earliest_startup_time = None
earliest_start_signal_time = None
for log_file in log_files:
try:
with open(log_file, 'r') as f:
lines = f.readlines()
# Parse first line (startup time)
if len(lines) > 0:
first_line = lines[0].strip()
if first_line:
try:
timestamp = parse_timestamp(first_line)
startup_times.append((log_file, timestamp))
if earliest_startup_time is None or timestamp < earliest_startup_time:
earliest_startup_time = timestamp
except ValueError as e:
print(f"Could not parse startup timestamp in {log_file}: '{first_line}' - {e}")
# Look for second timestamp (start signal time)
for i, line in enumerate(lines):
line = line.strip()
if 'T' in line and i > 0: # Skip first line, look for second timestamp
try:
timestamp = parse_timestamp(line)
start_signal_times.append((log_file, timestamp))
if earliest_start_signal_time is None or timestamp < earliest_start_signal_time:
earliest_start_signal_time = timestamp
break # Only take the first additional timestamp found
except ValueError:
continue # Skip lines that aren't timestamps
except Exception as e:
print(f"Error reading {log_file}: {e}")
if not startup_times:
print("No valid startup times found")
return
# Calculate relative startup times
relative_startup_times = []
for log_file, timestamp in startup_times:
relative_time = (timestamp - earliest_startup_time).total_seconds()
relative_startup_times.append(relative_time)
# Print startup statistics
print(f"\n=== STARTUP TIME STATISTICS ===")
print(f"Total containers: {len(startup_times)}")
print(f"Earliest start time: {earliest_startup_time}")
print(f"Latest start time: {max(startup_times, key=lambda x: x[1])[1]}")
print(f"Total startup window: {max(relative_startup_times):.2f} seconds")
if len(relative_startup_times) > 1:
print(f"Average relative startup time: {statistics.mean(relative_startup_times):.2f} seconds")
print(f"Median relative startup time: {statistics.median(relative_startup_times):.2f} seconds")
print(f"Standard deviation: {statistics.stdev(relative_startup_times):.2f} seconds")
print(f"Min relative startup time: {min(relative_startup_times):.2f} seconds")
print(f"Max relative startup time: {max(relative_startup_times):.2f} seconds")
# Percentiles
sorted_times = sorted(relative_startup_times)
p50 = sorted_times[int(len(sorted_times) * 0.5)]
p90 = sorted_times[int(len(sorted_times) * 0.9)]
p95 = sorted_times[int(len(sorted_times) * 0.95)]
p99 = sorted_times[int(len(sorted_times) * 0.99)]
print(f"50th percentile: {p50:.2f} seconds")
print(f"90th percentile: {p90:.2f} seconds")
print(f"95th percentile: {p95:.2f} seconds")
print(f"99th percentile: {p99:.2f} seconds")
# Show startup distribution
print(f"\nStartup time distribution (by second):")
time_buckets = {}
for relative_time in relative_startup_times:
bucket = int(relative_time)
time_buckets[bucket] = time_buckets.get(bucket, 0) + 1
for bucket in sorted(time_buckets.keys())[:10]: # Show first 10 buckets
print(f" {bucket}s: {time_buckets[bucket]} containers")
# Print per-container details (first 10)
print(f"\nFirst 10 containers to start:")
sorted_startup_times = sorted(startup_times, key=lambda x: x[1])
for i, (log_file, timestamp) in enumerate(sorted_startup_times[:10]):
container_name = os.path.basename(log_file).replace('.stdout', '')
relative_time = (timestamp - earliest_startup_time).total_seconds()
print(f"{i+1:2d}. {container_name}: +{relative_time:.2f}s")
if len(sorted_startup_times) > 10:
print(f"... and {len(sorted_startup_times) - 10} more containers")
# Analyze start signal times if available
if start_signal_times:
print(f"\n=== START SIGNAL TIME STATISTICS ===")
print(f"Containers with start signal: {len(start_signal_times)}")
# Calculate relative start signal times
relative_start_signal_times = []
for log_file, timestamp in start_signal_times:
relative_time = (timestamp - earliest_start_signal_time).total_seconds()
relative_start_signal_times.append(relative_time)
print(f"Earliest start signal time: {earliest_start_signal_time}")
print(f"Latest start signal time: {max(start_signal_times, key=lambda x: x[1])[1]}")
print(f"Total start signal window: {max(relative_start_signal_times):.2f} seconds")
if len(relative_start_signal_times) > 1:
print(f"Average relative start signal time: {statistics.mean(relative_start_signal_times):.2f} seconds")
print(f"Median relative start signal time: {statistics.median(relative_start_signal_times):.2f} seconds")
print(f"Standard deviation: {statistics.stdev(relative_start_signal_times):.2f} seconds")
# Percentiles
sorted_signal_times = sorted(relative_start_signal_times)
p50 = sorted_signal_times[int(len(sorted_signal_times) * 0.5)]
p90 = sorted_signal_times[int(len(sorted_signal_times) * 0.9)]
p95 = sorted_signal_times[int(len(sorted_signal_times) * 0.95)]
p99 = sorted_signal_times[int(len(sorted_signal_times) * 0.99)]
print(f"50th percentile: {p50:.2f} seconds")
print(f"90th percentile: {p90:.2f} seconds")
print(f"95th percentile: {p95:.2f} seconds")
print(f"99th percentile: {p99:.2f} seconds")
# Show start signal distribution
print(f"\nStart signal time distribution (by second):")
signal_time_buckets = {}
for relative_time in relative_start_signal_times:
bucket = int(relative_time)
signal_time_buckets[bucket] = signal_time_buckets.get(bucket, 0) + 1
for bucket in sorted(signal_time_buckets.keys())[:10]: # Show first 10 buckets
print(f" {bucket}s: {signal_time_buckets[bucket]} containers")
# Calculate waiting times (time between startup and start signal)
waiting_times = []
for startup_entry in startup_times:
startup_log, startup_time = startup_entry
# Find corresponding start signal time
for signal_entry in start_signal_times:
signal_log, signal_time = signal_entry
if startup_log == signal_log:
waiting_time = (signal_time - startup_time).total_seconds()
waiting_times.append(waiting_time)
break
if waiting_times:
print(f"\n=== WAITING TIME STATISTICS ===")
print(f"Average waiting time: {statistics.mean(waiting_times):.2f} seconds")
print(f"Median waiting time: {statistics.median(waiting_times):.2f} seconds")
print(f"Min waiting time: {min(waiting_times):.2f} seconds")
print(f"Max waiting time: {max(waiting_times):.2f} seconds")
if len(waiting_times) > 1:
print(f"Standard deviation: {statistics.stdev(waiting_times):.2f} seconds")
else:
print(f"\nNo start signal timestamps found (containers may not have reached start signal yet)")
def main():
"""Main function"""
analyze_startup_times()
if __name__ == "__main__":
main()
|