diff options
| author | diogo464 <[email protected]> | 2025-07-24 16:55:50 +0100 |
|---|---|---|
| committer | diogo464 <[email protected]> | 2025-07-24 16:55:50 +0100 |
| commit | 5c3fe9aac767ee44f271ea8d1086b28336c88334 (patch) | |
| tree | 0a2c6a165c0ee429b4caf5c6a80430a1aebe3361 /extract-event-url | |
| parent | 6138d43d3ff8736bbcb95e4e34cdbb3f79f4d7cc (diff) | |
added extract-event-url
Diffstat (limited to 'extract-event-url')
| -rwxr-xr-x | extract-event-url | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/extract-event-url b/extract-event-url new file mode 100755 index 000000000..1bd86898d --- /dev/null +++ b/extract-event-url | |||
| @@ -0,0 +1,25 @@ | |||
#!/usr/bin/env python3
"""Extract the event page URL from saved event listing pages.

Usage: extract-event-url SLUG [SLUG ...]

For each slug, read ``events/<slug>/page.html``, find the first anchor whose
class attribute contains ``evcal_evdata_row evo_clik_row`` and write that
anchor's ``href`` value to ``events/<slug>/event-url``. Slugs that already
have an ``event-url`` file are skipped, so the script can be re-run safely.
"""
import os
import re
import sys

# Matches an <a ...> tag whose class attribute contains
# "evcal_evdata_row evo_clik_row" and that has an href attribute somewhere
# after the class attribute. Group 1 captures the href value.
pattern = r'<a[^>]*class=[\'"][^\'"]*evcal_evdata_row evo_clik_row[^\'"]*[\'"][^>]*href=[\'"]([^\'"]+)[\'"][^>]*>'


def find_event_url(page_content):
    """Return the href of the first matching anchor, or None if there is none.

    Args:
        page_content: HTML text of a saved event page.

    Returns:
        The raw href attribute value exactly as it appears in the HTML, or
        ``None`` when no anchor matches ``pattern``.
    """
    match = re.search(pattern, page_content)
    if not match:
        return None
    return match.group(1)


def main(slugs):
    """Write ``events/<slug>/event-url`` for every slug that lacks one.

    Args:
        slugs: Iterable of event directory names under ``events/``.

    Raises:
        OSError: If a ``page.html`` cannot be read or an ``event-url`` file
            cannot be written (for example, the page file is missing).
    """
    for slug in slugs:
        page_path = os.path.join("events", slug, "page.html")
        event_url_path = os.path.join("events", slug, "event-url")

        # An existing output file means this event was already processed.
        if os.path.exists(event_url_path):
            continue

        # Explicit encoding so the result does not depend on the locale.
        with open(page_path, "r", encoding="utf-8") as f:
            page_content = f.read()

        event_page_url = find_event_url(page_content)
        if event_page_url is None:
            print(f"event {slug} has no event url")
            continue

        with open(event_url_path, "w", encoding="utf-8") as f:
            f.write(event_page_url)


if __name__ == "__main__":
    main(sys.argv[1:])
