aboutsummaryrefslogtreecommitdiff
path: root/setup-directories
diff options
context:
space:
mode:
author diogo464 <[email protected]> 2025-07-21 15:02:48 +0100
committer diogo464 <[email protected]> 2025-07-21 15:02:48 +0100
commit 8c8dabd0ed20679a2dad43a5c239f9fcfe1c1ad7 (patch)
tree 55abbcfbbff19efa3aaf6cf36540ac7651c54973 /setup-directories
init
Diffstat (limited to 'setup-directories')
-rwxr-xr-x setup-directories 45
1 files changed, 45 insertions, 0 deletions
diff --git a/setup-directories b/setup-directories
new file mode 100755
index 000000000..d3548b14f
--- /dev/null
+++ b/setup-directories
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Create or refresh one directory per event listed in ``sitemap.xml``.

For every ``<url>`` entry in the sitemap the event slug is taken from the
URL and ``events/<slug>/lastmod`` is compared with the entry's ``<lastmod>``
value.  When they differ (or the file does not exist) the event directory is
deleted, recreated empty, and the new ``lastmod`` value is stored in it.
"""
import re
import os
import shutil
import xml.etree.ElementTree as ET

# URLs that appear in the sitemap but are not event pages.
ignored_urls = ["https://www.portugalrunning.com/eventos/"]

# Compiled once; dots are escaped so they only match a literal ".", and the
# slug must be non-empty so that the event directory can never resolve to the
# events root itself.
_EVENT_URL_RE = re.compile(
    r"https://www\.portugalrunning\.com/eventos/([^/]+)/"
)


def extract_slug(url):
    """Return the event slug embedded in *url*.

    Raises:
        ValueError: if *url* is not an event URL, or if the slug is ``.`` or
            ``..`` (which would make the event directory point at the events
            root or its parent, and that directory is later removed with
            ``shutil.rmtree``).
    """
    match = _EVENT_URL_RE.match(url)
    if match is None:
        raise ValueError(f"failed to extract slug from '{url}'")
    slug = match[1]
    if slug in (".", ".."):
        raise ValueError(f"refusing unsafe slug '{slug}' from '{url}'")
    return slug


def read_lastmod(path):
    """Return the contents of the file at *path*, or ``None`` if it is missing."""
    try:
        with open(path, encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return None


def sync_event_dir(events_dir, slug, lastmod):
    """Make ``<events_dir>/<slug>`` reflect *lastmod*.

    Returns:
        ``False`` when the stored ``lastmod`` already equals *lastmod* and
        nothing was touched; ``True`` when the directory was (re)created.
    """
    event_dir = os.path.join(events_dir, slug)
    lastmod_path = os.path.join(event_dir, "lastmod")

    if read_lastmod(lastmod_path) == lastmod:
        return False

    # The entry changed: drop everything stored for the event so nothing
    # stale survives.  NOTE(review): presumably a later step stores the
    # downloaded page (page.html) in this directory - confirm.
    if os.path.exists(event_dir):
        shutil.rmtree(event_dir)
    os.makedirs(event_dir, exist_ok=True)
    with open(lastmod_path, "w", encoding="utf-8") as f:
        f.write(lastmod)
    return True


def _required_text(url_element, tag):
    """Return the stripped text of child *tag* (any namespace) of *url_element*.

    Raises:
        ValueError: if the child element is absent or has no text.
    """
    child = url_element.find(f"{{*}}{tag}")
    if child is None or child.text is None:
        raise ValueError(f"sitemap <url> entry is missing <{tag}>")
    return child.text.strip()


def main(sitemap_path="sitemap.xml", events_dir="events"):
    """Synchronise *events_dir* with every event URL found in *sitemap_path*.

    Raises:
        ValueError: on a ``<url>`` entry without ``<loc>``/``<lastmod>`` text
            or with a URL from which no safe slug can be extracted.
    """
    root = ET.parse(sitemap_path).getroot()

    for url_element in root.findall(".//{*}url"):
        # Both fields are validated before the ignore check, so a malformed
        # entry is reported even when its URL is an ignored one.
        url = _required_text(url_element, "loc")
        lastmod = _required_text(url_element, "lastmod")

        if url in ignored_urls:
            continue

        sync_event_dir(events_dir, extract_slug(url), lastmod)


if __name__ == "__main__":
    main()