aboutsummaryrefslogtreecommitdiff
path: root/fetch-location
diff options
context:
space:
mode:
authordiogo464 <[email protected]>2025-07-21 15:02:48 +0100
committerdiogo464 <[email protected]>2025-07-21 15:02:48 +0100
commit8c8dabd0ed20679a2dad43a5c239f9fcfe1c1ad7 (patch)
tree55abbcfbbff19efa3aaf6cf36540ac7651c54973 /fetch-location
init
Diffstat (limited to 'fetch-location')
-rwxr-xr-xfetch-location243
1 files changed, 243 insertions, 0 deletions
diff --git a/fetch-location b/fetch-location
new file mode 100755
index 000000000..47bd064f0
--- /dev/null
+++ b/fetch-location
@@ -0,0 +1,243 @@
1#!/usr/bin/env python3
2import os
3import re
4import sys
5import json
6import requests
7import urllib.parse
8
9from dataclasses import dataclass, asdict
10from typing import Optional, Dict, Any
11
# Name of the environment variable the Google Maps API key is read from.
ENV_GOOGLE_MAPS_API_KEY = "GOOGLE_MAPS_API_KEY"

# Mainland districts listed in code order: the entry at index i has code i + 1.
_MAINLAND_DISTRICTS = (
    "Aveiro",
    "Beja",
    "Braga",
    "Bragança",
    "Castelo Branco",
    "Coimbra",
    "Évora",
    "Faro",
    "Guarda",
    "Leiria",
    "Lisboa",
    "Portalegre",
    "Porto",
    "Santarém",
    "Setúbal",
    "Viana do Castelo",
    "Vila Real",
    "Viseu",
)

# Autonomous regions; the long form and its shorter aliases share one code.
_AUTONOMOUS_REGION_CODES = {
    "Região Autónoma dos Açores": 20,
    "Açores": 20,
    "Azores": 20,
    "Região Autónoma da Madeira": 30,
    "Madeira": 30,
}

# Portugal district codes mapping (ISO 3166-2:PT).
# Insertion order (mainland first, then the autonomous regions) is kept
# because lookups that scan this table return the first matching entry.
PORTUGAL_DISTRICT_CODES = {
    **{name: code for code, name in enumerate(_MAINLAND_DISTRICTS, start=1)},
    **_AUTONOMOUS_REGION_CODES,
}
42
43
def get_district_code(district_name: Optional[str]) -> Optional[int]:
    """Get Portuguese district code from district name (ISO 3166-2:PT).

    Lookup is attempted in two steps against ``PORTUGAL_DISTRICT_CODES``:

    1. exact match on the name with surrounding whitespace removed;
    2. case-insensitive substring match in either direction (a table key
       contained in the name, or the name contained in a table key),
       returning the first table entry that matches.

    Args:
        district_name: District or autonomous-region name, or ``None``.

    Returns:
        The numeric district code, or ``None`` when the name is missing,
        blank, or matches no table entry.
    """
    if not district_name:
        return None

    normalized = district_name.strip()
    if not normalized:
        # A whitespace-only name strips to "", and "" is a substring of every
        # key, so the partial match below would wrongly return the first entry.
        return None

    # Exact lookup on the stripped name. This also covers the long
    # autonomous-region names, which are table keys themselves.
    code = PORTUGAL_DISTRICT_CODES.get(normalized)
    if code is not None:
        return code

    # Last resort: partial matching in table order. Very short inputs can
    # match more than one key; the first entry in the table wins.
    folded = normalized.casefold()
    for district, code in PORTUGAL_DISTRICT_CODES.items():
        candidate = district.casefold()
        if candidate in folded or folded in candidate:
            return code

    return None
74
75
@dataclass
class Coordinates:
    """A latitude/longitude pair."""

    lat: float
    lon: float

    def to_dict(self) -> Dict[str, float]:
        """Return the pair as a mapping with the keys ``lat`` and ``lon``."""
        return {"lat": self.lat, "lon": self.lon}
85
86
@dataclass
class EventLocation:
    """Resolved location of an event, with optional administrative details."""

    name: str
    country: str
    locality: str
    coordinates: Optional[Coordinates] = None
    administrative_area_level_1: Optional[str] = None  # District
    administrative_area_level_2: Optional[str] = None  # Municipality
    administrative_area_level_3: Optional[str] = None  # Parish
    district_code: Optional[int] = None  # Portuguese district code

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict with the required fields plus every optional field
        that holds a truthy value; unset or empty optional fields are omitted.
        """
        payload: Dict[str, Any] = {
            "name": self.name,
            "country": self.country,
            "locality": self.locality,
        }
        if self.coordinates:
            payload["coordinates"] = self.coordinates.to_dict()  # type: ignore

        # Remaining optional fields are copied under their attribute name,
        # in declaration order.
        optional_fields = (
            "administrative_area_level_1",
            "administrative_area_level_2",
            "administrative_area_level_3",
            "district_code",
        )
        for field_name in optional_fields:
            value = getattr(self, field_name)
            if value:
                payload[field_name] = value
        return payload
116
117
class GoogleGeocodingClient:
    """Google Maps Geocoding API client.

    Every call to :meth:`geocode` issues a fresh HTTP request; results are
    not cached by this class.
    """

    def __init__(self, api_key: str, timeout: float = 10.0):
        """Store the API key and request settings.

        Args:
            api_key: Google Maps API key sent with every request.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.api_key = api_key
        self.base_url = "https://maps.googleapis.com/maps/api/geocode/json"
        self.timeout = timeout

    def _parse_google_response(
        self, location: str, google_result: dict
    ) -> EventLocation:
        """Parse one Google Maps API result into an EventLocation.

        Args:
            location: The query string that was geocoded; used as the name.
            google_result: One entry of the response's ``results`` list.

        Returns:
            The populated EventLocation.

        Raises:
            KeyError: If an expected key is missing from ``google_result``.
        """
        point = google_result["geometry"]["location"]

        # Defaults used when the matching address component is absent.
        country = "Portugal"
        locality = location.split(",")[0].strip()
        level_1 = None
        level_2 = None
        level_3 = None

        # Extract all administrative levels from address components
        for component in google_result["address_components"]:
            types = component["types"]
            if "country" in types:
                country = component["long_name"]
            elif "administrative_area_level_1" in types:
                level_1 = component["long_name"]
                # Use district as locality for Portugal
                locality = component["long_name"]
            elif "administrative_area_level_2" in types:
                level_2 = component["long_name"]
            elif "administrative_area_level_3" in types:
                level_3 = component["long_name"]

        return EventLocation(
            name=location,
            country=country,
            locality=locality,
            coordinates=Coordinates(lat=point["lat"], lon=point["lng"]),
            administrative_area_level_1=level_1,
            administrative_area_level_2=level_2,
            administrative_area_level_3=level_3,
            # District code is derived from administrative_area_level_1.
            district_code=get_district_code(level_1),
        )

    def geocode(self, location: str) -> Optional[EventLocation]:
        """Geocode a location string.

        Args:
            location: Free-form address or place name.

        Returns:
            The EventLocation built from the first API result, or ``None``
            when the request fails, the response is not valid JSON, or the
            response has no usable result.
        """
        params = {
            "address": location,
            "key": self.api_key,
            "region": "pt",
            "language": "pt",
        }
        url = f"{self.base_url}?{urllib.parse.urlencode(params)}"

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()
            data = json.loads(response.content)
            return self._parse_google_response(location, data["results"][0])
        except (requests.RequestException, ValueError, LookupError, TypeError):
            # Network/HTTP failure, invalid JSON, empty ``results`` or an
            # unexpected response shape. Deliberately not logged: the request
            # URL carries the API key and exception text may include it.
            return None
194
195
def clean_ics_location(loc: str) -> str:
    """Normalize a raw ICS ``LOCATION`` value.

    Backslashes are removed, the value is split on commas, whitespace inside
    each part is collapsed, and two kinds of duplication are undone:

    * a part that is the same word sequence twice
      (``"Madeira Madeira"`` -> ``"Madeira"``);
    * a whole ``"A, B"`` value that was doubled, which splits into
      ``["A", "B A", "B"]`` and is reduced back to ``"A, B"``.

    Returns the cleaned parts joined with ``", "``.
    """

    def collapse_repeat(part: str) -> str:
        # "Santa Maria da Cruz Santa Maria da Cruz" -> "Santa Maria da Cruz"
        tokens = part.strip().split()
        half, remainder = divmod(len(tokens), 2)
        if remainder == 0 and tokens[:half] == tokens[half:]:
            tokens = tokens[:half]
        return " ".join(tokens)

    unescaped = loc.strip().replace("\\", "")
    parts = list(map(collapse_repeat, unescaped.split(",")))

    # "Alcaria da Serra, Beja Alcaria da Serra, Beja" -> "Alcaria da Serra, Beja"
    if len(parts) == 3:
        first, middle, last = parts
        if middle == f"{last} {first}":
            parts = [first, last]

    return ", ".join(parts)
217
218
# Cleaned ICS locations that are replaced by a different query string before
# being sent to the geocoder.
FIXUP_TABLE = {"Alcaria da Serra, Beja": "Alcaria da Serra"}


def main() -> None:
    """Geocode the location of every event slug given on the command line.

    For each slug, reads ``events/<slug>/ics``, extracts and cleans its
    ``LOCATION`` value, geocodes it and writes the result as JSON to
    ``events/<slug>/location``. Slugs that already have a ``location`` file,
    or whose cleaned location is empty, are skipped. Exits with status 1 when
    the API key is missing, an ICS file has no ``LOCATION`` line, or
    geocoding fails.
    """
    key = os.environ.get(ENV_GOOGLE_MAPS_API_KEY)
    if not key:
        print(
            f"environment variable {ENV_GOOGLE_MAPS_API_KEY} is not set",
            file=sys.stderr,
        )
        sys.exit(1)

    client = GoogleGeocodingClient(key)
    for slug in sys.argv[1:]:
        ics_path = os.path.join("events", slug, "ics")
        location_path = os.path.join("events", slug, "location")
        if os.path.exists(location_path):
            continue

        # Read through a context manager so the file handle is always closed.
        with open(ics_path, "rb") as ics_file:
            ics_content = ics_file.read().decode("utf-8", errors="ignore")

        ics_location_match = re.search(r"LOCATION:(.*)", ics_content)
        if ics_location_match is None:
            # Explicit check instead of ``assert``, which is removed under -O.
            print(f"no LOCATION found in {ics_path}", file=sys.stderr)
            sys.exit(1)

        ics_location = clean_ics_location(ics_location_match[1])
        if ics_location == "":
            continue
        ics_location = FIXUP_TABLE.get(ics_location, ics_location)

        location = client.geocode(ics_location)
        if location is None:
            print(f"failed to obtain location from {slug} '{ics_location}'")
            sys.exit(1)

        # Serialize before opening the output file: a serialization error must
        # not leave an empty location file that later runs would skip.
        payload = json.dumps(location.to_dict())
        with open(location_path, "w") as f:
            f.write(payload)


if __name__ == "__main__":
    main()
243