final test
This commit is contained in:
parent
4e9e2dc4be
commit
1b854d6a20
Binary file not shown.
|
@ -0,0 +1,169 @@
|
|||
#!/bin/python
import pdfplumber

# 1. Parse pdf file to extract the coordinates

# The coordinates are the only text on the first page; grab and trim them.
input_file = "circlek.pdf"
coord_str = ""
with pdfplumber.open(input_file) as pdf:
    first_page = pdf.pages[0]
    coord_str = first_page.extract_text_simple().strip()

print(f"1. coords are: {coord_str}")
|
||||
|
||||
|
||||
# 2. Search for the address that corresponds to this particular location
# (Nominatim does not require API key, LocationIQ
# and OpenCage Geocoder do not require credit card number to access their API)


# 2.1. convert minutes and seconds to fractional degrees
def geo_convert(dms_str):
    """Convert one DMS coordinate string (e.g. 59°26'14"N) to fractional degrees.

    Minutes and seconds may be omitted (59°26'N and 59°N are accepted);
    a trailing S or W direction makes the result negative.

    Raises ValueError for strings that do not look like a DMS coordinate
    (wrong number of fields, non-numeric fields, or an unknown direction).
    """
    parts = dms_str.replace("°", " ").replace("'", " ").replace('"', " ").split()
    # 2..4 fields: degrees [+ minutes [+ seconds]] + direction letter
    if not 2 <= len(parts) <= 4:
        raise ValueError(f"cannot parse DMS coordinate: {dms_str!r}")
    *numbers, direction = parts
    if direction not in ('N', 'S', 'E', 'W'):
        raise ValueError(f"unknown direction {direction!r} in {dms_str!r}")
    # degrees + minutes/60 + seconds/3600
    decimal = sum(float(value) / 60 ** i for i, value in enumerate(numbers))
    if direction in ('S', 'W'):
        decimal *= -1
    return decimal
|
||||
|
||||
# The PDF text holds two whitespace-separated DMS fields: latitude, longitude.
raw_lat, raw_lon = coord_str.split()
lat = geo_convert(raw_lat)
lon = geo_convert(raw_lon)

# 2.2 call the api
import requests
import time

# Query parameters for the reverse-geocoding request.
req_params = {
    'lat': lat,
    'lon': lon,
    'format': 'json',
}

# Nominatim's usage policy asks for an identifying User-Agent.
req_headers = {
    'User-Agent': 'ondrej hladuvka (mail@hladu.xyz)',
}

# Shared retry policy for every HTTP call below.
tries = 5
timebreak = 1
|
||||
# Retry the reverse-geocoding call a few times before giving up.
# A network-level error (DNS, refused connection, timeout) counts as a
# failed attempt instead of crashing the script, and `response` is
# pre-seeded so the check below works even if the loop body never ran.
response = None
for i in range(tries):
    try:
        response = requests.get("https://nominatim.openstreetmap.org/reverse",
                                params=req_params, headers=req_headers)
        if response.status_code == 200:
            break
    except requests.RequestException:
        response = None
    time.sleep(timebreak)

if response is None or response.status_code != 200:
    print("openstreetmap api: not available")
    exit(1)
|
||||
|
||||
# 2.3 parse response
resp_data = response.json()

# Bail out early unless the payload carries an 'address' object.
if not (resp_data and 'address' in resp_data):
    print("openstreetmap api: no address returned")
    exit(1)

address = resp_data['address']
# All three components are needed to build the display string below.
if any(part not in address for part in ('road', 'quarter', 'city')):
    print("openstreetmap api: cannot parse address")
    exit(1)
print(f"2. address is: {address['road']} {address['quarter']}, {address['city']}")
|
||||
|
||||
|
||||
|
||||
# 3. Investigate what business is located at that particular address

# The business name sits in the top-level 'name' field of the same reply.
if not (resp_data and 'name' in resp_data):
    print("openstreetmap api: no business returned")
    exit(1)

bsn_name = resp_data['name']

print(f"3. business is: {bsn_name}")
|
||||
|
||||
|
||||
|
||||
# 4. Search for the web-page of this organization
# (a simple request to google.com/search?q=… and CircleK will be the first result)

## I didn't figure out how to overcome Google's anti-scraping,
## so I'm using DuckDuckGo instead.

data = {'q': bsn_name + " ee"} # fix to get the same first result as google ;)

# Same retry scheme as the geocoding call: network errors and non-200
# replies both count as failed attempts instead of crashing.
response = None
for i in range(tries):
    try:
        response = requests.post("https://html.duckduckgo.com/html/",
                                 data=data, headers=req_headers)
        if response.status_code == 200:
            break
    except requests.RequestException:
        response = None
    time.sleep(timebreak)

if response is None or response.status_code != 200:
    print("duckduckgo: not available")
    exit(1)
|
||||
|
||||
# 4.1. parse html links <- duckduckgo has rel="nofollow" with all results
html = response.text
results = []
start = 0

# Scan for every result anchor; DuckDuckGo marks them all rel="nofollow".
marker = '<a rel="nofollow" href="'
while True:
    anchor_start = html.find(marker, start)
    if anchor_start == -1:
        break
    href_start = anchor_start + len(marker)
    href_end = html.find('"', href_start)
    link = html[href_start:href_end]
    results.append(link)
    start = href_end

# Guard against an empty result page -- otherwise results[0] below raises
# IndexError (section 5 already guards the identical situation).
if len(results) == 0:
    print("duckduckgo: no results found")
    exit(1)

link = results[0]
print(f"4. search result is: {link}")
|
||||
|
||||
|
||||
|
||||
# 5. Find out where their web-page is located, parse it,
# extract the logo (its in svg format).

# Fetch the page with GET -- the right verb for retrieving a document
# (the original POST only worked where servers happened to tolerate it).
# Network errors count as failed attempts, same as the earlier calls.
response = None
for i in range(tries):
    try:
        response = requests.get(link, headers=req_headers)
        if response.status_code == 200:
            break
    except requests.RequestException:
        response = None
    time.sleep(timebreak)

if response is None or response.status_code != 200:
    print(f"{link}: not available")
    exit(1)


html = response.text

# Collect every absolute src=… URL that is an svg with 'logo' in its name.
# Use a separate variable for candidates so the page URL in `link` is not
# clobbered (the original reused `link`, so the error message below printed
# the last scanned src attribute instead of the page address).
results = []
start = 0
while True:
    href_index = html.find('src="https://', start)
    if href_index == -1:
        break
    start_quote = href_index + len('src="')
    end_quote = html.find('"', start_quote)
    candidate = html[start_quote:end_quote]
    if candidate.endswith('.svg') and 'logo' in candidate:
        results.append(candidate)
    start = end_quote

if len(results) == 0:
    print(f"{link}: cannot find any svg resource named logo")
    exit(1)

logo_link = results[0]
print(f"5. logo found at: {logo_link}")
|
||||
|
||||
|
||||
# 5.1. open the logo ;)
ui_input = input("do you want to open it? [y/N] ")
# Only an explicit yes ("y", "Y", "yes", …) opens the link; the original
# substring test also fired on answers like "nay" that merely contain a y.
if not ui_input.strip().lower().startswith('y'):
    exit(0)

import os
import subprocess

if os.name == 'posix':
    # argument list + shell=False, so a crafted URL scraped from the page
    # cannot inject shell commands (os.system with an f-string could)
    subprocess.run(['xdg-open', logo_link])
elif os.name == 'nt': # hope this works, i dont have win machine to test this on :)
    # ShellExecute opens URLs in the default handler, no shell involved
    os.startfile(logo_link)
else:
    print("sorry, cant do this on your system :c")
|
Loading…
Reference in New Issue