hometask 1

2025-04-12 17:46:06 +03:00 · 2025-04-12 17:46:06 +03:00 · 15edb86218
commit 15edb86218
2 changed files with 59 additions and 0 deletions
--- a/fetch.sh
+++ b/fetch.sh
@@ -0,0 +1 @@
+wget -O 'xml-sample-file.xml' 'https://file.notion.so/f/f/0f72e9b4-39fd-4e82-b82b-344748b49dbc/c88a460f-d7ea-4702-ba2f-3932ea3cea4a/xml-sample-file.xml?table=block&id=1d098088-272f-8199-b385-e3c2be526eed&spaceId=0f72e9b4-39fd-4e82-b82b-344748b49dbc&expirationTimestamp=1744480800000&signature=OyQu8lI-dWC8wDY1Mhmzi9z96EQ834Q9YNfojjD2GcI&downloadName=xml-sample-file.xml'
--- a/teams_task1.py
+++ b/teams_task1.py
@@ -0,0 +1,58 @@
+from xml.etree import ElementTree as ET
+
+dom = ET.parse("xml-sample-file.xml")
+rows = dom.findall("row")
+
+# Task 1:
+# Count the users with notifications parameter turned on (sendmenotifications = true)
+# and place the result into the variable notifications_count (int)
+# findtext() yields None for a row without the tag, so such rows count as "off".
+notifications_count: int = sum(
+    row.findtext("sendmenotifications") == "true" for row in rows
+)
+print(f"notifications_count: {notifications_count}")
+
+
+# Task 2:
+# Search for users with same email addresses and
+# save their list as a json file email_dupes.json as a dictionary with name: email pairs
+# Group user names by e-mail address.
+emails = {}
+for element in rows:
+    name = element.find("name").text
+    mail = element.find("email").text
+    emails.setdefault(mail, []).append(name)
+
+# Keep only addresses shared by several users, stored as name: email pairs
+# (the format the task asks for in email_dupes.json).
+email_pairs = {}
+for mail, names in emails.items():
+    if len(names) > 1:
+        email_pairs.update({name: mail for name in names})
+
+import json
+with open('email_dupes.json', 'w') as f:
+    json.dump(email_pairs, f)
+
+# Task 3: Search for similar names using SequenceMatcher from difflib
+# (combinations from itertools might come in handy as well). Set the similarity threshold at 85%.
+# Save the list of names in the name_dupes.json file as a list (array) of strings.
+# Optionally, search for near-duplicates using Levenshtein distance.
+from difflib import SequenceMatcher
+from itertools import combinations
+
+# Deduplicate, then sort: iterating a bare set would visit names in an order
+# that changes between runs (string hash randomization), making the pair
+# order and the contents of name_dupes.json unstable.
+names = sorted({element.find("name").text for element in rows})
+
+threshold = 0.85
+name_pairs = []
+for (str1, str2) in combinations(names, 2):
+    similarity = SequenceMatcher(None, str1, str2).ratio()
+    if similarity >= threshold:
+        name_pairs.append((str1, str2, similarity))
+
+print(name_pairs)
+with open('name_dupes.json', 'w') as f:
+    json.dump(name_pairs, f, indent=4, sort_keys=True)
				`@ -0,0 +1 @@`
				`wget -O 'xml-sample-file.xml' 'https://file.notion.so/f/f/0f72e9b4-39fd-4e82-b82b-344748b49dbc/c88a460f-d7ea-4702-ba2f-3932ea3cea4a/xml-sample-file.xml?table=block&id=1d098088-272f-8199-b385-e3c2be526eed&spaceId=0f72e9b4-39fd-4e82-b82b-344748b49dbc&expirationTimestamp=1744480800000&signature=OyQu8lI-dWC8wDY1Mhmzi9z96EQ834Q9YNfojjD2GcI&downloadName=xml-sample-file.xml'`