commit 15edb86218cb30b286a6d3e9d646d5b1f6f95d23
Author: Hladu357
Date:   Sat Apr 12 17:46:06 2025 +0300

    hometask 1

diff --git a/fetch.sh b/fetch.sh
new file mode 100755
index 0000000..3673a66
--- /dev/null
+++ b/fetch.sh
@@ -0,0 +1 @@
+wget -O 'xml-sample-file.xml' 'https://file.notion.so/f/f/0f72e9b4-39fd-4e82-b82b-344748b49dbc/c88a460f-d7ea-4702-ba2f-3932ea3cea4a/xml-sample-file.xml?table=block&id=1d098088-272f-8199-b385-e3c2be526eed&spaceId=0f72e9b4-39fd-4e82-b82b-344748b49dbc&expirationTimestamp=1744480800000&signature=OyQu8lI-dWC8wDY1Mhmzi9z96EQ834Q9YNfojjD2GcI&downloadName=xml-sample-file.xml'
\ No newline at end of file
diff --git a/teams_task1.py b/teams_task1.py
new file mode 100644
index 0000000..3c34572
--- /dev/null
+++ b/teams_task1.py
@@ -0,0 +1,65 @@
+"""Hometask 1: analyse the user rows of ``xml-sample-file.xml``.
+
+Prints the number of users with notifications enabled and writes
+``email_dupes.json`` and ``name_dupes.json`` to the current directory.
+"""
+import json
+from difflib import SequenceMatcher
+from itertools import combinations
+from xml.etree import ElementTree as ET
+
+dom = ET.parse("xml-sample-file.xml")
+rows = dom.findall("row")
+
+# Task 1:
+# Count the users with notifications parameter turned on (sendmenotifications = true)
+# and place the result into the variable notifications_count (int)
+# findtext() yields None for a missing child instead of raising AttributeError.
+notifications_count: int = sum(
+    row.findtext("sendmenotifications") == "true" for row in rows
+)
+
+print(f"notifications_count: {notifications_count}")
+
+
+# Task 2:
+# Search for users with same email addresses and
+# save their list as a json file email_dupes.json as a dictionary with name: email pairs
+names_by_email = {}
+for row in rows:
+    mail = row.findtext("email", "")
+    if mail:  # a row without an address cannot share one with anybody
+        names_by_email.setdefault(mail, []).append(row.findtext("name", ""))
+
+# Keep only the addresses used by more than one row, flattened to name: email.
+email_dupes = {}
+for mail, names in names_by_email.items():
+    if len(names) > 1:
+        email_dupes.update(dict.fromkeys(names, mail))
+
+with open("email_dupes.json", "w", encoding="utf-8") as f:
+    json.dump(email_dupes, f)
+
+
+# Task 3:
+# Search for similar names using SequenceMatcher from difflib
+# (combinations from itertools might come useful as well). Set similarity score at 85%.
+# Save a list of names in name_dupes.json file as a list (array) of strings.
+# Optionally, search for near duplicates using Levenshtein distance.
+# NOTE(review): the output below keeps the submission's [name, name, score]
+# triples rather than a flat list of strings; confirm which shape is expected.
+
+# Distinct, non-empty names in sorted order so the pairs come out the same on
+# every run (iteration order of a set of str may differ between runs).
+names = sorted({row.findtext("name", "") for row in rows} - {""})
+
+threshold = 0.85
+name_pairs = []
+for str1, str2 in combinations(names, 2):
+    similarity = SequenceMatcher(None, str1, str2).ratio()
+    if similarity >= threshold:
+        name_pairs.append((str1, str2, similarity))
+
+print(name_pairs)
+with open("name_dupes.json", "w", encoding="utf-8") as f:
+    json.dump(name_pairs, f, indent=4)