hometask 1
This commit is contained in:
commit
15edb86218
|
@ -0,0 +1 @@
|
||||||
|
# Download the sample XML used by the Python script and save it as xml-sample-file.xml.
# NOTE(review): the URL carries `expirationTimestamp` and `signature` query parameters,
# so it is presumably a time-limited signed link and may stop working — confirm, and
# replace it with a stable source if the download fails.
wget -O 'xml-sample-file.xml' 'https://file.notion.so/f/f/0f72e9b4-39fd-4e82-b82b-344748b49dbc/c88a460f-d7ea-4702-ba2f-3932ea3cea4a/xml-sample-file.xml?table=block&id=1d098088-272f-8199-b385-e3c2be526eed&spaceId=0f72e9b4-39fd-4e82-b82b-344748b49dbc&expirationTimestamp=1744480800000&signature=OyQu8lI-dWC8wDY1Mhmzi9z96EQ834Q9YNfojjD2GcI&downloadName=xml-sample-file.xml'
|
|
@ -0,0 +1,58 @@
|
||||||
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
|
# Parse the sample file once; every task below works on the same list of rows.
dom = ET.parse("xml-sample-file.xml")

# Collect the <row> elements that are direct children of the document root.
rows = dom.getroot().findall("row")
|
||||||
|
|
||||||
|
# Task 1:
# Count the users that have notifications switched on
# (sendmenotifications = true) and keep the total in notifications_count (int).
notifications_count: int = sum(
    1
    for row in rows
    if row.find("sendmenotifications").text == "true"
)

print(f"notifications_count: {notifications_count}")
|
||||||
|
|
||||||
|
|
||||||
|
# Task 2:
# Search for users that share the same email address and
# save them to email_dupes.json as a dictionary of name: email pairs.
import json

# Group user names by email address so shared addresses are easy to spot.
emails = {}
for element in rows:
    name = element.find("name").text
    mail = element.find("email").text
    emails.setdefault(mail, []).append(name)

# Keep only addresses used by more than one user and map each of those users
# to the shared address. (The previous code called list.insert() with a single
# argument, which raises TypeError, and stored bare emails instead of the
# name: email dictionary the task asks for.)
email_pairs = {
    name: mail
    for mail, names in emails.items()
    if len(names) > 1
    for name in names
}

with open('email_dupes.json', 'w') as f:
    json.dump(email_pairs, f)
|
||||||
|
|
||||||
|
# Search for similar names using SequenceMatcher from difflib
# (combinations from itertools might come in useful as well). Set similarity score at 85%.
# Save a list of names in name_dupes.json file as a list (array) of strings.
# Optionally, search for near duplicates using Levenshtein distance.
import json
from difflib import SequenceMatcher
from itertools import combinations

# Unique, non-empty names only: an empty <name> has text None and cannot be compared.
names = set()
for element in rows:
    name = element.find("name").text
    if name:
        names.add(name)

threshold = 0.85
name_pairs = []
# Iterate in sorted order: set iteration order varies between runs, and
# SequenceMatcher.ratio() can depend on argument order, so sorting keeps the
# reported pairs and scores reproducible.
for str1, str2 in combinations(sorted(names), 2):
    similarity = SequenceMatcher(None, str1, str2).ratio()
    if similarity >= threshold:
        name_pairs.append((str1, str2, similarity))

# Pairs with their scores are printed for inspection only.
print(name_pairs)

# The task asks for a flat array of strings, so flatten the matched pairs into
# a sorted list of the distinct names involved.
name_dupes = sorted({name for str1, str2, _ in name_pairs for name in (str1, str2)})

with open('name_dupes.json', 'w') as f:
    json.dump(name_dupes, f, indent=4)
|
Loading…
Reference in New Issue