hometask 1
This commit is contained in:
		
						commit
						15edb86218
					
				|  | @ -0,0 +1 @@ | ||||||
|  | wget -O 'xml-sample-file.xml' 'https://file.notion.so/f/f/0f72e9b4-39fd-4e82-b82b-344748b49dbc/c88a460f-d7ea-4702-ba2f-3932ea3cea4a/xml-sample-file.xml?table=block&id=1d098088-272f-8199-b385-e3c2be526eed&spaceId=0f72e9b4-39fd-4e82-b82b-344748b49dbc&expirationTimestamp=1744480800000&signature=OyQu8lI-dWC8wDY1Mhmzi9z96EQ834Q9YNfojjD2GcI&downloadName=xml-sample-file.xml' | ||||||
|  | @ -0,0 +1,58 @@ | ||||||
from xml.etree import ElementTree as ET

# Parse the sample document and collect the <row> elements found directly
# under its root.
dom = ET.parse("xml-sample-file.xml")
rows = dom.findall("row")

# Task 1:
# Count the users whose <sendmenotifications> element holds the text "true"
# and keep the total in the variable notifications_count (int).
notifications_count: int = sum(
    1 for row in rows if row.find("sendmenotifications").text == "true"
)

print(f"notifications_count: {notifications_count}")
|  | 
 | ||||||
|  | 
 | ||||||
# Task 2:
# Search for users with same email addresses and
# save their list as a json file email_dupes.json as a dictionary with name: email pairs
import json

# Group user names by e-mail address: {email: [name, ...]}.
emails = {}
for element in rows:
    name = element.findtext("name")
    mail = element.findtext("email")
    if not mail:
        # A missing or empty address cannot be a meaningful duplicate, and
        # grouping such rows together would report false positives.
        continue
    emails.setdefault(mail, []).append(name)

# Keep only the addresses shared by more than one user and flatten them into
# the {name: email} mapping the task asks for.
# NOTE: names are used as keys, so users sharing the same name collapse into
# a single entry (the last one seen wins).
email_pairs = {}
for mail, names in emails.items():
    if len(names) > 1:
        for name in names:
            email_pairs[name] = mail

with open('email_dupes.json', 'w') as f:
    json.dump(email_pairs, f)
|  | 
 | ||||||
# Task 3:
# Search for similar names using SequenceMatcher from difflib
# (combinations from itertools might come useful as well). Set similarity score at 85%.
# Save a list of names in name_dupes.json file as a list (array) of strings.
# Optionally, search for near duplicates using Levenshtein distance.
import json
from difflib import SequenceMatcher
from itertools import combinations

# Collect the distinct names; rows without a name are skipped because
# SequenceMatcher cannot compare None.
names = set()
for element in rows:
    name = element.findtext("name")
    if name:
        names.add(name)

threshold = 0.85
name_pairs = []
# Iterate in sorted order: set iteration order differs between runs, and
# ratio() may depend on argument order, so sorting keeps the result
# reproducible.
for str1, str2 in combinations(sorted(names), 2):
    similarity = SequenceMatcher(None, str1, str2).ratio()
    if similarity >= threshold:
        name_pairs.append((str1, str2, similarity))

print(name_pairs)

# The task asks for a flat list of strings, so write every name that takes
# part in at least one similar pair (the scores stay in name_pairs only).
name_dupes = sorted({member for pair in name_pairs for member in pair[:2]})
with open('name_dupes.json', 'w') as f:
    json.dump(name_dupes, f, indent=4)
		Loading…
	
		Reference in New Issue