"""Convert the <li> items of an HTML file into a CSV of link notes.

The script prompts for a main keyword, lets the user pick an HTML file via a
file dialog, and writes one CSV row per <li> element found in that file.
The CSV is written next to the input file as "<name>_converted.csv".
"""

import csv
import os
import sys
from tkinter import Tk, filedialog

from bs4 import BeautifulSoup

# Number of additional anchors (after the first) exported per <li>.
MAX_EXTRA_LINKS = 10

# Text written to the CSV in place of every empty-string cell.
EMPTY_PLACEHOLDER = "null"

# --- CSV schema: fixed columns, then label_1,link_1 ... label_10,link_10 ---
BASE_HEADERS = [
    "note_id", "Type", "Label", "URL", "Content", "Keywords",
    "Media URL", "Media Type", "Media File Name", "Emoji", "Position",
]
HEADERS = BASE_HEADERS + [
    column
    for i in range(1, MAX_EXTRA_LINKS + 1)
    for column in (f"label_{i}", f"link_{i}")
]


def _select_html_file():
    """Show an "open file" dialog and return the chosen path (falsy if cancelled)."""
    root = Tk()
    root.withdraw()  # hide the empty root window; only the dialog is shown
    try:
        return filedialog.askopenfilename(
            title="Select HTML file",
            filetypes=[("HTML Files", "*.html"), ("All Files", "*.*")],
        )
    finally:
        # Release the hidden root window once the dialog has closed.
        root.destroy()


def _output_path(html_path):
    """Return the CSV path for *html_path*: same location, "_converted.csv" suffix.

    os.path.splitext only strips an extension from the final path component,
    so a dot in a directory name (e.g. "/home/a.b/page") is left intact.
    """
    return os.path.splitext(html_path)[0] + "_converted.csv"


def _build_row(li, position, keyword):
    """Build one CSV row (ordered like HEADERS) from a single <li> element.

    Args:
        li: The BeautifulSoup tag of the <li> element.
        position: 1-based index of the <li> within the document.
        keyword: Main keyword written to the "Keywords" column.

    Returns:
        A list of cell values in which every empty string has been replaced
        by EMPTY_PLACEHOLDER.
    """
    anchors = li.find_all("a")
    images = li.find_all("img")

    # First anchor = main Label + URL.
    label = anchors[0].get_text(strip=True) if anchors else ""
    url = anchors[0].get("href", "") if anchors else ""

    # Following anchors = label_n + link_n, padded with blanks to a fixed width.
    extra_links = [
        (a.get_text(strip=True), a.get("href", ""))
        for a in anchors[1:1 + MAX_EXTRA_LINKS]
    ]
    extra_links += [("", "")] * (MAX_EXTRA_LINKS - len(extra_links))

    # Only the first image of the <li> is exported.
    media_url = images[0].get("src", "") if images else ""
    media_type = "image" if media_url else ""
    media_file_name = media_url.split("/")[-1] if media_url else ""

    row = [
        "",                             # note_id is left blank
        "link",                         # default Type
        label,
        url,
        li.get_text(" ", strip=True),   # Content = full text of the <li>
        keyword,                        # Keywords = main keyword
        media_url,
        media_type,
        media_file_name,
        "",                             # Emoji is left blank
        position,
    ]
    for extra_label, extra_href in extra_links:
        row.extend([extra_label, extra_href])

    # Replace "" with the placeholder; non-string cells (position) pass through.
    return [cell if cell != "" else EMPTY_PLACEHOLDER for cell in row]


def main():
    """Run the interactive HTML -> CSV conversion."""
    # --- Step 0: ask for main keyword ---
    keyword = input("Main keyword? ").strip()

    # --- Step 1: ask user to select the HTML file ---
    html_path = _select_html_file()
    if not html_path:
        print("❌ No file selected. Exiting.")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. `python -S` or frozen executables).
        sys.exit()

    # --- Step 2: read HTML file ---
    # "utf-8-sig" drops a leading BOM if present and otherwise decodes
    # exactly like plain UTF-8.
    with open(html_path, "r", encoding="utf-8-sig") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")

    # --- Step 3/4: one row per <li> element, positions starting at 1 ---
    rows = [
        _build_row(li, position, keyword)
        for position, li in enumerate(soup.find_all("li"), start=1)
    ]

    # --- Step 5: write CSV with BOM for Excel ---
    out_file = _output_path(html_path)
    with open(out_file, "w", newline="", encoding="utf-8-sig") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(HEADERS)
        writer.writerows(rows)

    print(f"✅ Conversion complete. CSV saved as: {out_file}")


if __name__ == "__main__":
    main()