from bs4 import BeautifulSoup
import re

# Longest bookmark text (in characters, after stripping) that is kept as-is.
MAX_CHAR_COUNT_BOOKMARKS = 105


def fix_bookmarks(soup: BeautifulSoup) -> None:
    """Truncate over-long text inside bookmark elements, in place.

    Looks at every text node under a ``div`` whose class is
    ``bookmark-title``, ``bookmark-description`` or ``bookmark-href``.
    Text whose stripped length exceeds ``MAX_CHAR_COUNT_BOOKMARKS`` is
    replaced by its first ``MAX_CHAR_COUNT_BOOKMARKS`` characters
    followed by ``'...'``.

    Args:
        soup: Parsed document; modified in place.
    """
    bookmarks = soup.find_all(
        'div',
        class_=['bookmark-title', 'bookmark-description', 'bookmark-href'],
    )
    for bookmark in bookmarks:
        # Snapshot the text nodes first: the loop edits the tree it walks.
        for node in list(bookmark.strings):
            text = node.strip()
            if len(text) <= MAX_CHAR_COUNT_BOOKMARKS:
                continue
            # Replace the node itself. Removing a stripped copy from
            # bookmark.contents fails when the text carries surrounding
            # whitespace or lives inside a nested tag, and re-inserting at
            # a fixed index loses the text's original position.
            shortened = text[:MAX_CHAR_COUNT_BOOKMARKS] + '...'
            node.replace_with(soup.new_string(shortened))


def fix_code_snippets(soup: BeautifulSoup) -> None:
    """Set an inline style on every ``code`` element so long lines wrap.

    Args:
        soup: Parsed document; modified in place. Any existing ``style``
            attribute on a ``code`` element is overwritten.
    """
    for code_element in soup.find_all('code'):
        code_element['style'] = "white-space:pre-wrap;word-break:break-all"


def fix_youtube_links(soup: BeautifulSoup) -> None:
    """Replace plain YouTube links with an embedded video player.

    Each ``div`` with class ``source`` whose text is a YouTube URL is
    replaced by a ``center`` element wrapping an ``iframe``.

    Args:
        soup: Parsed document; modified in place.
    """
    youtube_regex = r'^(https?:\/\/)?(www\.)?((youtube\.com\/(?:watch\?v=|embed\/|v\/|playlist\?list=)|youtu\.be\/))([a-zA-Z0-9_-]{11}|\w+(\.\w+)+)$'
    for youtube_link in soup.find_all('div', class_="source"):
        # get_text() always returns a str; .string is None when the div has
        # more than one child, which would make re.match raise TypeError.
        url = youtube_link.get_text(strip=True)
        if not re.match(youtube_regex, url):
            continue

        # Only the watch?v= form is rewritten; youtu.be, /v/ and playlist
        # URLs also pass the regex but are used as the iframe src unchanged.
        embed_url = url.replace('watch?v=', 'embed/')

        # NOTE(review): the original iframe markup could not be recovered
        # from the source file. The size and attributes below are a
        # conventional YouTube embed -- confirm against the intended layout.
        iframe = soup.new_tag(
            'iframe',
            src=embed_url,
            width='560',
            height='315',
            frameborder='0',
            allowfullscreen='',
        )
        embedded_element = soup.new_tag('center')
        embedded_element.append(iframe)
        youtube_link.replace_with(embedded_element)


filename = input('Name of your Notion HTML file: ')

# Explicit encoding so the result does not depend on the platform default.
# NOTE(review): assumes the export is UTF-8 -- confirm.
with open(filename, 'r', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html, features="html.parser")

fix_bookmarks(soup)
fix_code_snippets(soup)
fix_youtube_links(soup)

# Strip only a trailing '.html'; str.replace would also remove the same
# substring anywhere else in the path.
html_suffix = '.html'
if filename.endswith(html_suffix):
    stem = filename[:-len(html_suffix)]
else:
    stem = filename
newfilename = stem + '-fixed.html'

with open(newfilename, 'w', encoding='utf-8') as f:
    f.write(str(soup))

print('Fixed file created: ' + newfilename)