""" This file contains the routines used to compile the 'about' page's text. This text comes to us in three parts, which are stored in three markdown files in the data directory, along with an about_meta.yaml file describing how they are to be described at the top of the page. The markdown files are then compiled to two pieces of HTML, one for the sidebar and one for the main part of the text, as well as a small dictionary that describes the content of the cards at the top of the page. Created 16-08-2017 by Josh Borrow (joshua.borrow@durham.ac.uk) """ #Disable errors associated with un-overwritten methods in HTMLParser. #pylint: disable=W0223 from html.parser import HTMLParser import pypandoc import yaml def open_meta(filename="about_meta.yaml", data_dir="../data"): """ Read the meta file and return the associated data """ with open(f"{data_dir}/{filename}", "r") as handle: return yaml.load(handle) def compile_markdown(data, data_dir="../data"): """ 'data' is the dictionary provided by reading about_meta.yaml """ output_text = "" for item in data["files"]: with open(f"{data_dir}/{item['name']}", "r") as handle: input_text = handle.read() compiled_text = pypandoc.convert_text(input_text, 'html', format='md') output_text += f"<div id=\"{item['slug']}\">{compiled_text}</div>" return output_text class Parser(HTMLParser): """ Custom HTML Parser that builds the headings tree """ def __init__(self): super(Parser, self).__init__() self.headings = [] self.waiting_for_data = False def handle_starttag(self, tag, attrs): for attr in attrs: if attr[0] == "id": this_id = attr[1] break else: this_id = "" continue if (tag == "h1") or (tag == "h2"): self.headings.append([tag, this_id]) self.waiting_for_data = True def handle_endtag(self, tag): pass def handle_data(self, data): if self.waiting_for_data: self.headings[-1].append(data) self.waiting_for_data = False def parse_html(text): """ To generate the sidebar automatically we must parse the HTML and extract the appropriate headings. """ parser = Parser() parser.feed(text) return parser.headings def parse_headings(headings): """ Creates the HTML for the sidebar based on the headings """ sidebar_content = "" # Heading has the structure [tag, id, text] for heading in headings: if heading[0] == "h1": if sidebar_content != "": sidebar_content += "</ul>" sidebar_content += f"<h2>{heading[2]}</h2><ul>" elif heading[0] == "h2": sidebar_content += f"<li><a href=\"#{heading[1]}\">{heading[2]}</a></li>" else: raise Exception(f"There is an issue with Parser (about.py). It picked up {heading}") return f"{sidebar_content}</ul>" def compile_to_yaml(in_filename="about_meta.yaml", out_filename="about.yaml", data_dir="../data"): """ Compiles the information in the markdown files and sends it out to an external yaml file. """ meta_data = open_meta(in_filename, data_dir) output_text = compile_markdown(meta_data, data_dir) sidebar = parse_headings(parse_html(output_text)) output_data = { "meta_data": meta_data, "output_text": output_text, "sidebar": sidebar } with open(f"{data_dir}/{out_filename}", "w") as handle: yaml.dump(output_data, handle) return output_data if __name__ == "__main__": print("Running this script directly will only compile the markdown in data.") print("If you wish to continue, please enter Y, if not, please enter N.") CHOICE = input() if CHOICE in ['y', 'Y']: compile_to_yaml() else: exit(0)