Skip to content
Snippets Groups Projects
Commit c6f97851 authored by Josh Borrow's avatar Josh Borrow
Browse files

added short compilation script for markdown files as part of the about page

parent 8909c6d8
No related branches found
No related tags found
No related merge requests found
""" This file contains the routines used to compile the 'about' page's text.
This text comes to us in three parts, which are stored in three markdown
files in the data directory, along with an about_meta.yaml file describing
how they are to be described at the top of the page.
The markdown files are then compiled to two pieces of HTML, one for the
sidebar and one for the main part of the text, as well as a small
dictionary that describes the content of the cards at the top of the page.
Created 16-08-2017 by Josh Borrow (joshua.borrow@durham.ac.uk)
"""
import os
from html.parser import HTMLParser
import pypandoc
import yaml
def open_meta(filename="about_meta.yaml", data_dir="../data"):
    """Read the meta file and return the associated data.

    Parameters
    ----------
    filename : str
        Name of the YAML meta file inside ``data_dir``.
    data_dir : str
        Directory that holds the meta file.

    Returns
    -------
    The Python object that the YAML document parses to.
    """
    with open(f"{data_dir}/{filename}", "r") as f:
        # safe_load is used instead of a bare yaml.load(f): newer PyYAML
        # releases refuse yaml.load() without an explicit Loader, and older
        # ones fall back to a loader that can build arbitrary Python objects.
        return yaml.safe_load(f)
def compile_markdown(data, data_dir="../data"):
    """Convert every markdown file listed in ``data["files"]`` to HTML.

    'data' is the dictionary provided by reading about_meta.yaml. Each entry
    of ``data["files"]`` supplies a ``name`` (file inside ``data_dir``) and a
    ``slug`` (used as the id of the wrapping ``<div>``).

    Returns the concatenation of all wrapped HTML fragments, in file order.
    """
    sections = []

    for entry in data["files"]:
        source_path = f"{data_dir}/{entry['name']}"

        with open(source_path, "r") as handle:
            markdown = handle.read()

        rendered = pypandoc.convert_text(markdown, 'html', format='md')
        sections.append(f"<div id=\"{entry['slug']}\">{rendered}</div>")

    return "".join(sections)
class Parser(HTMLParser):
    """HTML parser that records every ``<h1>`` and ``<h2>`` it encounters.

    After feeding, ``headings`` holds one ``[tag, id, text]`` list per
    heading, in document order. ``id`` is the value of the heading's ``id``
    attribute (``""`` when it has none) and ``text`` is the text found
    between the heading's start and end tags.
    """

    def __init__(self):
        super().__init__()
        # Collected headings, each of the form [tag, id, text].
        self.headings = []
        # True while we are between a heading's start tag and its end tag.
        self.waiting_for_data = False

    def handle_starttag(self, tag, attrs):
        """Start a new heading record when an h1/h2 opens."""
        if tag not in ("h1", "h2"):
            return

        # First "id" attribute wins; default to "" so headings without any
        # attributes no longer leave the id undefined.
        this_id = next((value for name, value in attrs if name == "id"), "")

        # The text slot is created up front so every record always has three
        # elements, even when the heading turns out to be empty.
        self.headings.append([tag, this_id, ""])
        self.waiting_for_data = True

    def handle_endtag(self, tag):
        """Stop collecting text once the heading is closed."""
        if tag in ("h1", "h2"):
            self.waiting_for_data = False

    def handle_data(self, data):
        """Append text to the heading currently being read, if any."""
        if self.waiting_for_data:
            # Concatenate rather than take only the first chunk: inline
            # markup inside a heading splits its text into several calls.
            self.headings[-1][2] += data
def parse_html(text):
    """ Extract the headings needed to build the sidebar automatically.

    Feeds 'text' (an HTML string) to a fresh Parser and returns the
    'headings' list that the parser collected. """
    heading_parser = Parser()
    heading_parser.feed(text)

    return heading_parser.headings
def parse_headings(headings):
    """ Creates the HTML for the sidebar based on the headings.

    Parameters
    ----------
    headings : iterable
        Items of the form [tag, id, text], where tag is "h1" or "h2".

    Returns
    -------
    str
        Each h1 becomes an ``<h3>`` title followed by a ``<ul>``; each h2
        becomes an ``<li>`` linking to ``#id``. An empty input yields an
        empty string.

    Raises
    ------
    Exception
        If a heading's tag is neither "h1" nor "h2".
    """
    # Local import: the module only imports html.parser.HTMLParser, which
    # does not bring html.escape into scope.
    from html import escape

    parts = []
    list_open = False

    # Heading has the structure [tag, id, text]
    for heading in headings:
        tag = heading[0]

        if tag == "h1":
            if list_open:
                parts.append("</ul>")
            # The text arrives already unescaped, so it has to be escaped
            # again before being placed back into markup.
            title = escape(str(heading[2]), quote=False)
            parts.append(f"<h3>{title}</h3><ul>")
            list_open = True
        elif tag == "h2":
            if not list_open:
                # An h2 that appears before any h1 still needs a list to
                # live in, otherwise the <li> would be orphaned.
                parts.append("<ul>")
                list_open = True
            anchor = escape(str(heading[1]))
            label = escape(str(heading[2]), quote=False)
            parts.append(f"<li><a href=\"#{anchor}\">{label}</a></li>")
        else:
            raise Exception(f"There is an issue with Parser (about.py). It picked up {heading}")

    # Only close a list that was actually opened (nothing to close for an
    # empty set of headings).
    if list_open:
        parts.append("</ul>")

    return "".join(parts)
def compile_to_yaml(in_filename="about_meta.yaml", out_filename="about.yaml", data_dir="../data"):
    """ Build the about-page data and write it to a yaml file.

    Reads the meta file 'in_filename' from 'data_dir', compiles the markdown
    files it lists to HTML, derives the sidebar HTML from the headings in
    that output, and dumps everything to 'out_filename' in 'data_dir'.

    Returns the dictionary that was written, with the keys "meta_data",
    "output_text" and "sidebar". """
    meta = open_meta(in_filename, data_dir)
    body_html = compile_markdown(meta, data_dir)
    found_headings = parse_html(body_html)

    result = {
        "meta_data": meta,
        "output_text": body_html,
        "sidebar": parse_headings(found_headings),
    }

    target_path = f"{data_dir}/{out_filename}"
    with open(target_path, "w") as out_file:
        yaml.dump(result, out_file)

    return result
if __name__ == "__main__":
    # Ask for confirmation before doing any work; only 'y' or 'Y' proceeds.
    print("Running this script directly will only compile the markdown in data.")
    print("If you wish to continue, please enter Y, if not, please enter N.")

    answer = input()

    if answer not in ("y", "Y"):
        exit(0)

    compile_to_yaml()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment