# Source code for door43_tools.templaters

from __future__ import unicode_literals, print_function
import os
import codecs
import logging
from glob import glob
from bs4 import BeautifulSoup
from libraries.general_tools.file_utils import write_file
from libraries.resource_container.ResourceContainer import RC
from libraries.general_tools.file_utils import load_yaml_object
from libraries.resource_container.ResourceContainer import BIBLE_RESOURCE_TYPES


def do_template(resource_type, source_dir, output_dir, template_file):
    """
    Build the templater matching ``resource_type`` and run it.

    :param resource_type: resource type identifier used to pick the templater class
    :param source_dir: local directory holding the HTML files to be templated
    :param output_dir: local directory the templated files are written to
    :param template_file: local path of the HTML template file
    :return: whatever the templater's ``run()`` returns
    """
    return init_template(resource_type, source_dir, output_dir, template_file).run()


def init_template(resource_type, source_dir, output_dir, template_file):
    """
    Instantiate (without running) the templater class for ``resource_type``.

    Types listed in ``BIBLE_RESOURCE_TYPES`` get a ``BibleTemplater``, ``'obs'``
    gets an ``ObsTemplater``, ``'ta'`` gets a ``TaTemplater`` and anything else
    falls back to the generic ``Templater``.

    :param resource_type: resource type identifier
    :param source_dir: local directory holding the HTML files to be templated
    :param output_dir: local directory the templated files are written to
    :param template_file: local path of the HTML template file
    :return: a new templater instance
    """
    if resource_type in BIBLE_RESOURCE_TYPES:
        templater_class = BibleTemplater
    elif resource_type == 'obs':
        templater_class = ObsTemplater
    elif resource_type == 'ta':
        templater_class = TaTemplater
    else:
        templater_class = Templater
    return templater_class(resource_type, source_dir, output_dir, template_file)


class Templater(object):
    """
    Generic templater: wraps every ``*.html`` file found in ``source_dir`` in
    the HTML template and writes the result to ``output_dir``.

    Subclasses customise the sidebars by overriding ``get_page_navigation``,
    ``build_page_nav``, ``build_left_sidebar`` or ``build_right_sidebar``.
    """

    def __init__(self, resource_type, source_dir, output_dir, template_file):
        """
        :param resource_type: resource type identifier; added as a CSS class on <body>
        :param source_dir: local directory containing the HTML files to template
        :param output_dir: local directory the templated files are written to
        :param template_file: local path of the HTML template file
        """
        self.resource_type = resource_type
        self.source_dir = source_dir  # Local directory
        self.output_dir = output_dir  # Local directory
        self.template_file = template_file  # Local file of template

        self.files = sorted(glob(os.path.join(self.source_dir, '*.html')))
        self.rc = None
        self.template_html = ''
        self.logger = logging.getLogger()
        self.already_converted = []  # files that only need their navigation refreshed
        self.titles = {}  # file basename -> page title
        self.chapters = {}  # file basename -> list of chapter ids (filled by subclasses)
        self.book_codes = {}  # file basename -> book code (filled by subclasses)

    def run(self):
        """
        Load the resource container and the template, tag the template's <body>
        with the resource type, then template every file.

        :return: True once all files have been processed
        """
        # get the resource container
        self.rc = RC(self.source_dir)
        with open(self.template_file) as template_file:
            self.template_html = template_file.read()

        soup = BeautifulSoup(self.template_html, 'html.parser')
        body_classes = soup.body.get('class', []) + [self.resource_type]
        # every Bible-like resource is additionally styled as a generic 'bible'
        if self.resource_type in BIBLE_RESOURCE_TYPES and self.resource_type != 'bible':
            body_classes.append('bible')
        soup.body['class'] = body_classes
        self.template_html = unicode(soup)

        self.apply_template()
        return True

    def build_left_sidebar(self, filename=None):
        """
        Return the HTML of the left sidebar (an empty "Revisions" table).

        :param filename: file being templated; unused here, available to overrides
        :return: HTML string whose root element is a <nav>
        """
        return """
            <nav class="affix-top hidden-print hidden-xs hidden-sm" id="left-sidebar-nav">
                <div class="nav nav-stacked" id="revisions-div">
                    <h1>Revisions</h1>
                    <table width="100%" id="revisions"></table>
                </div>
            </nav>
            """

    def build_right_sidebar(self, filename=None):
        """
        Return the HTML of the right sidebar, which is the page navigation.

        :param filename: file being templated
        :return: HTML string whose root element is a <nav>
        """
        return self.build_page_nav(filename)

    def build_page_nav(self, filename=None):
        """
        Build the list of links to all pages; the current page is listed
        without a link and status/index pages are left out.

        :param filename: full path of the file being templated (as found in ``self.files``)
        :return: HTML string whose root element is a <nav>
        """
        html = """
            <nav class="affix-top hidden-print hidden-xs hidden-sm content-nav" id="right-sidebar-nav">
              <ul id="sidebar-nav" class="nav nav-stacked">
                <li><h1>Navigation</h1></li>
            """
        for fname in self.files:
            key = os.path.basename(fname)
            title = self.titles.get(key, "")

            # status pages and the index are not part of the navigation
            if title in ("Conversion requested...", "Conversion successful", "Index"):
                continue
            if filename != fname:
                html += '<li><a href="{0}">{1}</a></li>'.format(key, title)
            else:
                html += '<li>{0}</li>'.format(title)
        html += """
                </ul>
            </nav>
            """
        return html

    def get_page_navigation(self):
        """
        Fill ``self.titles`` for every file not seen yet, using the first <h1>
        of the file or, failing that, a title derived from the file name.
        """
        for fname in self.files:
            key = os.path.basename(fname)
            if key in self.titles:  # skip if we already have data
                continue

            with codecs.open(fname, 'r', 'utf-8-sig') as f:
                soup = BeautifulSoup(f, 'html.parser')
            if soup.find('h1'):
                title = soup.h1.text
            else:
                title = os.path.splitext(key)[0].replace('_', ' ').capitalize()

            self.titles[key] = title

    @staticmethod
    def _replace_sidebar(sidebar_div, sidebar_html):
        """Replace the content of ``sidebar_div`` with the <nav> parsed from ``sidebar_html``."""
        nav = BeautifulSoup(sidebar_html, 'html.parser').nav.extract()
        sidebar_div.clear()
        sidebar_div.append(nav)

    def _refresh_navigation(self, filename):
        """Rebuild only the right sidebar of an already templated file and write it out."""
        # read the templated file into its own DOM object; the shared template
        # must stay untouched because later files are still rendered from it
        with codecs.open(filename, 'r', 'utf-8-sig') as f:
            page = BeautifulSoup(f, 'html.parser')

        page_sidebar_div = page.body.find('div', id='right-sidebar') if page.body else None
        if not page_sidebar_div:
            return
        self._replace_sidebar(page_sidebar_div, self.build_right_sidebar(filename))

        # render the html as an unicode string
        html = unicode(page)

        # write to output directory
        out_file = os.path.join(self.output_dir, os.path.basename(filename))
        self.logger.debug('Updating nav in {0}.'.format(out_file))
        write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))

    def apply_template(self):
        """
        Render every file in ``self.files`` through the template and write the
        result to ``self.output_dir``.

        Files listed in ``self.already_converted`` are assumed to be templated
        already and only get their right sidebar rebuilt.

        :raises Exception: if the template has no div with id "outer-content"
        """
        language_code = self.rc.resource.language.identifier
        language_name = self.rc.resource.language.title
        language_dir = self.rc.resource.language.direction
        resource_title = self.rc.resource.title

        self.get_page_navigation()

        heading = '{0}: {1}'.format(language_name, resource_title)

        # template is the DOM that we will replace content of for every file
        template = BeautifulSoup(self.template_html, 'html.parser')
        left_sidebar_div = template.body.find('div', id='left-sidebar')
        outer_content_div = template.body.find('div', id='outer-content')
        right_sidebar_div = template.body.find('div', id='right-sidebar')

        # find the outer-content div in the template
        if not outer_content_div:
            raise Exception('No div tag with id "outer-content" was found in the template')

        # get the canonical URL; find_all() matches tag names and attribute
        # filters, it does not understand CSS selectors
        canonical = ''
        links = template.head.find_all('link', rel='canonical')
        if len(links) == 1:
            canonical = links[0].get('href', '')

        # loop through the html files
        for filename in self.files:
            if filename in self.already_converted:
                # if already templated, only the navigation bar needs updating
                self._refresh_navigation(filename)
                continue

            self.logger.debug('Applying template to {0}.'.format(filename))

            # read the downloaded file into a dom object
            with codecs.open(filename, 'r', 'utf-8-sig') as f:
                file_soup = BeautifulSoup(f, 'html.parser')

            # get the title from the raw html file (decided afresh for each file)
            if file_soup.head and file_soup.head.title:
                title = file_soup.head.title.text
            else:
                title = os.path.basename(filename)

            # get the language code, if we haven't yet
            if not language_code:
                html_tag = file_soup.html
                # attributes must be tested with has_attr(); "in" looks at child nodes
                if html_tag is not None and html_tag.has_attr('lang'):
                    language_code = html_tag['lang']
                else:
                    language_code = 'en'

            # get the body of the raw html file
            if not file_soup.body:
                body = BeautifulSoup('<div>No content</div>', 'html.parser')
            else:
                body = BeautifulSoup(''.join(['%s' % x for x in file_soup.body.contents]), 'html.parser')

            # insert new HTML into the template
            outer_content_div.clear()
            outer_content_div.append(body)
            template.html['lang'] = language_code
            template.html['dir'] = language_dir

            template.head.title.clear()
            template.head.title.append(heading + ' - ' + title)

            # set the page heading, when the template has a slot for it
            heading_span = template.body.find('span', id='h1')
            if heading_span:
                heading_span.clear()
                heading_span.append(heading)

            if left_sidebar_div:
                self._replace_sidebar(left_sidebar_div, self.build_left_sidebar(filename))

            if right_sidebar_div:
                self._replace_sidebar(right_sidebar_div, self.build_right_sidebar(filename))

            # render the html as an unicode string
            html = unicode(template)

            # fix the footer message, removing the title of this page in parentheses as it doesn't get filled
            html = html.replace(
                '("<a xmlns:dct="http://purl.org/dc/terms/" href="https://live.door43.org/templates/project-page.html" rel="dct:source">{{ HEADING }}</a>") ',
                '')
            # update the canonical URL - it is in several different locations
            if canonical:
                html = html.replace(canonical, canonical.replace('/templates/', '/{0}/'.format(language_code)))

            # Replace HEADING with page title in footer
            html = html.replace('{{ HEADING }}', title)

            # write to output directory
            out_file = os.path.join(self.output_dir, os.path.basename(filename))
            self.logger.debug('Writing {0}.'.format(out_file))
            write_file(out_file, html.encode('ascii', 'xmlcharrefreplace'))


class ObsTemplater(Templater):
    """
    Templater selected for the 'obs' resource type.

    It currently adds no behaviour of its own: construction and templating are
    inherited unchanged from ``Templater``.
    """


class BibleTemplater(Templater):
    """
    Templater for Bible-like resources: the navigation is an accordion with
    one panel per book file, each listing links to that book's chapters.
    """

    def get_page_navigation(self):
        """
        Fill ``self.titles``, ``self.book_codes`` and ``self.chapters`` for
        every file not seen yet.

        The book code comes from the file name, the title from the first <h1>
        (falling back to the book code) and the chapters from the ids of all
        ``<h2 class="c-num">`` headings.
        """
        for fname in self.files:
            key = os.path.basename(fname)
            if key in self.titles:  # skip if we already have data
                continue

            filebase = os.path.splitext(key)[0]
            # Getting the book code for HTML tag references
            fileparts = filebase.split('-')
            if len(fileparts) == 2:
                # Assuming filename of ##-<name>.usfm, such as 01-GEN.usfm
                raw_code = fileparts[1].lower()
            else:
                # Assuming filename of <name>.usfm, such as GEN.usfm
                raw_code = fileparts[0].lower()
            # replacing spaces and periods since the code is used in tag ids/hrefs;
            # str.replace returns a new string, so the result has to be kept
            book_code = raw_code.replace(' ', '-').replace('.', '-')

            with codecs.open(fname, 'r', 'utf-8-sig') as f:
                soup = BeautifulSoup(f.read(), 'html.parser')
            heading = soup.find('h1')
            if heading:
                title = heading.text
            else:
                # the displayed fallback keeps the unsanitised code
                title = '{0}.'.format(raw_code)

            self.titles[key] = title
            self.book_codes[key] = book_code
            chapter_headings = soup.find_all('h2', class_='c-num')
            # headings without an id cannot be linked to, so they are left out
            self.chapters[key] = [c['id'] for c in chapter_headings if c.has_attr('id')]

    def build_page_nav(self, filename=None):
        """
        Build the accordion navigation: one collapsible panel per book, opened
        for the current file, with a link per chapter.

        Chapter ids are split on '-' and the third part, stripped of leading
        zeros, is used as the link label (presumably ids look like
        ``<x>-ch-<NNN>`` -- confirm against the converter that emits them).

        :param filename: full path of the file being templated (as found in ``self.files``)
        :return: HTML string whose root element is a <nav>
        """
        html = """
        <nav class="hidden-print hidden-xs hidden-sm content-nav" id="right-sidebar-nav">
            <ul id="sidebar-nav" class="nav nav-stacked books panel-group">
            """
        for fname in self.files:
            key = os.path.basename(fname)
            book_code = self.book_codes.get(key, "")
            title = self.titles.get(key, "")

            # status pages and the index are not part of the navigation
            if title in ("Conversion requested...", "Conversion successful", "Index"):
                continue

            is_current = fname == filename
            html += """
                <div class="panel panel-default">
                    <div class="panel-heading">
                        <h4 class="panel-title">
                            <a class="accordion-toggle" data-toggle="collapse" data-parent="#sidebar-nav" href="#collapse{0}">{1}</a>
                        </h4>
                    </div>
                    <div id="collapse{0}" class="panel-collapse collapse{2}">
                        <ul class="panel-body chapters">
                    """.format(book_code, title, ' in' if is_current else '')

            # chapters of the current book are linked within the page itself
            target_page = '' if is_current else key
            for chapter in self.chapters.get(key, []):
                html += """
                       <li class="chapter"><a href="{0}#{1}">{2}</a></li>
                    """.format(target_page, chapter, chapter.split('-')[2].lstrip('0'))
            html += """
                        </ul>
                    </div>
                </div>
                    """
        html += """
            </ul>
        </nav>
            """
        return html


class TaTemplater(Templater):
    """
    Templater for the 'ta' resource type: the navigation lists every page and,
    for the current page, a collapsible table of contents loaded from the
    ``<page>-toc.yaml`` file that sits next to the page.
    """

    def __init__(self, *args, **kwargs):
        super(TaTemplater, self).__init__(*args, **kwargs)
        # counter used to generate anchors for TOC sections that have no 'link'
        self.section_container_id = 1

    def build_section_toc(self, section):
        """
        Recursive section toc builder

        :param dict section: TOC node with a 'title' and optionally a 'link'
            (anchor name) and 'sections' (list of child nodes)
        :return: HTML string with one <li> for the section and, nested inside
            it, a collapsible <ul> holding its subsections
        """
        if 'link' in section:
            link = section['link']
        else:
            # sections without a link get a generated anchor name
            link = 'section-container-{0}'.format(self.section_container_id)
            self.section_container_id += 1
        html = """
            <li>
                <a href="#{0}">{1}</a>
            """.format(link, section['title'])
        if 'sections' in section:
            html += """ 
                <a href="#" data-target="#{0}-sub" data-toggle="collapse" class="content-nav-expand collapsed"></a>
                <ul id="{0}-sub" class="collapse">
            """.format(link)
            for subsection in section['sections']:
                html += self.build_section_toc(subsection)
            html += """
                </ul>
            """
        html += """
            </li>
        """
        return html

    def build_page_nav(self, filename=None):
        """
        Build the navigation: a heading per page (a link for every page but
        the current one) followed, for the current page, by its table of
        contents.

        :param filename: full path of the file being templated (as found in ``self.files``)
        :return: HTML string whose root element is a <nav>
        """
        # restart the generated anchor numbering for every page
        self.section_container_id = 1
        html = """
            <nav class="hidden-print hidden-xs hidden-sm content-nav" id="right-sidebar-nav">
                <ul class="nav nav-stacked">
        """
        for fname in self.files:
            with codecs.open(fname, 'r', 'utf-8-sig') as f:
                soup = BeautifulSoup(f.read(), 'html.parser')
            heading = soup.find('h1')
            if heading:
                title = heading.text
            else:
                title = os.path.splitext(os.path.basename(fname))[0].title()
            # status pages and the index are not part of the navigation
            if title in ("Conversion requested...", "Conversion successful", "Index"):
                continue
            if fname != filename:
                html += """
                <h4><a href="{0}">{1}</a></h4>
                """.format(os.path.basename(fname), title)
            else:
                html += """
                <h4>{0}</h4>
                """.format(title)
                toc = load_yaml_object('{0}-toc.yaml'.format(os.path.splitext(fname)[0]))
                if toc:
                    # a TOC without a 'sections' entry simply contributes nothing
                    for section in toc.get('sections') or []:
                        html += self.build_section_toc(section)
                html += """
                """
        html += """
                </ul>
            </nav>
        """
        return html