# Source code for converters.usfm2html_converter

from __future__ import print_function, unicode_literals
import urlparse
import os
import tempfile
import codecs
from bs4 import BeautifulSoup
from shutil import copyfile
from libraries.general_tools.file_utils import write_file, remove_tree, get_files
from converter import Converter
from usfm_tools.transform import UsfmTransform
from libraries.resource_container.ResourceContainer import BIBLE_RESOURCE_TYPES


class Usfm2HtmlConverter(Converter):
    """Converter that renders Bible USFM files as HTML pages.

    Each ``.usfm`` file found under ``self.files_dir`` is transformed to HTML
    with ``UsfmTransform`` and embedded in ``templates/template.html``; every
    other file is copied to ``self.output_dir`` unchanged.
    """

    def convert(self):
        """Run the conversion when the resource is a Bible resource.

        Returns:
            True if ``self.resource`` is in ``BIBLE_RESOURCE_TYPES`` (after
            ``convert_bible`` has run), otherwise False.
        """
        if self.resource not in BIBLE_RESOURCE_TYPES:
            return False
        self.convert_bible()
        return True

    def convert_bible(self):
        """Convert every USFM file to HTML and copy the remaining files.

        If ``self.source`` carries a ``convert_only`` query parameter, only
        the USFM files whose base names are listed there are converted, and
        ``self.source`` is rewritten without its query string.
        """
        self.log.info('Processing the Bible USFM files')

        # Collect the candidate files, skipping the converter's exclusions.
        files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)

        # None means "no restriction"; a list restricts conversion to those names.
        convert_only = self._extract_convert_only()

        current_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
            template_html = template_file.read()

        for filename in files:
            if filename.endswith('.usfm'):
                if convert_only is not None and os.path.basename(filename) not in convert_only:
                    # Not one of the files we were asked to convert.
                    continue
                self._convert_usfm_file(filename, template_html)
            else:
                # Directly copy over files that are not USFM files.
                self._copy_non_usfm_file(filename)

        self.log.info('Finished processing Bible USFM files.')

    def _extract_convert_only(self):
        """Read the ``convert_only`` query parameter from ``self.source``.

        Only the first ``convert_only`` parameter is honoured. When one is
        found, ``self.source`` is replaced by the same URL reduced to scheme,
        netloc and path.

        Returns:
            The comma-separated parameter value split into a list, or None
            when ``self.source`` is empty or has no such parameter.
        """
        if not self.source:
            return None

        parsed = urlparse.urlparse(self.source)
        for key, value in urlparse.parse_qsl(parsed.query):
            if key == 'convert_only':
                self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
                return value.split(',')
        return None

    def _convert_usfm_file(self, filename, template_html):
        """Convert one USFM file to HTML and write it to ``self.output_dir``.

        Args:
            filename: Path of the ``.usfm`` file to convert.
            template_html: Markup of the page template; it must contain a
                ``<div id="content">`` and a ``<title>`` inside ``<head>``.
        """
        base_name = os.path.basename(filename)
        filebase = os.path.splitext(base_name)[0]
        html_filename = filebase + ".html"

        # The transform works on a directory, so give it a private one that
        # holds just this file.
        scratch_dir = tempfile.mkdtemp(prefix='scratch_')
        try:
            copyfile(filename, os.path.join(scratch_dir, base_name))
            UsfmTransform.buildSingleHtml(scratch_dir, scratch_dir, filebase)

            # 'utf-8-sig' drops a leading byte-order mark if one is present.
            with codecs.open(os.path.join(scratch_dir, html_filename), 'r', 'utf-8-sig') as html_file:
                converted_html = html_file.read()

            # Parse the template afresh for every file because it is modified below.
            template_soup = BeautifulSoup(template_html, 'html.parser')
            template_soup.head.title.string = self.resource.upper()
            converted_soup = BeautifulSoup(converted_html, 'html.parser')

            # Replace the template's content with the children of the
            # converted document's <body> (the <body> tag itself is removed).
            content_div = template_soup.find('div', id='content')
            content_div.clear()
            content_div.append(converted_soup.body)
            content_div.body.unwrap()

            output_file = os.path.join(self.output_dir, html_filename)
            write_file(output_file, template_soup.prettify())
            self.log.info('Converted {0} to {1}.'.format(base_name, os.path.basename(html_filename)))
        finally:
            # Always clean up, even when the conversion fails part-way, so
            # failed runs do not leave scratch directories behind.
            remove_tree(scratch_dir)

    def _copy_non_usfm_file(self, filename):
        """Copy a non-USFM file into ``self.output_dir`` without overwriting.

        The copy is best-effort: a failure is logged and processing of the
        remaining files continues.

        Args:
            filename: Path of the file to copy.
        """
        try:
            output_file = os.path.join(self.output_dir, os.path.basename(filename))
            if not os.path.exists(output_file):
                copyfile(filename, output_file)
        except Exception as e:
            # Keep going on failure, but do not swallow KeyboardInterrupt or
            # SystemExit and do not hide the problem. repr() keeps the message
            # safe to build when the path or error text is not ASCII.
            # NOTE(review): assumes self.log provides warning() - confirm.
            self.log.warning('Unable to copy {0!r} to the output directory: {1!r}'.format(filename, e))