#pylint: disable=too-many-public-methods
from functools import reduce
import os, re, shutil, tempfile
import unittest
from gleetex import htmlhandling
# Short alias for the class constant; HtmlImageTest checks that a file of this
# name exists after format_excluded() has been used.
excl_filename = htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME
# Template filled in with str.format(charset, body) by the encoding tests.
# NOTE(review): only the positional field {1} is present although callers pass
# a charset as argument 0 -- the surrounding markup and a {0} field appear to
# have been lost; confirm against the original file.
HTML_SKELETON = '''
{1}'''
def read(file_name, mode='r', encoding='utf-8'):
    """Read the whole file and return its contents; the file is always closed.

    file_name -- path of the file to read
    mode      -- mode handed to open(); text mode ('r') by default
    encoding  -- text encoding used for decoding; ignored for binary modes

    Returns a str for text modes and bytes for binary modes.
    """
    # open() raises ValueError when an encoding is combined with a binary
    # mode, so only forward the encoding for text modes.
    text_encoding = None if 'b' in mode else encoding
    with open(file_name, mode, encoding=text_encoding) as handle:
        return handle.read()
# Tests for htmlhandling.EqnParser: each test feeds a document to the parser
# and inspects the chunk list returned by get_data(). Plain chunks are
# expected to be str; equation chunks are tuples/lists whose index 1 is
# compared against the displaymath flag and index 2 against the formula text.
# NOTE(review): many string literals in this class are empty, unterminated or
# broken across lines, which suggests that the HTML markup they contained was
# lost; restore them from the original file before relying on these tests.
class HtmlparserTest(unittest.TestCase):
# Create a fresh parser for every test; it is stored in self.p.
def setUp(self):
self.p = htmlhandling.EqnParser()
def test_start_tags_are_parsed_literally(self):
self.p.feed("")
self.assertEqual(self.p.get_data()[0], "
",
"The HTML parser should copy start tags literally.")
def test_that_end_tags_are_copied_literally(self):
self.p.feed(" p>
")
self.assertEqual(''.join(self.p.get_data()), " p>")
def test_entities_are_unchanged(self):
self.p.feed("
")
self.assertEqual(self.p.get_data()[0], '
')
def test_charsets_are_copied(self):
self.p.feed('>→')
self.assertEqual(''.join(self.p.get_data()[0]), '>→')
# NOTE(review): reduce() is called without an initial value, so the first
# chunk seeds the accumulator and is itself never checked with isinstance().
def test_without_eqn_all_blocks_are_strings(self):
self.p.feed("\n42
blah
")
self.assertTrue(reduce(lambda x,y: x and isinstance(y, str),
self.p.get_data()), "all chunks have to be strings")
def test_equation_is_detected(self):
self.p.feed('foo \\pi')
self.assertTrue(isinstance(self.p.get_data()[0], (tuple, list)))
self.assertEqual(self.p.get_data()[0][2], 'foo \\pi')
# NOTE(review): the last assertion checks the type of the whole get_data()
# result, although its message talks about the second item -- confirm intent.
def test_tag_followed_by_eqn_is_correctly_recognized(self):
self.p.feed('bar')
self.assertEqual(self.p.get_data()[0], '
')
self.assertTrue(isinstance(self.p.get_data(), list), "second item of data must be equation data list")
def test_document_with_tag_then_eqn_then_tag_works(self):
self.p.feed('
bar
baz')
eqn = None
# The test should not depend on a specific position of the equation, so
# search for it.
data = self.p.get_data()
for chunk in data:
if isinstance(chunk, (tuple, list)):
eqn = chunk
break
self.assertTrue(isinstance(data[0], str))
self.assertTrue(eqn is not None,
"No equation found, must be tuple/list object.")
self.assertTrue(isinstance(data[-1], str))
def test_equation_is_copied_literally(self):
self.p.feed('my\nlittle\n\\tau')
self.assertEqual(self.p.get_data()[0][2], 'my\nlittle\n\\tau')
# feed() is expected to raise htmlhandling.ParseException here.
def test_unclosed_eqns_are_detected(self):
self.assertRaises(htmlhandling.ParseException, self.p.feed,
'\\endless\\formula')
def test_nested_formulas_trigger_exception(self):
self.assertRaises(htmlhandling.ParseException, self.p.feed,
"\\pi")
self.assertRaises(htmlhandling.ParseException, self.p.feed,
"\\pi
")
# NOTE(review): in a non-raw literal '\f' is the form-feed escape, so
# '\frac12' does not contain a backslash -- confirm this is intended.
def test_formulas_without_displaymath_attribute_are_detected(self):
self.p.feed('\frac12
bar
')
formulas = [c for c in self.p.get_data() if isinstance(c, (tuple, list))]
self.assertEqual(len(formulas), 2) # there should be _2_ formulas
self.assertEqual(formulas[0][1], False) # no displaymath
self.assertEqual(formulas[1][1], False) # no displaymath
def test_that_unclosed_formulas_detected(self):
self.assertRaises(htmlhandling.ParseException, self.p.feed,
"\\pi")
self.assertRaises(htmlhandling.ParseException, self.p.feed,
"\\pi")
# Uses locally created parsers (p) to check the last element of the first
# equation chunk found.
# NOTE(review): two literals below open with " and end with ', so text between
# them seems to be missing; the final lines also read from self.p instead of
# a local parser -- verify against the original file.
def test_formula_contains_only_formula(self):
p = htmlhandling.EqnParser()
p.feed("1
")
formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
self.assertEqual(formula[-1], "1test')
formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
self.assertEqual(formula[-1], "test")
p = htmlhandling.EqnParser()
p.feed("1
")
formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
self.assertEqual(formula[-1], "1a>b')
formula = self.p.get_data()[0]
self.assertEqual(formula[-1], "a>b")
# NOTE(review): '\l' is not a recognised escape sequence; Python keeps the
# backslash but newer versions warn about it.
def test_displaymath_is_recognized(self):
self.p.feed('\\sum\limits_{n=1}^{e^i} a^nl^n')
self.assertEqual(self.p.get_data()[0][1], True) # displaymath flag set
# Feeds bytes and reads the detected encoding through the name-mangled
# private attribute _EqnParser__encoding.
def test_encoding_is_parsed_from_HTML4(self):
iso8859_1 = HTML_SKELETON.format('iso-8859-15', 'öäüß').encode('iso-8859-1')
self.p.feed(iso8859_1)
self.assertEqual(self.p._EqnParser__encoding, 'iso-8859-15')
# Same check with a UTF-8 encoded byte document; the comparison is
# case-insensitive.
def test_encoding_is_parsed_from_HTML5(self):
document = r"""
hi
"""
self.p.feed(document.encode('utf-8'))
self.assertEqual(self.p._EqnParser__encoding.lower(), 'utf-8')
def test_strings_can_be_passed_tO_parser_as_well(self):
# no exception - everything is working as expected
self.p.feed(HTML_SKELETON.format('utf-8', 'æø'))
class GetPositionTest(unittest.TestCase):
    """Check the pair returned by htmlhandling.get_position(text, offset).

    Element 0 of the result is treated as the line number and element 1 as
    the position within that line.
    """

    def test_that_line_number_is_correct(self):
        """Element 0 must match the expected line for each sample."""
        samples = (
            ('jojo', 0, 0),
            ('jojo', 3, 0),
            ('a\njojo', 3, 1),
            ('a\n\njojo', 3, 2),
        )
        for text, offset, expected_line in samples:
            position = htmlhandling.get_position(text, offset)
            self.assertEqual(position[0], expected_line)

    def test_that_position_on_line_is_correct(self):
        """Element 1 must match the expected in-line position for each sample."""
        samples = (
            ('jojo', 0, 0),
            ('jojo', 3, 3),
            ('a\njojo', 3, 2),
            ('a\n\njojo', 3, 1),
        )
        for text, offset, expected_column in samples:
            position = htmlhandling.get_position(text, offset)
            self.assertEqual(position[1], expected_column)
class HtmlImageTest(unittest.TestCase):
def setUp(self):
    """Prepare fixture data and move into a fresh temporary directory.

    The directory that was current before the switch is remembered in
    ``self.original_directory``; the new one is kept in ``self.tmpdir``.
    """
    self.pos = dict(depth=99, height=88, width=77)
    self.tmpdir = tempfile.mkdtemp()
    self.original_directory = os.getcwd()
    os.chdir(self.tmpdir)
def tearDown(self):
    """Return to the original directory and delete the temporary one."""
    # Leave the temporary directory before removing it.
    os.chdir(self.original_directory)
    # Removal errors are ignored on purpose (best-effort cleanup).
    shutil.rmtree(self.tmpdir, ignore_errors=True)
def test_that_no_file_is_written_if_no_content(self):
    """Entering and leaving the formatter without input must create no file."""
    target = 'foo.html'
    with htmlhandling.HtmlImageFormatter(target):
        # Nothing is formatted inside the context on purpose.
        pass
    file_exists = os.path.exists(target)
    self.assertFalse(file_exists)
def test_file_if_written_when_content_exists(self):
    """After one excluded formula was formatted, the exclusion file must exist."""
    formatter = htmlhandling.HtmlImageFormatter()
    with formatter as img:
        img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
    # The existence check happens after the context has been left.
    file_exists = os.path.exists(excl_filename)
    self.assertTrue(file_exists)
def test_written_file_starts_and_ends_more_or_less_properly(self):
with htmlhandling.HtmlImageFormatter('.') as img:
img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
data = read(htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME, 'r', encoding='utf-8')
self.assertTrue('' in data)
self.assertTrue('' in data)
# make sure encoding is specified
self.assertTrue('%s
\n' % (\
('
' if hr else ''), htmlhandling.gen_id(formula), formula)
class OutsourcingParserTest(unittest.TestCase):
# Store the leading part of the test document in self.html; get_html()
# prepends it to the string under test.
# NOTE(review): the literal below contains an empty '' piece and a string
# broken across two lines, so its markup appears to have been lost -- restore
# it from the original file.
def setUp(self):
self.html = ('\n\n\n' +
'' +
'Outsourced Formulas\n\nheading
')
def get_html(self, string):
    """Create an HTML string with head / tail and put the specified string
    into it."""
    # NOTE(review): the tail consists of two newlines only; closing markup
    # may be missing here -- confirm against the original file.
    tail = '\n\n'
    return ''.join((self.html, string, tail))
def test_formulas_are_recognized(self):
    """A document holding one outsourced formula must yield one formula."""
    document = self.get_html(htmleqn('\\tau'))
    formula_parser = htmlhandling.OutsourcedFormulaParser()
    formula_parser.feed(document)
    found = parser_formulas = formula_parser.get_formulas()
    self.assertEqual(len(found), 1)
# Parse a document with a single formula and check that the value stored for
# it does not contain certain surrounding text.
# NOTE(review): '' in par is true for every str, so the first assertFalse
# would always fail as written; the literal presumably lost its content --
# confirm against the original file.
def test_formula_doesnt_contain_surrounding_rubbish(self):
data = self.get_html(htmleqn('\\gamma'))
parser = htmlhandling.OutsourcedFormulaParser()
parser.feed(data)
self.assertEqual(len(parser.get_formulas()), 1)
# get_formulas() is used like a mapping: take its only key, then look up
# the associated value.
key = next(iter(parser.get_formulas()))
par = parser.get_formulas()[key]
self.assertFalse('' in par)
self.assertFalse('hr' in par)
def test_that_header_is_parsed_correctly(self):
p = htmlhandling.OutsourcedFormulaParser()
p.feed(self.get_html(htmleqn('test123', False)))
head = p.get_head()
self.assertTrue('DOCTYPE' in head)
self.assertTrue('