"""
Module latexdiff - compares LaTeX files

Contains class TexDiff.
"""
import re, difflib2, sys, textdiff
from misc import open_file

class TexDiff():
    r"""Compare the readable text of two LaTeX files.

    Both files are read in full when the instance is constructed.  For a
    comparison, the body of the ``document`` environment is extracted from
    each text, LaTeX markup is stripped from it and the remaining lines are
    handed to ``textdiff.TextDiff``.

    As described by the original author, the output format is ndiff: unchanged
    lines begin with two spaces, added lines begin with ``+`` and a space, and
    deleted lines begin with ``-`` and a space.
    """

    # Placeholders standing in for the escaped braces \{ and \} while markup is
    # stripped.  Without them the literal braces would be indistinguishable
    # from grouping braces and would be deleted together with them.  Control
    # characters are used because they are not expected in LaTeX sources.
    _LBRACE_MARK = '\x01'
    _RBRACE_MARK = '\x02'

    # Captures everything between \begin{document} and the last \end{document}.
    _PAT_GET_DOCUMENT = re.compile(
        r'\\begin(\[.*?\])?\{document\}(?P<body>.*)\\end\{document\}',
        re.I | re.S)

    # Ordered (pattern, replacement) pairs applied one after another to the
    # document body.  The order matters: later steps rely on what earlier
    # steps have already removed or substituted.
    _CLEANUP_STEPS = (
        # Unescaped dollar signs and whole math environments.
        (re.compile(r'(?<!\\)\$'), ''),
        (re.compile(r'\\begin\{equation\}.*?\\end\{equation\}', re.I | re.S), ''),
        (re.compile(r'\\begin\{displaymath\}.*?\\end\{displaymath\}', re.I | re.S), ''),
        (re.compile(r'\\begin\{eqnarray\}.*?\\end\{eqnarray\}', re.I | re.S), ''),
        # Comments (an unescaped % up to the end of the line).
        (re.compile(r'(?<!\\)%.*$', re.M), ''),
        # Line breaks (\\), unescaped ampersands and hyphenation hints (\-).
        (re.compile(r'\\\\'), ''),
        (re.compile(r'(?<!\\)&'), ''),
        (re.compile(r'\\-'), ''),
        # Commands whose brace arguments are not text and must go with them.
        (re.compile(r'\\begin\{tabular\}\{.*?\}', re.I | re.S), ''),
        (re.compile(r'\\multicolumn\{.*?\}\{.*?\}', re.I | re.S), ''),
        (re.compile(r'\\textcolor\{.*?\}'), ''),
        (re.compile(r'\\cline\{.*?\}', re.I | re.S), ''),
        (re.compile(r'\\ref\{.*?\}', re.I | re.S), ''),
        # Explicit spacing commands (\, and "\ ") become a plain space.
        (re.compile(r'\\,'), ' '),
        (re.compile(r'\\ '), ' '),
        # Remaining environment delimiters.
        (re.compile(r'\\begin(\[.*?\])?\{.*?\}', re.I | re.S), ''),
        (re.compile(r'\\end(\[.*?\])?\{.*?\}', re.I | re.S), ''),
        # Escaped special characters become the characters themselves.
        (re.compile(r'\\\$'), '$'),
        (re.compile(r'\\&'), '&'),
        (re.compile(r'\\%'), '%'),
        (re.compile(r'\\#'), '#'),
        (re.compile(r'\\_'), '_'),
        # Escaped braces are parked as placeholders until grouping braces
        # have been removed (see the restore step below).
        (re.compile(r'\\\{'), _LBRACE_MARK),
        (re.compile(r'\\\}'), _RBRACE_MARK),
        (re.compile(r'~'), ' '),
        (re.compile(r'\\\^'), '^'),
        # Any other command.  Besides whitespace, "{" and end of line, a
        # command name also ends in front of a brace placeholder, so a command
        # written directly before an escaped brace does not swallow the text
        # that follows it.
        (re.compile(r'\\.+?(\[.*?\])?([ \t\v\{]|(?=[\n\x01\x02]))', re.I | re.S), ''),
        # Grouping braces.
        (re.compile(r'(?<!\\)[\{\}]'), ''),
        # Restore the escaped braces as literal characters.
        (re.compile(r'\x01'), '{'),
        (re.compile(r'\x02'), '}'),
        # Whitespace normalisation: collapse runs of blanks, empty out
        # blank-only lines, collapse runs of newlines.
        (re.compile(r'[ \t]+'), ' '),
        (re.compile(r'^[ \t]+$', re.M), ''),
        (re.compile(r'[\n]+', re.S), '\n'),
    )

    def __init__(self,file1_name,file2_name,options):
        r"""Constructs TexDiff instance.

        Opens files `file1_name` and `file2_name` with ``open_file``, reads
        their whole content and closes them again.

        parameters:
        file1_name - name of the first file
        file2_name - name of the second file
        options - options from optparse

        instance variables:
        self.options - the `options` object passed in
        self.file1_name - name of the first file
        self.file2_name - name of the second file
        self.file1 - file object of the first file (closed after reading)
        self.file2 - file object of the second file (closed after reading)
        self.text1 - text of the first file
        self.text2 - text of the second file
        """
        self.options = options
        self.file1_name = file1_name
        self.file2_name = file2_name

        # try/finally guarantees the handle is released even if read() fails.
        self.file1 = open_file(file1_name)
        try:
            self.text1 = self.file1.read()
        finally:
            self.file1.close()

        self.file2 = open_file(file2_name)
        try:
            self.text2 = self.file2.read()
        finally:
            self.file2.close()

    def get_text(self,text):
        r"""Returns the text without formatting marks.

        The body of the ``document`` environment is extracted from `text` and
        cleaned with regular expressions: math environments, comments and
        LaTeX commands are removed, escaped special characters (``\$``,
        ``\&``, ``\%``, ``\#``, ``\_``, ``\{``, ``\}``, ``\^``) are replaced
        by the characters themselves, and whitespace is normalised.

        parameters:
        text - complete source of a LaTeX file

        Returns a list of lines; every line is stripped of surrounding spaces
        and tabs and ends with a newline character.

        If `text` contains no ``document`` environment, "Bad TeX file" is
        written to stderr and the program exits with status 1.
        """
        match_document = self._PAT_GET_DOCUMENT.search(text)
        if not match_document:
            sys.stderr.write("Bad TeX file\n")
            sys.exit(1)

        document = match_document.group('body')
        for pattern, replacement in self._CLEANUP_STEPS:
            document = pattern.sub(replacement, document)

        return [line.strip(' \t') + '\n' for line in document.split('\n')]

    def compare(self):
        r"""Compares two texts according to the parameters.

        Both texts are cleaned with `get_text` and passed to
        ``textdiff.TextDiff``.  The first option that is set among
        ``standard_diff``, ``context_diff`` and ``unified_diff`` selects the
        ``gnu_diff``, ``context_diff`` or ``unified_diff`` method; when none
        is set, ``normal_diff`` is used.

        Returns whatever the selected ``TextDiff`` method returns.
        """
        text1 = self.get_text(self.text1)
        text2 = self.get_text(self.text2)

        # Select the method before building the differ so the options are
        # examined in the same order as before.
        if self.options.standard_diff:
            method_name = 'gnu_diff'
        elif self.options.context_diff:
            method_name = 'context_diff'
        elif self.options.unified_diff:
            method_name = 'unified_diff'
        else:
            method_name = 'normal_diff'

        differ = textdiff.TextDiff(self.file1_name,self.file2_name,self.options,text1,text2)
        return getattr(differ, method_name)()
