root/relatorio/templates/opendocument.py @ 119:5c2412920bff

Revision 119:5c2412920bff, 26.3 kB (checked in by Gaëtan de Menten <ged@…>, 4 years ago)

fix issue #10 even if there is only text between the opening tag and the
closing tag.

Line 
1###############################################################################
2#
3# Copyright (c) 2007, 2008 OpenHex SPRL. (http://openhex.com) All Rights
4# Reserved.
5#
6# This program is free software; you can redistribute it and/or modify it under
7# the terms of the GNU General Public License as published by the Free Software
8# Foundation; either version 2 of the License, or (at your option) any later
9# version.
10#
11# This program is distributed in the hope that it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14# details.
15#
16# You should have received a copy of the GNU General Public License along with
17# this program.  If not, see <http://www.gnu.org/licenses/>.
18#
19###############################################################################
20
21__metaclass__ = type
22
23import re
24import md5
25import time
26import urllib
27import zipfile
28from cStringIO import StringIO
29from copy import deepcopy
30
31
32import warnings
33warnings.filterwarnings('always', module='relatorio.templates.opendocument')
34
35import lxml.etree
36import genshi
37import genshi.output
38from genshi.template import MarkupTemplate
39from genshi.filters import Transformer
40from genshi.filters.transform import ENTER, EXIT
41from genshi.core import Stream
42
43
44from relatorio.templates.base import RelatorioStream
45from relatorio.reporting import Report, MIMETemplateLoader
46try:
47    from relatorio.templates.chart import Template as ChartTemplate
48except ImportError:
49    ChartTemplate = type(None)
50
# Pattern applied to the expression carried by a relatorio statement.
# When the expression is a genshi directive the four groups are:
#   1. '/' if this is a closing tag, None otherwise
#   2. the directive name
#   3. the name of the single attribute (if any)
#   4. the value of that attribute (if any)
# For any other expression the second alternative matches and every group is
# None.
GENSHI_EXPR = re.compile(
    r'''
        (/)?                                 # is this a closing tag?
        (for|if|choose|when|otherwise|with)  # tag directive
        \s*
        (?:\s(\w+)=["'](.*)["']|$)           # match a single attr & its value
        |
        .*                                   # or anything else
        ''',
    re.VERBOSE)

# Maps an image mimetype to the file extension used when the image is stored
# in the generated document.
EXTENSIONS = {
    'image/png': 'png',
    'image/jpeg': 'jpg',
    'image/bmp': 'bmp',
    'image/gif': 'gif',
    'image/tiff': 'tif',
    'image/xbm': 'xbm',
}
67
# Namespace URI of the relatorio-specific elements inserted in the templates.
RELATORIO_URI = 'http://relatorio.openhex.org/'
# Module-level shortcuts to the genshi encoder and the lxml element factory.
output_encode = genshi.output.encode
EtreeElement = lxml.etree.Element
71
def guess_type(val):
    """Return the office value-type name matching the Python type of *val*.

    Returns 'string' for text values, 'float' for integer and floating point
    values (including Python 2 ``long``), and None for anything else.
    """
    # The text and integer builtins differ between interpreter versions:
    # ``unicode`` and ``long`` only exist on Python 2. Resolve them
    # defensively so that the lookup itself can never raise NameError.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    try:
        number_types = (int, long, float)
    except NameError:
        number_types = (int, float)

    if isinstance(val, string_types):
        return 'string'
    if isinstance(val, number_types):
        return 'float'
    return None
77
class OOTemplateError(genshi.template.base.TemplateSyntaxError):
    """Raised when a relatorio/genshi statement found in an OpenDocument
    template is syntactically invalid.
    """
80
81
class ImageHref:
    """Callable that stores an image in the output ODF zip file and returns
    the attribute dictionary referencing it.
    """

    def __init__(self, zfile, context):
        """Keep a reference to the output zip file *zfile* and a copy of the
        rendering *context* (the copy is used to render embedded reports and
        charts).
        """
        self.zip = zfile
        self.context = context.copy()

    def __call__(self, expr):
        """Add the image described by *expr* to the zip file.

        *expr* is a ``(bitstream, mimetype)`` pair. *bitstream* is either a
        file-like object, a Report or a chart template; the last two are
        rendered with the stored context first. *mimetype* must be one of
        the keys of EXTENSIONS (a KeyError is raised otherwise).

        Returns a dictionary holding the xlink:href attribute pointing at
        the stored picture.
        """
        # hashlib supersedes the deprecated md5 module; imported locally so
        # that the block does not rely on a new module-level import.
        import hashlib

        bitstream, mimetype = expr
        if isinstance(bitstream, Report):
            bitstream = bitstream(**self.context).render()
        elif isinstance(bitstream, ChartTemplate):
            bitstream = bitstream.generate(**self.context).render()
        bitstream.seek(0)
        file_content = bitstream.read()
        # the picture is named after the digest of its content, so that an
        # identical image is only stored once
        name = hashlib.md5(file_content).hexdigest()
        path = 'Pictures/%s.%s' % (name, EXTENSIONS[mimetype])
        if path not in self.zip.namelist():
            self.zip.writestr(path, file_content)
        return {'{http://www.w3.org/1999/xlink}href': path}
102
103
class ColumnCounter:
    """Keeps track, for each table, of the largest number of cells counted
    in any of its column loops.

    ``temp_counters`` holds the running count of each loop (keyed by loop
    id) and ``counters`` the maximum stored so far for each table name.
    """

    def __init__(self):
        self.counters = {}
        self.temp_counters = {}

    def reset(self, loop_id):
        "(re)starts the running count of the given loop at zero"
        self.temp_counters[loop_id] = 0

    def inc(self, loop_id):
        "adds one to the running count of the given loop"
        self.temp_counters[loop_id] = self.temp_counters[loop_id] + 1

    def store(self, loop_id, table_name):
        """discards the running count of the given loop and records it for
        the table if it is larger than what was recorded before"""
        finished = self.temp_counters.pop(loop_id)
        recorded = self.counters.get(table_name, 0)
        self.counters[table_name] = max(finished, recorded)
121
122
def wrap_nodes_between(first, last, new_parent):
    """An helper function to move all nodes between two nodes to a new node
    and add that new node to their former parent. The boundary nodes are
    removed in the process.

    *first* and *last* are expected to be siblings, *last* coming after
    *first*. The text directly following *first* becomes the text of
    *new_parent* and the text directly following *last* becomes its tail, so
    that no text is lost when the boundary nodes are dropped.
    """
    old_parent = first.getparent()

    # Any text after the opening tag (and not within a tag) need to be handled
    # explicitly. For example in <if>xxx<span>yyy</span>zzz</if>, zzz is
    # copied along the span tag, but not xxx, which corresponds to the tail
    # attribute of the opening tag.
    if first.tail:
        new_parent.text = first.tail
    for node in first.itersiblings():
        if node is last:
            break
        # appending a node to a new parent also
        # remove it from its previous parent
        new_parent.append(node)
    old_parent.replace(first, new_parent)
    # The text following the closing tag is attached to the closing node (as
    # its tail) and would be dropped along with it: hand it over to the new
    # node first.
    new_parent.tail = last.tail
    old_parent.remove(last)
144
145
146class Template(MarkupTemplate):
147
148    def __init__(self, source, filepath=None, filename=None, loader=None,
149                 encoding=None, lookup='strict', allow_exec=True):
150        self.namespaces = {}
151        self.inner_docs = []
152        self.has_col_loop = False
153        super(Template, self).__init__(source, filepath, filename, loader,
154                                       encoding, lookup, allow_exec)
155
156    def _parse(self, source, encoding):
157        """parses the odf file.
158
159        It adds genshi directives and finds the inner docs.
160        """
161        zf = zipfile.ZipFile(self.filepath)
162        content = zf.read('content.xml')
163        styles = zf.read('styles.xml')
164
165        template = super(Template, self)
166        content = template._parse(self.insert_directives(content), encoding)
167        styles = template._parse(self.insert_directives(styles), encoding)
168        content_files = [('content.xml', content)]
169        styles_files = [('styles.xml', styles)]
170
171        while self.inner_docs:
172            doc = self.inner_docs.pop()
173            c_path, s_path = doc + '/content.xml', doc + '/styles.xml'
174            content = zf.read(c_path)
175            styles = zf.read(s_path)
176
177            c_parsed = template._parse(self.insert_directives(content),
178                                       encoding)
179            s_parsed = template._parse(self.insert_directives(styles),
180                                       encoding)
181            content_files.append((c_path, c_parsed))
182            styles_files.append((s_path, s_parsed))
183
184        zf.close()
185        parsed = []
186        for fpath, fparsed in content_files + styles_files:
187            parsed.append((genshi.core.PI, ('relatorio', fpath), None))
188            parsed += fparsed
189
190        return parsed
191
192    def insert_directives(self, content):
193        """adds the genshi directives, handle the images and the innerdocs.
194        """
195        tree = lxml.etree.parse(StringIO(content))
196        root = tree.getroot()
197        self.namespaces = root.nsmap.copy()
198        self.namespaces['py'] = 'http://genshi.edgewall.org/'
199        self.namespaces['relatorio'] = RELATORIO_URI
200
201        self._invert_style(tree)
202        self._handle_relatorio_tags(tree)
203        self._handle_images(tree)
204        self._handle_innerdocs(tree)
205        return StringIO(lxml.etree.tostring(tree))
206
207    def _invert_style(self, tree):
208        "inverts the text:a and text:span"
209        xpath_expr = "//text:a[starts-with(@xlink:href, 'relatorio://')]" \
210                     "/text:span"
211        for span in tree.xpath(xpath_expr, namespaces=self.namespaces):
212            text_a = span.getparent()
213            outer = text_a.getparent()
214            text_a.text = span.text
215            span.text = ''
216            text_a.remove(span)
217            outer.replace(text_a, span)
218            span.append(text_a)
219
220    def _relatorio_statements(self, tree):
221        "finds the relatorio statements (text:a/text:placeholder)"
222        # If this node href matches the relatorio URL it is kept.
223        # If this node href matches a genshi directive it is kept for further
224        # processing.
225        xlink_href_attrib = '{%s}href' % self.namespaces['xlink']
226        text_a = '{%s}a' % self.namespaces['text']
227        placeholder = '{%s}placeholder' % self.namespaces['text']
228        s_xpath = "//text:a[starts-with(@xlink:href, 'relatorio://')]" \
229                  "| //text:placeholder"
230
231        r_statements = []
232        opened_tags = []
233        # We map each opening tag with its closing tag
234        closing_tags = {}
235        for statement in tree.xpath(s_xpath, namespaces=self.namespaces):
236            if statement.tag == placeholder:
237                expr = statement.text[1:-1]
238            elif statement.tag == text_a:
239                expr = urllib.unquote(statement.attrib[xlink_href_attrib][12:])
240
241            if not expr:
242                raise OOTemplateError("No expression in the tag",
243                                      self.filepath)
244            closing, directive, attr, attr_val = \
245                    GENSHI_EXPR.match(expr).groups()
246            is_opening = closing != '/'
247
248            warn_msg = None
249            if not statement.text:
250                warn_msg = "No statement text in '%s' for '%s'" \
251                           % (self.filepath, expr)
252            elif expr != statement.text and statement.tag == text_a:
253                warn_msg = "url and text do not match in %s: %s != %s" \
254                           % (self.filepath, expr,
255                              statement.text.encode('utf-8'))
256            if warn_msg:
257                if directive is not None and not is_opening:
258                    warn_msg += " corresponding to opening tag '%s'" \
259                                % opened_tags[-1].text
260                warnings.warn(warn_msg)
261
262            if directive is not None:
263                # map closing tags with their opening tag
264                if is_opening:
265                    opened_tags.append(statement)
266                else:
267                    closing_tags[id(opened_tags.pop())] = statement
268            # - we only need to return opening statements
269            if is_opening:
270                r_statements.append((statement,
271                                     (expr, directive, attr, attr_val))
272                                   )
273        assert not opened_tags
274        return r_statements, closing_tags
275
276    def _handle_relatorio_tags(self, tree):
277        """
278        Will treat all relatorio tag (py:if/for/choose/when/otherwise)
279        tags
280        """
281        # Some tag/attribute name constants
282        table_namespace = self.namespaces['table']
283        table_row_tag = '{%s}table-row' % table_namespace
284        table_cell_tag = '{%s}table-cell' % table_namespace
285
286        office_name = '{%s}value' % self.namespaces['office']
287        office_valuetype = '{%s}value-type' % self.namespaces['office']
288
289        py_namespace = self.namespaces['py']
290        py_attrs_attr = '{%s}attrs' % py_namespace
291        py_replace = '{%s}replace' % py_namespace
292
293        r_statements, closing_tags = self._relatorio_statements(tree)
294
295        for r_node, parsed in r_statements:
296            expr, directive, attr, a_val = parsed
297
298            # If the node is a genshi directive statement:
299            if directive is not None:
300                opening = r_node
301                closing = closing_tags[id(r_node)]
302
303                # - we find the nearest common ancestor of the closing and
304                #   opening statements
305                o_ancestors = [opening]
306                c_ancestors = [closing] + list(closing.iterancestors())
307                ancestor = None
308                for node in opening.iterancestors():
309                    try:
310                        idx = c_ancestors.index(node)
311                        assert c_ancestors[idx] == node
312                        # we only need ancestors up to the common one
313                        del c_ancestors[idx:]
314                        ancestor = node
315                        break
316                    except ValueError:
317                        # c_ancestors.index(node) raise ValueError if node is
318                        # not a child of c_ancestors
319                        pass
320                    o_ancestors.append(node)
321                assert ancestor is not None, \
322                       "No common ancestor found for opening and closing tag"
323
324                outermost_o_ancestor = o_ancestors[-1]
325                outermost_c_ancestor = c_ancestors[-1]
326
327                # handle horizontal repetitions (over columns)
328                if directive == "for" and ancestor.tag == table_row_tag:
329                    a_val = self._handle_column_loops(parsed, ancestor,
330                                                      opening,
331                                                      outermost_o_ancestor,
332                                                      outermost_c_ancestor)
333
334                # - we create a <py:xxx> node
335                if attr is not None:
336                    attribs = {attr: a_val}
337                else:
338                    attribs = {}
339                genshi_node = EtreeElement('{%s}%s' % (py_namespace,
340                                                       directive),
341                                           attrib=attribs,
342                                           nsmap=self.namespaces)
343
344                # - we move all the nodes between the opening and closing
345                #   statements to this new node (append also removes from old
346                #   parent)
347                # - we replace the opening statement by the <py:xxx> node
348                # - we delete the closing statement (and its ancestors)
349                wrap_nodes_between(outermost_o_ancestor, outermost_c_ancestor,
350                                   genshi_node)
351            else:
352                # It's not a genshi statement it's a python expression
353                r_node.attrib[py_replace] = expr
354                parent = r_node.getparent().getparent()
355                if parent is None or parent.tag != table_cell_tag:
356                    continue
357
358                # The grand-parent tag is a table cell we should set the
359                # correct value and type for this cell.
360                dico = "{'%s': %s, '%s': __relatorio_guess_type(%s)}"
361                parent.attrib[py_attrs_attr] = dico % (office_name, expr,
362                                                       office_valuetype, expr)
363                parent.attrib.pop(office_valuetype, None)
364                parent.attrib.pop(office_name, None)
365
366    def _handle_column_loops(self, statement, ancestor, opening,
367                             outer_o_node, outer_c_node):
368        _, directive, attr, a_val = statement
369
370        self.has_col_loop = True
371
372        table_namespace = self.namespaces['table']
373        table_col_tag = '{%s}table-column' % table_namespace
374        table_num_col_attr = '{%s}number-columns-repeated' % table_namespace
375
376        py_namespace = self.namespaces['py']
377        py_attrs_attr = '{%s}attrs' % py_namespace
378
379        repeat_tag = '{%s}repeat' % self.namespaces['relatorio']
380
381        # table node (it is not necessarily the direct parent of ancestor)
382        table_node = ancestor.iterancestors('{%s}table' % table_namespace) \
383                             .next()
384        table_name = table_node.attrib['{%s}name' % table_namespace]
385
386        # add counting instructions
387        loop_id = id(opening)
388
389        # 1) add reset counter code on the row opening tag
390        #    (through a py:attrs attribute).
391        # Note that table_name is not needed in the first two
392        # operations, but a unique id within the table is required
393        # to support nested column repetition
394        ancestor.attrib[py_attrs_attr] = \
395            "__relatorio_reset_col_count(%d)" % loop_id
396
397        # 2) add increment code (through a py:attrs attribute) on
398        #    the first cell node after the opening (cell node)
399        #    ancestor
400        enclosed_cell = outer_o_node.getnext()
401        assert enclosed_cell.tag == '{%s}table-cell' % table_namespace
402        enclosed_cell.attrib[py_attrs_attr] = \
403            "__relatorio_inc_col_count(%d)" % loop_id
404
405        # 3) add "store count" code as a py:replace node, as the
406        #    last child of the row
407        attr_value = "__relatorio_store_col_count(%d, %r)" \
408                     % (loop_id, table_name)
409        replace_node = EtreeElement('{%s}replace' % py_namespace,
410                                    attrib={'value': attr_value},
411                                    nsmap=self.namespaces)
412        ancestor.append(replace_node)
413
414        # find the position in the row of the cells holding the
415        # <for> and </for> instructions
416        # We use "*" so as to count both normal cells and covered/hidden cells
417        position_xpath_expr = 'count(preceding-sibling::*)'
418        opening_pos = \
419            int(outer_o_node.xpath(position_xpath_expr,
420                                   namespaces=self.namespaces))
421        closing_pos = \
422            int(outer_c_node.xpath(position_xpath_expr,
423                                   namespaces=self.namespaces))
424
425        # check whether or not the opening tag spans several rows
426        a_val = self._handle_row_spanned_column_loops(
427                    statement, outer_o_node, opening_pos, closing_pos)
428
429        # check if this table's headers were already processed
430        repeat_node = table_node.find(repeat_tag)
431        if repeat_node is not None:
432            prev_pos = (int(repeat_node.attrib['opening']),
433                        int(repeat_node.attrib['closing']))
434            if (opening_pos, closing_pos) != prev_pos:
435                raise Exception(
436                    'Incoherent column repetition found! '
437                    'If a table has several lines with repeated '
438                    'columns, the repetition need to be on the '
439                    'same columns across all lines.')
440        else:
441            # compute splits: oo collapses the headers of adjacent
442            # columns which use the same style. We need to split
443            # any column header which is repeated so many times
444            # that it encompasses any of the column headers that
445            # we need to repeat
446            to_split = []
447            idx = 0
448            childs = list(table_node.iterchildren(table_col_tag))
449            for tag in childs:
450                inc = int(tag.attrib.get(table_num_col_attr, 1))
451                oldidx = idx
452                idx += inc
453                if oldidx < opening_pos < idx or \
454                   oldidx < closing_pos < idx:
455                    to_split.append(tag)
456
457            # split tags
458            for tag in to_split:
459                tag_pos = table_node.index(tag)
460                num = int(tag.attrib.pop(table_num_col_attr))
461                new_tags = [deepcopy(tag) for _ in range(num)]
462                table_node[tag_pos:tag_pos+1] = new_tags
463
464            # recompute the list of column headers as it could
465            # have changed.
466            coldefs = list(table_node.iterchildren(table_col_tag))
467
468            # compute the column header nodes corresponding to
469            # the opening and closing tags.
470            first = table_node[opening_pos]
471            last = table_node[closing_pos]
472
473            # add a <relatorio:repeat> node around the column
474            # definitions nodes
475            attribs = {
476               "opening": str(opening_pos),
477               "closing": str(closing_pos),
478               "table": table_name
479            }
480            repeat_node = EtreeElement(repeat_tag, attrib=attribs,
481                                       nsmap=self.namespaces)
482            wrap_nodes_between(first, last, repeat_node)
483        return a_val
484
485    def _handle_row_spanned_column_loops(self, statement, outer_o_node,
486                                         opening_pos, closing_pos):
487        """handles column repetitions which span several rows, by duplicating
488        the py:for node for each row, and make the loops work on a copy of the
489        original iterable as to not exhaust generators."""
490
491        _, directive, attr, a_val = statement
492        table_rowspan_attr = '{%s}number-rows-spanned' \
493                             % self.namespaces['table']
494
495        # checks wether there is a (meaningful) rowspan
496        rows_spanned = int(outer_o_node.attrib.get(table_rowspan_attr, 1))
497        if rows_spanned == 1:
498            return a_val
499
500        py_namespace = self.namespaces['py']
501        table_namespace = self.namespaces['table']
502        table_row_tag = '{%s}table-row' % table_namespace
503        table_cov_cell_tag = '{%s}covered-table-cell' % table_namespace
504
505        # if so, we need to:
506
507        # 1) create a with node to define a temporary variable
508        temp_var = "__relatorio_temp%d" % id(outer_o_node)
509        # a_val == "target in iterable"
510        target, iterable = a_val.split(' in ', 1)
511        vars = "%s = list(%s)" % (temp_var, iterable.strip())
512        with_node = EtreeElement('{%s}with' % py_namespace,
513                                 attrib={"vars": vars},
514                                 nsmap=self.namespaces)
515
516        # 2) transform a_val to use that temporary variable
517        a_val = "%s in %s" % (target, temp_var)
518
519        # 3) wrap the corresponding cells on the next row(s)
520        #    (those should be covered-table-cell) inside a
521        #    duplicate py:for node (looping on the temporary
522        #    variable).
523        row_node = outer_o_node.getparent()
524        row_node.addprevious(with_node)
525        rows_to_wrap = [row_node]
526        assert row_node.tag == table_row_tag
527        next_rows = row_node.itersiblings(table_row_tag)
528        for row_idx in range(rows_spanned-1):
529            next_row_node = next_rows.next()
530            rows_to_wrap.append(next_row_node)
531            # compute the start and end nodes
532            first = next_row_node[opening_pos]
533            last = next_row_node[closing_pos]
534            assert first.tag == table_cov_cell_tag
535            assert last.tag == table_cov_cell_tag
536            # wrap them
537            tag = '{%s}%s' % (py_namespace, directive)
538            for_node = EtreeElement(tag,
539                                    attrib={attr: a_val},
540                                    nsmap=self.namespaces)
541            wrap_nodes_between(first, last, for_node)
542
543        # 4) wrap all the corresponding rows indide the "with"
544        #    node
545        for node in rows_to_wrap:
546            with_node.append(node)
547        return a_val
548
549    def _handle_images(self, tree):
550        "replaces all draw:frame named 'image: ...' by draw:image nodes"
551        draw_name = '{%s}name' % self.namespaces['draw']
552        draw_image = '{%s}image' % self.namespaces['draw']
553        py_attrs = '{%s}attrs' % self.namespaces['py']
554        xpath_expr = "//draw:frame[starts-with(@draw:name, 'image:')]"
555        for draw in tree.xpath(xpath_expr, namespaces=self.namespaces):
556            d_name = draw.attrib[draw_name]
557            attr_expr = "__relatorio_make_href(%s)" % d_name[7:]
558            image_node = EtreeElement(draw_image,
559                                      attrib={py_attrs: attr_expr},
560                                      nsmap=self.namespaces)
561            draw.replace(draw[0], image_node)
562
563    def _handle_innerdocs(self, tree):
564        "finds inner_docs and adds them to the processing stack."
565        href_attrib = '{%s}href' % self.namespaces['xlink']
566        xpath_expr = "//draw:object[starts-with(@xlink:href, './')" \
567                     "and @xlink:show='embed']"
568        for draw in tree.xpath(xpath_expr, namespaces=self.namespaces):
569            self.inner_docs.append(draw.attrib[href_attrib][2:])
570
571    def generate(self, *args, **kwargs):
572        "creates the RelatorioStream."
573        serializer = OOSerializer(self.filepath)
574        kwargs['__relatorio_make_href'] = ImageHref(serializer.outzip, kwargs)
575        kwargs['__relatorio_guess_type'] = guess_type
576
577        counter = ColumnCounter()
578        kwargs['__relatorio_reset_col_count'] = counter.reset
579        kwargs['__relatorio_inc_col_count'] = counter.inc
580        kwargs['__relatorio_store_col_count'] = counter.store
581
582        stream = super(Template, self).generate(*args, **kwargs)
583        if self.has_col_loop:
584            transformation = DuplicateColumnHeaders(counter)
585            col_filter = Transformer('//repeat[namespace-uri()="%s"]'
586                                     % RELATORIO_URI)
587            col_filter = col_filter.apply(transformation)
588            stream = Stream(list(stream), self.serializer) | col_filter
589        return RelatorioStream(stream, serializer)
590
591
class DuplicateColumnHeaders(object):
    """Stream transformation repeating the events enclosed in each matched
    repeat tag as many times as the number of columns counted for its table.
    The repeat tag itself is dropped from the stream.
    """

    def __init__(self, counter):
        # counter is expected to expose a "counters" dictionary mapping
        # table names to their number of columns (see ColumnCounter)
        self.counter = counter

    def __call__(self, stream):
        """Yields the transformed (mark, event) pairs of *stream*."""
        for mark, (kind, data, pos) in stream:
            if mark is not ENTER:
                yield mark, (kind, data, pos)
                continue

            # for each repeat tag found, get the number of columns for that
            # table. No count is stored when the row holding the loop was
            # not rendered at all: there is nothing to repeat in that case.
            attrs = data[1]
            table = attrs.get('table')
            col_count = self.counter.counters.get(table, 0)

            # collect events (column header tags) to repeat
            events = []
            for submark, event in stream:
                if submark is EXIT:
                    break
                events.append(event)

            # repeat them
            for _ in range(col_count):
                for event in events:
                    yield None, event
618
619
class OOSerializer:
    """Serializes a relatorio event stream into a new OpenDocument file,
    built in memory from the original file.
    """

    def __init__(self, oo_path):
        """Opens the original document (*oo_path*) and creates the in-memory
        zip file receiving the result.
        """
        self.inzip = zipfile.ZipFile(oo_path)
        self.new_oo = StringIO()
        self.outzip = zipfile.ZipFile(self.new_oo, 'w')
        self.xml_serializer = genshi.output.XMLSerializer()

    def __call__(self, stream):
        """Writes the output document and returns the buffer holding it.

        The events of *stream* are dispatched to the file named by the
        ``relatorio`` processing instruction preceding them. Every file of
        the original document is then copied to the output, except the
        ObjectReplacements entries (skipped) and the files found in the
        stream (replaced by their serialized events). Both zip files are
        closed afterwards, even when the serialization fails.
        """
        try:
            files = {}
            for kind, data, pos in stream:
                if kind == genshi.core.PI and data[0] == 'relatorio':
                    stream_for = data[1]
                    continue
                files.setdefault(stream_for, []).append((kind, data, pos))

            now = time.localtime()[:6]
            for f_info in self.inzip.infolist():
                if f_info.filename.startswith('ObjectReplacements'):
                    continue
                elif f_info.filename in files:
                    stream = files[f_info.filename]
                    # create a new file descriptor, copying some attributes
                    # from the original file
                    new_info = zipfile.ZipInfo(f_info.filename, now)
                    for attr in ('compress_type', 'flag_bits',
                                 'create_system'):
                        setattr(new_info, attr, getattr(f_info, attr))
                    serialized_stream = output_encode(
                        self.xml_serializer(stream))
                    self.outzip.writestr(new_info, serialized_stream)
                else:
                    self.outzip.writestr(f_info,
                                         self.inzip.read(f_info.filename))
        finally:
            # never leave the archives opened, whatever happened above
            try:
                self.inzip.close()
            finally:
                self.outzip.close()

        return self.new_oo
655
# Register this template class with the MIME template loader under the
# 'oo.org' key.
MIMETemplateLoader.add_factory('oo.org', Template)
Note: See TracBrowser for help on using the browser.