| 1 | ############################################################################### |
|---|
| 2 | # |
|---|
| 3 | # Copyright (c) 2007, 2008 OpenHex SPRL. (http://openhex.com) All Rights |
|---|
| 4 | # Reserved. |
|---|
| 5 | # |
|---|
| 6 | # This program is free software; you can redistribute it and/or modify it under |
|---|
| 7 | # the terms of the GNU General Public License as published by the Free Software |
|---|
| 8 | # Foundation; either version 2 of the License, or (at your option) any later |
|---|
| 9 | # version. |
|---|
| 10 | # |
|---|
| 11 | # This program is distributed in the hope that it will be useful, but WITHOUT |
|---|
| 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|---|
| 13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
|---|
| 14 | # details. |
|---|
| 15 | # |
|---|
| 16 | # You should have received a copy of the GNU General Public License along with |
|---|
| 17 | # this program. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 18 | # |
|---|
| 19 | ############################################################################### |
|---|
| 20 | |
|---|
# Python 2 idiom: classes declared below without an explicit base class
# (ImageHref, OOSerializer) become new-style classes.
__metaclass__ = type
|---|
| 22 | |
|---|
| 23 | import os |
|---|
| 24 | import re |
|---|
| 25 | import md5 |
|---|
| 26 | import urllib |
|---|
| 27 | import zipfile |
|---|
| 28 | from cStringIO import StringIO |
|---|
| 29 | |
|---|
| 30 | import warnings |
|---|
| 31 | warnings.filterwarnings('always', module='relatorio.templates.opendocument') |
|---|
| 32 | |
|---|
| 33 | import lxml.etree |
|---|
| 34 | import genshi |
|---|
| 35 | import genshi.output |
|---|
| 36 | from genshi.template import MarkupTemplate |
|---|
| 37 | |
|---|
# Parses the href of a ``relatorio://`` hyperlink.  Capture groups, in order:
#   1. everything after ``relatorio://`` (directive text or plain expression)
#   2. ``/`` when the link closes a directive, else None
#   3. the directive name, or None when the link is a plain expression
#   4. the raw `` attr="value"`` part ('' for a bare directive)
#   5. the attribute name
#   6. the attribute value
# A directive keyword must be followed either by `` attr="value"`` or by the
# end of the href; otherwise the link is treated as a plain expression, so
# that names such as ``iffy`` or ``with_tax`` are not mistaken for the
# ``if`` / ``with`` directives.
GENSHI_TAGS = re.compile(r'''relatorio://((/)?(for|choose|otherwise|when|if|with)( (\w+)=["'](.*)["']|$)|.*)''')
|---|
# File extension used for a picture stored in the generated archive, keyed by
# the picture's MIME type.
EXTENSIONS = {
    'image/png': 'png',
    'image/jpeg': 'jpg',
    'image/bmp': 'bmp',
    'image/gif': 'gif',
    'image/tiff': 'tif',
    'image/xbm': 'xbm',
}
|---|
| 46 | |
|---|
# Short module-level aliases for the genshi output encoder and the lxml
# element factory used throughout this module.
_encode = genshi.output.encode
ETElement = lxml.etree.Element
|---|
| 49 | |
|---|
| 50 | |
|---|
class ImageHref:
    """Callable that stores a picture in a zip archive and returns its href.

    Template.generate() exposes an instance to templates under the name
    ``make_href``.
    """

    def __init__(self, zipfile):
        """Remember the writable zip archive that will receive the pictures.

        The parameter name shadows the ``zipfile`` module inside this method
        only; it is kept for backward compatibility with keyword callers.
        """
        self.zip = zipfile

    def __call__(self, expr, name):
        """Add the picture described by *expr* to the archive.

        :param expr: a ``(bitstream, mimetype)`` pair; *bitstream* must
                     support ``seek`` and ``read``, *mimetype* must be a key
                     of ``EXTENSIONS`` (a ``KeyError`` is raised otherwise).
        :param name: accepted for interface compatibility; the stored file
                     name is derived from the picture content instead.
        :returns: a one-item dict mapping the xlink ``href`` attribute to the
                  path of the picture inside the archive.
        """
        # hashlib supersedes the ``md5`` module, which is deprecated since
        # Python 2.5 and absent from Python 3.
        import hashlib

        bitstream, mimetype = expr
        bitstream.seek(0)
        file_content = bitstream.read()
        # Naming the file after its content hash means identical pictures
        # map to the same path and are written only once.
        digest = hashlib.md5(file_content).hexdigest()
        path = 'Pictures/%s.%s' % (digest, EXTENSIONS[mimetype])
        if path not in self.zip.namelist():
            self.zip.writestr(path, file_content)
        return {'{http://www.w3.org/1999/xlink}href': path}
|---|
| 65 | |
|---|
| 66 | |
|---|
class Template(MarkupTemplate):
    """Genshi markup template whose source is an OpenDocument archive.

    The markup is read from the ``content.xml`` and ``styles.xml`` members of
    the zip file at ``self.filepath`` (and from the same members of every
    embedded sub-document).  Hyperlinks whose target starts with
    ``relatorio://`` are rewritten into genshi ``py:`` directives or
    ``py:replace`` expressions before the XML is handed to genshi.
    """

    def __init__(self, source, filepath=None, filename=None, loader=None,
                 encoding=None, lookup='strict', allow_exec=True):
        # Initialised before delegating to the parent constructor --
        # presumably because that constructor triggers _parse(), which needs
        # both attributes (confirm against the genshi version in use).
        self.namespaces = {}
        self.inner_docs = []
        super(Template, self).__init__(source, filepath, filename, loader,
                                       encoding, lookup, allow_exec)

    def _parse(self, source, encoding):
        """Parse every content/styles member of the archive into one stream.

        *source* is ignored: the markup is read from ``self.filepath``.  Each
        parsed member is preceded by a ``relatorio`` processing instruction
        carrying its path inside the archive, so that the serializer can
        split the rendered stream back into files.
        """
        genshi_obj = super(Template, self)
        content_files, styles_files = [], []

        inzip = zipfile.ZipFile(self.filepath)
        try:
            content = inzip.read('content.xml')
            styles = inzip.read('styles.xml')

            content = genshi_obj._parse(self.add_directives(content),
                                        encoding)
            styles = genshi_obj._parse(self.add_directives(styles), encoding)
            content_files.append(('content.xml', content))
            styles_files.append(('styles.xml', styles))

            # add_directives() records embedded documents in self.inner_docs
            # as it meets them, including those nested in other embedded
            # documents, hence the loop until the list is exhausted.
            while self.inner_docs:
                doc = self.inner_docs.pop()
                c_path, s_path = doc + '/content.xml', doc + '/styles.xml'
                content = inzip.read(c_path)
                styles = inzip.read(s_path)

                c_parsed = genshi_obj._parse(self.add_directives(content),
                                             encoding)
                s_parsed = genshi_obj._parse(self.add_directives(styles),
                                             encoding)

                content_files.append((c_path, c_parsed))
                styles_files.append((s_path, s_parsed))
        finally:
            # Release the file handle even when a member is missing or fails
            # to parse.
            inzip.close()

        parsed = []
        for fpath, fparsed in content_files + styles_files:
            parsed.append((genshi.core.PI, ('relatorio', fpath), None))
            parsed += fparsed

        return parsed

    def add_directives(self, content):
        """Return *content* with relatorio markers turned into genshi markup.

        *content* is the raw XML of one archive member.  The result is a
        ``StringIO`` holding the transformed XML.  As a side effect
        ``self.namespaces`` is replaced by the namespace map of the document
        root (plus the genshi ``py`` prefix) and ``self.inner_docs`` may grow.
        """
        tree = lxml.etree.parse(StringIO(content))
        root = tree.getroot()
        self.namespaces = root.nsmap.copy()
        self.namespaces['py'] = 'http://genshi.edgewall.org/'

        self._invert_style(tree)
        self._handle_text_a(tree)
        self._handle_images(tree)
        self._handle_innerdocs(tree)
        return StringIO(lxml.etree.tostring(tree))

    def _invert_style(self, tree):
        """Swap ``<text:a><text:span>`` into ``<text:span><text:a>``.

        Applies to relatorio hyperlinks only, so that the link text ends up
        directly inside the ``text:a`` element while the span (and its
        styling) wraps the link.
        """
        xpath_expr = "//text:a[starts-with(@xlink:href, 'relatorio://')]"\
                     "/text:span"
        for span in tree.xpath(xpath_expr, namespaces=self.namespaces):
            text_a = span.getparent()
            outer = text_a.getparent()
            text_a.text = span.text
            span.text = ''
            text_a.remove(span)
            outer.replace(text_a, span)
            span.append(text_a)

    def _handle_text_a(self, tree):
        """Translate every relatorio ``text:a`` hyperlink of *tree*.

        A link holding a directive (for/choose/otherwise/when/if/with) is
        replaced, together with its closing link and everything in between,
        by the matching ``py:`` element.  Any other link receives a
        ``py:replace`` attribute holding its expression; when it lives in a
        non-string table cell the cell's ``office:value`` is made dynamic too.

        :raises ValueError: when opening and closing directives are not
                            balanced or cannot be wrapped.
        """
        # Some tag name constants
        table_cell_tag = '{%s}table-cell' % self.namespaces['table']
        attrib_name = '{%s}attrs' % self.namespaces['py']
        office_name = '{%s}value' % self.namespaces['office']
        office_valuetype = '{%s}value-type' % self.namespaces['office']
        genshi_name = '{%s}replace' % self.namespaces['py']
        xlink_href_attrib = '{%s}href' % self.namespaces['xlink']

        # First we create the list of all the relatorio text:a nodes, in
        # document order.  Those holding a genshi directive are additionally
        # kept aside so that openings and closings can be paired.
        genshi_directives, text_a = [], []
        xpath_expr = "//text:a[starts-with(@xlink:href, 'relatorio://')]"
        for statement in tree.xpath(xpath_expr, namespaces=self.namespaces):
            href = urllib.unquote(statement.attrib[xlink_href_attrib])
            match_obj = GENSHI_TAGS.match(href)
            expr, closing, directive, _, attr, attr_val = match_obj.groups()
            if expr != statement.text:
                txt = statement.text or ''
                warnings.warn('url and text do not match in %s: %s != %s'
                              % (self.filepath, expr, txt.encode('utf-8')))
            if directive is not None:
                genshi_directives.append((statement, href))
            text_a.append((statement,
                           (expr, closing, directive, attr, attr_val)))

        closing_for = self._pair_directives(genshi_directives)

        # The nodes are processed in document order on purpose: wrapping a
        # directive moves nodes around, which changes what the grand-parent
        # of a later expression link is.
        for a_node, parsed in text_a:
            expr, c_dir, directive, attr, a_val = parsed

            if directive is not None:
                if c_dir is not None:
                    # Closing statements are consumed together with their
                    # opening statement.
                    continue
                self._wrap_directive(a_node, closing_for[a_node],
                                     directive, attr, a_val)
            else:
                # It's not a genshi statement it's a python expression
                a_node.attrib[genshi_name] = expr
                parent = a_node.getparent().getparent()
                if parent is None or parent.tag != table_cell_tag:
                    continue
                if parent.attrib.get(office_valuetype, 'string') != 'string':
                    # The grand-parent tag is a non-string table cell: its
                    # office:value attribute is computed from the expression
                    # and the static value is dropped.
                    dico = "{'%s': %s}" % (office_name, expr)
                    parent.attrib[attrib_name] = dico
                    parent.attrib.pop(office_name, None)

    def _pair_directives(self, genshi_directives):
        """Map each opening directive node to its closing directive node.

        *genshi_directives* is the document-ordered list of
        ``(node, href)`` pairs; a closing directive is recognised by its
        ``relatorio:///`` prefix and closes the most recently opened one.
        """
        closing_for, still_open = {}, []
        for statement, href in genshi_directives:
            if not href.startswith('relatorio:///'):
                still_open.append(statement)
                continue
            if not still_open:
                raise ValueError('closing directive %r has no matching '
                                 'opening directive in %s'
                                 % (href, self.filepath))
            closing_for[still_open.pop()] = statement
        if still_open:
            raise ValueError('%d directive(s) are never closed in %s'
                             % (len(still_open), self.filepath))
        return closing_for

    def _wrap_directive(self, opening, closing, directive, attr, a_val):
        """Replace an opening/closing link pair by one ``py:<directive>``.

        The children of the nearest common ancestor that sit between the
        branch holding *opening* and the branch holding *closing* are moved
        into the new element; the new element takes the place of the opening
        branch and the closing branch is removed.
        """
        o_ancestors = list(opening.iterancestors())
        c_ancestors = list(closing.iterancestors())
        ancestor = None
        for candidate in o_ancestors:
            if candidate in c_ancestors:
                ancestor = candidate
                break
        if ancestor is None:
            raise ValueError('directive "%s" in %s: opening and closing '
                             'links share no ancestor'
                             % (directive, self.filepath))

        # Directives such as choose/otherwise carry no attribute; lxml
        # rejects a None attribute name, so only pass what was parsed.
        attrib = {}
        if attr is not None:
            attrib[attr] = a_val
        genshi_node = ETElement('{%s}%s' % (self.namespaces['py'],
                                            directive),
                                attrib=attrib,
                                nsmap=self.namespaces)

        outermost_o_ancestor = outermost_c_ancestor = None
        can_append = False
        for node in ancestor.iterchildren():
            if node in o_ancestors:
                outermost_o_ancestor = node
                can_append = True
                continue
            if node in c_ancestors:
                outermost_c_ancestor = node
                break
            if can_append:
                genshi_node.append(node)

        # Compare against None explicitly: an lxml element without children
        # is falsy.
        if outermost_o_ancestor is None or outermost_c_ancestor is None:
            raise ValueError('cannot wrap directive "%s" in %s: the opening '
                             'and closing links must each be nested inside '
                             'a distinct child of their common ancestor'
                             % (directive, self.filepath))
        ancestor.replace(outermost_o_ancestor, genshi_node)
        ancestor.remove(outermost_c_ancestor)

    def _handle_images(self, tree):
        """Make every frame named ``image: <expr>`` render a dynamic picture.

        The first child of such a ``draw:frame`` is replaced by a
        ``draw:image`` whose attributes are computed at render time by
        ``make_href(<expr>, '<expr>')``.
        """
        draw_name = '{%s}name' % self.namespaces['draw']
        draw_image = '{%s}image' % self.namespaces['draw']
        python_attrs = '{%s}attrs' % self.namespaces['py']
        prefix = 'image:'
        xpath_expr = "//draw:frame[starts-with(@draw:name, 'image:')]"
        for draw in tree.xpath(xpath_expr, namespaces=self.namespaces):
            d_name = draw.attrib[draw_name]
            # Strip exactly the prefix the XPath selected on, then any
            # surrounding blanks, so that both "image: expr" and
            # "image:expr" yield "expr".
            py_expr = d_name[len(prefix):].strip()
            attr_expr = "make_href(%s, %r)" % (py_expr, py_expr)
            image_node = ETElement(draw_image,
                                   attrib={python_attrs: attr_expr},
                                   nsmap=self.namespaces)
            draw.replace(draw[0], image_node)

    def _handle_innerdocs(self, tree):
        """Record the path of every embedded document referenced by *tree*.

        Embedded documents are ``draw:object`` elements shown as ``embed``
        whose href starts with ``./``; the leading ``./`` is dropped before
        the path is queued in ``self.inner_docs``.
        """
        href_attrib = '{%s}href' % self.namespaces['xlink']
        xpath_expr = "//draw:object[starts-with(@xlink:href, './')" \
                     "and @xlink:show='embed']"
        for draw in tree.xpath(xpath_expr, namespaces=self.namespaces):
            self.inner_docs.append(draw.attrib[href_attrib][2:])

    def generate(self, *args, **kwargs):
        """Render the template and return an ``OOStream``.

        A ``make_href`` entry bound to the output archive is added to the
        keyword arguments (replacing any value supplied by the caller)
        before they are forwarded to the genshi ``generate``.
        """
        serializer = OOSerializer(self.filepath)
        kwargs['make_href'] = ImageHref(serializer.outzip)
        generate_all = super(Template, self).generate(*args, **kwargs)

        return OOStream(generate_all, serializer)
|---|
| 259 | |
|---|
| 260 | |
|---|
class OOStream(genshi.core.Stream):
    """Genshi stream that renders itself through a dedicated serializer."""

    def __init__(self, content_stream, serializer):
        """Keep the event stream together with the callable serializing it."""
        self.serializer = serializer
        self.events = content_stream

    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
        """Return whatever the serializer produces for the wrapped events.

        *method*, *encoding*, *out* and the extra keyword arguments are
        accepted but not used.
        """
        to_document = self.serializer
        return to_document(self.events)

    def serialize(self, method, **kwargs):
        """Same result as render(); provided for interface compatibility."""
        return self.render(method, **kwargs)

    def __or__(self, function):
        """Apply *function* to the events, keeping the same serializer."""
        filtered_events = self.events | function
        return OOStream(filtered_events, self.serializer)
|---|
| 275 | |
|---|
| 276 | |
|---|
class OOSerializer:
    """Build a new OpenDocument archive from a rendered template stream."""

    def __init__(self, oo_path):
        """Open the template archive and an in-memory output archive.

        :param oo_path: path of the OpenDocument file used as template.
        """
        self.inzip = zipfile.ZipFile(oo_path)
        self.new_oo = StringIO()
        self.outzip = zipfile.ZipFile(self.new_oo, 'w')
        self.xml_serializer = genshi.output.XMLSerializer()

    def __call__(self, stream):
        """Write the output archive and return the buffer holding it.

        *stream* is split into per-file event lists at each ``relatorio``
        processing instruction, whose data names the archive member the
        following events belong to.  Members of the template archive are
        then copied over, the rendered ones being replaced by their
        serialized events and ``ObjectReplacements*`` entries being dropped.

        Both archives are closed afterwards, whether or not serialization
        succeeded, so an instance can only be called once.

        :raises ValueError: if an event shows up before the first
                            ``relatorio`` processing instruction.
        """
        files = {}
        stream_for = None
        try:
            for kind, data, pos in stream:
                if kind == genshi.core.PI and data[0] == 'relatorio':
                    stream_for = data[1]
                    continue
                if stream_for is None:
                    raise ValueError('event found before the first '
                                     'relatorio processing instruction')
                files.setdefault(stream_for, []).append((kind, data, pos))

            for f in self.inzip.infolist():
                if f.filename.startswith('ObjectReplacements'):
                    continue
                elif f.filename in files:
                    stream = files[f.filename]
                    self.outzip.writestr(f.filename,
                                         _encode(self.xml_serializer(stream)))
                else:
                    # Passing the ZipInfo itself keeps the metadata of the
                    # original entry.
                    self.outzip.writestr(f, self.inzip.read(f.filename))
        finally:
            # Do not leak the template file handle when rendering fails.
            self.inzip.close()
            self.outzip.close()

        return self.new_oo
|---|