| 1 | """gettext message extraction via Babel: http://babel.edgewall.org/""" | 
|---|
| 2 | from StringIO import StringIO | 
|---|
| 3 |  | 
|---|
| 4 | from babel.messages.extract import extract_python | 
|---|
| 5 |  | 
|---|
| 6 | from mako import lexer, parsetree | 
|---|
| 7 |  | 
|---|
def extract(fileobj, keywords, comment_tags, options):
    """Pull gettext messages out of a Mako template.

    The template source is read from ``fileobj``, parsed with Mako's lexer,
    and the top-level nodes of the resulting tree are passed on to
    ``extract_nodes``.

    :param fileobj: the file-like object holding the template source
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options; ``input_encoding``
                    (falling back to ``encoding``) is handed to the lexer
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # 'input_encoding' wins over the more generic 'encoding' option
    fallback_encoding = options.get('encoding', None)
    encoding = options.get('input_encoding', fallback_encoding)

    source = fileobj.read()
    template_lexer = lexer.Lexer(source, input_encoding=encoding)
    top_level_nodes = template_lexer.parse().get_children()

    for message in extract_nodes(top_level_nodes, keywords, comment_tags,
                                 options):
        yield message
|---|
| 27 |  | 
|---|
def extract_nodes(nodes, keywords, comment_tags, options):
    """Extract messages from Mako's lexer node objects

    Walks ``nodes`` in order, collecting translator comments from Mako
    comment nodes and running Babel's ``extract_python`` over the Python
    code carried by def/call/page tags, control lines, code blocks and
    expressions.  Tags that have child nodes are descended into recursively.

    :param nodes: an iterable of Mako parsetree.Node objects to extract from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional); passed
                    through unchanged to ``extract_python``
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # Pending translator comments as (lineno, text) pairs, and whether we are
    # currently inside a run of consecutive translator comment nodes
    translator_comments = []
    in_translator_comments = False

    for node in nodes:
        child_nodes = None
        if in_translator_comments and isinstance(node, parsetree.Text) and \
                not node.content.strip():
            # Ignore whitespace within translator comments
            continue

        if isinstance(node, parsetree.Comment):
            value = node.text.strip()
            if in_translator_comments:
                # Continuation of an already started translator comment
                translator_comments.extend(_split_comment(node.lineno, value))
                continue
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.extend(_split_comment(node.lineno,
                                                              value))
                    # Stop at the first matching tag: without this a comment
                    # matching several tags (e.g. 'NOTE' and 'NOTE:') would
                    # be recorded once per tag
                    break
            continue

        # Pick the Python source to scan for this kind of node
        if isinstance(node, parsetree.DefTag):
            code = node.function_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.CallTag):
            code = node.code.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.PageTag):
            code = node.body_decl.code
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # Closing control lines carry nothing to extract and end
                # any pending translator comment
                translator_comments = []
                in_translator_comments = False
                continue
            code = node.text
        elif isinstance(node, parsetree.Code):
            # <% and <%! blocks would provide their own translator comments
            translator_comments = []
            in_translator_comments = False

            code = node.code.code
        elif isinstance(node, parsetree.Expression):
            code = node.code.code
        else:
            # Any other node breaks the comment/message adjacency
            translator_comments = []
            in_translator_comments = False
            continue

        # Comments don't apply unless they immediately precede the message
        if translator_comments and \
                translator_comments[-1][0] < node.lineno - 1:
            translator_comments = []
        else:
            # Drop the line numbers, keeping only the comment text
            translator_comments = \
                [comment[1] for comment in translator_comments]

        # extract_python is handed a byte string: non-ASCII characters in
        # unicode source are replaced by backslash escapes
        if isinstance(code, unicode):
            code = code.encode('ascii', 'backslashreplace')
        code = StringIO(code)
        for lineno, funcname, messages, python_translator_comments \
                in extract_python(code, keywords, comment_tags, options):
            # lineno is relative to the code snippet; offset it by the
            # node's own line to get a position in the template
            yield (node.lineno + (lineno - 1), funcname, messages,
                   translator_comments + python_translator_comments)

        # Translator comments only ever apply to the node that follows them
        translator_comments = []
        in_translator_comments = False

        if child_nodes:
            for extracted in extract_nodes(child_nodes, keywords, comment_tags,
                                           options):
                yield extracted
|---|
| 112 |  | 
|---|
| 113 |  | 
|---|
| 114 | def _split_comment(lineno, comment): | 
|---|
| 115 |     """Return the multiline comment at lineno split into a list of comment line | 
|---|
| 116 |     numbers and the accompanying comment line""" | 
|---|
| 117 |     return [(lineno + index, line) for index, line in | 
|---|
| 118 |             enumerate(comment.splitlines())] | 
|---|