| 1 | """gettext message extraction via Babel: http://babel.edgewall.org/""" |
|---|
| 2 | from StringIO import StringIO |
|---|
| 3 | |
|---|
| 4 | from babel.messages.extract import extract_python |
|---|
| 5 | |
|---|
| 6 | from mako import lexer, parsetree |
|---|
| 7 | |
|---|
def extract(fileobj, keywords, comment_tags, options):
    """Babel extraction entry point for Mako templates.

    The template source is read from ``fileobj``, parsed with Mako's lexer
    and the resulting top-level nodes are handed to :func:`extract_nodes`.

    :param fileobj: the file-like object holding the template source
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options; ``input_encoding``
                    (or, failing that, ``encoding``) is passed to the lexer
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # ``input_encoding`` wins; ``encoding`` is only the fallback.
    fallback_encoding = options.get('encoding', None)
    encoding = options.get('input_encoding', fallback_encoding)

    source = fileobj.read()
    tree = lexer.Lexer(source, input_encoding=encoding).parse()
    top_level_nodes = tree.get_children()

    for result in extract_nodes(top_level_nodes, keywords, comment_tags,
                                options):
        yield result
|---|
| 27 | |
|---|
def extract_nodes(nodes, keywords, comment_tags, options):
    """Extract messages from Mako's lexer node objects

    Template comments (``parsetree.Comment``) that start with one of
    ``comment_tags`` open a run of translator comments; directly following
    comment nodes extend that run and whitespace-only text nodes inside it
    are skipped.  The collected comments are attached to the messages of the
    next code-bearing node, provided the last comment line sits on the line
    directly before that node.  Nested nodes of ``<%def>`` and ``<%call>``
    tags are processed recursively after the tag itself.

    :param nodes: an iterable of Mako parsetree.Node objects to extract from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # Pending translator comments as (lineno, text) pairs.
    translator_comments = []
    in_translator_comments = False

    for node in nodes:
        child_nodes = None
        if in_translator_comments and isinstance(node, parsetree.Text) and \
                not node.content.strip():
            # Ignore whitespace within translator comments
            continue

        if isinstance(node, parsetree.Comment):
            value = node.text.strip()
            if in_translator_comments:
                # Continuation of an already opened translator comment run
                translator_comments.extend(_split_comment(node.lineno, value))
                continue
            # Record the comment once, even when several of the configured
            # tags are a prefix of it (e.g. 'TRANSLATOR' and 'TRANSLATORS:'),
            # so it is not duplicated in the extracted results.
            if any(value.startswith(comment_tag)
                   for comment_tag in comment_tags):
                in_translator_comments = True
                translator_comments.extend(_split_comment(node.lineno,
                                                          value))
            continue

        # Pick the Python source carried by the node, if any
        if isinstance(node, parsetree.DefTag):
            code = node.function_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.CallTag):
            code = node.code.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.PageTag):
            code = node.body_decl.code
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # Closing control lines carry no code to scan
                translator_comments = []
                in_translator_comments = False
                continue
            code = node.text
        elif isinstance(node, parsetree.Code):
            # <% and <%! blocks would provide their own translator comments
            translator_comments = []
            in_translator_comments = False

            code = node.code.code
        elif isinstance(node, parsetree.Expression):
            code = node.code.code
        else:
            # Any other node type ends a pending translator comment run
            translator_comments = []
            in_translator_comments = False
            continue

        # Comments don't apply unless they immediately precede the message
        if translator_comments and \
                translator_comments[-1][0] < node.lineno - 1:
            translator_comments = []
        else:
            # Keep only the comment text; line numbers are no longer needed
            translator_comments = \
                [comment[1] for comment in translator_comments]

        # extract_python is fed a byte string; non-ASCII characters are
        # turned into backslash escapes.
        if isinstance(code, unicode):
            code = code.encode('ascii', 'backslashreplace')
        code = StringIO(code)
        for lineno, funcname, messages, python_translator_comments \
                in extract_python(code, keywords, comment_tags, options):
            # lineno is relative to the snippet; rebase it onto the template
            yield (node.lineno + (lineno - 1), funcname, messages,
                   translator_comments + python_translator_comments)

        translator_comments = []
        in_translator_comments = False

        if child_nodes:
            for extracted in extract_nodes(child_nodes, keywords, comment_tags,
                                           options):
                yield extracted
|---|
| 112 | |
|---|
| 113 | |
|---|
| 114 | def _split_comment(lineno, comment): |
|---|
| 115 | """Return the multiline comment at lineno split into a list of comment line |
|---|
| 116 | numbers and the accompanying comment line""" |
|---|
| 117 | return [(lineno + index, line) for index, line in |
|---|
| 118 | enumerate(comment.splitlines())] |
|---|