1 | """gettext message extraction via Babel: http://babel.edgewall.org/""" |
---|
2 | from StringIO import StringIO |
---|
3 | |
---|
4 | from babel.messages.extract import extract_python |
---|
5 | |
---|
6 | from mako import lexer, parsetree |
---|
7 | |
---|
def extract(fileobj, keywords, comment_tags, options):
    """Yield the translatable messages found in a Mako template.

    The template source is read from ``fileobj``, parsed with Mako's lexer
    and the resulting top-level nodes are handed to :func:`extract_nodes`.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options; ``input_encoding``
                    is looked up first, then ``encoding``, to pick the
                    encoding passed to the lexer
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    # 'input_encoding' wins over the more generic 'encoding' option; when
    # neither is present the lexer receives None.
    fallback_encoding = options.get('encoding', None)
    source_encoding = options.get('input_encoding', fallback_encoding)

    template_source = fileobj.read()
    template_lexer = lexer.Lexer(template_source,
                                 input_encoding=source_encoding)
    top_level_nodes = template_lexer.parse().get_children()

    for message in extract_nodes(top_level_nodes, keywords, comment_tags,
                                 options):
        yield message
---|
27 | |
---|
def extract_nodes(nodes, keywords, comment_tags, options):
    """Extract messages from Mako's lexer node objects.

    The nodes are visited in order.  Template comments that start with one
    of ``comment_tags`` (and any comment nodes directly following them) are
    collected as translator comments.  The Python code carried by each
    supported node is passed to Babel's ``extract_python``; the nodes nested
    inside ``DefTag`` and ``CallTag`` nodes are then processed recursively.

    :param nodes: an iterable of Mako parsetree.Node objects to extract from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # Pending translator comments as (lineno, text) pairs, and whether the
    # previous node(s) opened a translator comment block.
    translator_comments = []
    in_translator_comments = False

    for node in nodes:
        child_nodes = None
        if in_translator_comments and isinstance(node, parsetree.Text) and \
                not node.content.strip():
            # Ignore whitespace within translator comments
            continue

        if isinstance(node, parsetree.Comment):
            value = node.text.strip()
            if in_translator_comments:
                # Continuation of an already opened translator comment
                translator_comments.extend(_split_comment(node.lineno, value))
                continue
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.extend(_split_comment(node.lineno,
                                                              value))
                    # A comment matching several tags (overlapping prefixes
                    # or a repeated tag) must only be recorded once
                    break
            continue

        # Pick the Python source to scan for this kind of node
        if isinstance(node, parsetree.DefTag):
            code = node.function_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.CallTag):
            code = node.code.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.PageTag):
            code = node.body_decl.code
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # Closing control lines carry no code to scan
                translator_comments = []
                in_translator_comments = False
                continue
            code = node.text
        elif isinstance(node, parsetree.Code):
            # <% and <%! blocks would provide their own translator comments
            translator_comments = []
            in_translator_comments = False

            code = node.code.code
        elif isinstance(node, parsetree.Expression):
            code = node.code.code
        else:
            # Any other node ends a pending translator comment block
            translator_comments = []
            in_translator_comments = False
            continue

        # Comments don't apply unless they immediately precede the message
        if translator_comments and \
                translator_comments[-1][0] < node.lineno - 1:
            translator_comments = []
        else:
            # Drop the line numbers, only the comment text is reported
            translator_comments = \
                [comment[1] for comment in translator_comments]

        if isinstance(code, unicode):
            code = code.encode('ascii', 'backslashreplace')
        code = StringIO(code)
        for lineno, funcname, messages, python_translator_comments \
                in extract_python(code, keywords, comment_tags, options):
            # NOTE(review): lineno from extract_python is presumably 1-based
            # within the snippet, hence the -1 when offsetting by the node's
            # line in the template -- confirm against Babel
            yield (node.lineno + (lineno - 1), funcname, messages,
                   translator_comments + python_translator_comments)

        # Collected comments only ever apply to a single node
        translator_comments = []
        in_translator_comments = False

        if child_nodes:
            for extracted in extract_nodes(child_nodes, keywords,
                                           comment_tags, options):
                yield extracted
---|
112 | |
---|
113 | |
---|
114 | def _split_comment(lineno, comment): |
---|
115 | """Return the multiline comment at lineno split into a list of comment line |
---|
116 | numbers and the accompanying comment line""" |
---|
117 | return [(lineno + index, line) for index, line in |
---|
118 | enumerate(comment.splitlines())] |
---|