"""MIME-Type Parser

This module provides basic functions for handling mime-types. It can handle
matching mime-types against a list of media-ranges. See section 14.1 of
the HTTP specification [RFC 2616] for a complete explanation.

   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1

Contents:
    - parse_mime_type():   Parses a mime-type into its component parts.
    - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter.
    - quality():           Determines the quality ('q') of a mime-type when compared against a list of media-ranges.
    - quality_parsed():    Just like quality() except the second parameter must be pre-parsed.
    - fitness_and_quality_parsed(): Like quality_parsed() but also returns the fitness of the best match.
    - best_match():        Choose the mime-type with the highest quality ('q') from a list of candidates.
"""
---|
16 | |
---|
# Module metadata, exposed as module-level dunder attributes.
__version__ = '0.1.2'
__author__ = 'Joe Gregorio'
__email__ = 'joe@bitworking.org'
__credits__ = ''
---|
21 | |
---|
def parse_mime_type(mime_type):
    """Carve up a mime-type and return a (type, subtype, params) tuple.

    'params' is a dictionary of all the parameters attached to the
    mime-type. For example, 'application/xhtml;q=0.5' is parsed into:

       ('application', 'xhtml', {'q': '0.5'})

    Surrounding whitespace is stripped from every component. Each parameter
    is split on its first '=' only, so a value that itself contains '=' is
    kept intact. A parameter without '=' maps to the empty string, and
    segments with an empty name (e.g. produced by a trailing ';') are
    ignored.

    Raises ValueError if the part before the parameters is not of the
    form 'type/subtype' (a lone '*' is accepted and treated as '*/*').
    """
    parts = mime_type.split(";")
    params = {}
    for param in parts[1:]:
        # partition() never fails: a missing '=' simply yields an empty value.
        key, _, value = param.partition("=")
        key = key.strip()
        if key:
            params[key] = value.strip()
    full_type = parts[0].strip()
    # Java URLConnection class sends an Accept header that includes a
    # single "*". Turn it into a legal wildcard.
    if full_type == "*":
        full_type = "*/*"
    pieces = full_type.split("/")
    if len(pieces) != 2:
        raise ValueError("invalid mime-type: %r" % (mime_type,))
    main_type, subtype = pieces
    return (main_type.strip(), subtype.strip(), params)
---|
40 | |
---|
def parse_media_range(range):
    """Carve up a media range and return a (type, subtype, params) tuple.

    'params' is a dictionary of all the parameters for the media range.
    For example, the media range 'application/*;q=0.5' is parsed into:

       ('application', '*', {'q': '0.5'})

    In addition this function guarantees that there is a usable value for
    'q' in the params dictionary: if 'q' is missing, empty, not a number,
    or outside the range 0..1, it is replaced with the default '1'.
    An explicit 'q=0' is inside the valid range and is therefore kept.
    """
    (main_type, subtype, params) = parse_mime_type(range)
    try:
        # A missing 'q' becomes '' here, which float() rejects just like
        # any other non-numeric value.
        q = float(params.get("q", ""))
    except ValueError:
        q = None
    # The chained comparison is also False for NaN, so NaN falls back to '1'.
    if q is None or not 0 <= q <= 1:
        params["q"] = "1"
    return (main_type, subtype, params)
---|
60 | |
---|
def fitness_and_quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a mime-type among pre-parsed media ranges.

    'parsed_ranges' must be a list of media ranges that have already been
    parsed by parse_media_range(), i.e. (type, subtype, params) tuples
    whose params contain a 'q' entry.

    A range matches when its type and subtype each equal the target's or
    either side is the '*' wildcard. Every matching range gets a fitness
    score: 100 for an identical type, plus 10 for an identical subtype,
    plus 1 for each non-'q' parameter of the target that the range carries
    with the same value. The first range with the highest fitness wins.

    Returns a tuple (fitness, q) for the winning range, with q converted to
    float, or (-1, 0.0) if no range matched.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = parse_media_range(mime_type)
    for (range_type, range_subtype, range_params) in parsed_ranges:
        type_matches = (range_type == target_type
                        or range_type == "*" or target_type == "*")
        subtype_matches = (range_subtype == target_subtype
                           or range_subtype == "*" or target_subtype == "*")
        if not (type_matches and subtype_matches):
            continue
        # Count target parameters (other than 'q') that the range repeats
        # with an identical value.
        param_matches = sum(
            1 for (key, value) in target_params.items()
            if key != "q" and key in range_params
            and value == range_params[key])
        fitness = 100 if range_type == target_type else 0
        fitness += 10 if range_subtype == target_subtype else 0
        fitness += param_matches
        # Strict '>' keeps the earliest range among equally fit ones.
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params["q"]

    return best_fitness, float(best_fit_q)
---|
87 | |
---|
def quality_parsed(mime_type, parsed_ranges):
    """Return the 'q' quality of the best match for a mime-type.

    'parsed_ranges' must be a list of media ranges that have already been
    parsed by parse_media_range(). The result is the 'q' quality parameter
    of the best matching range, or 0 if no range matched. This function
    behaves the same as quality() except that the ranges are pre-parsed.
    """
    _fitness, best_q = fitness_and_quality_parsed(mime_type, parsed_ranges)
    return best_q
---|
97 | |
---|
def quality(mime_type, ranges):
    """Return the quality 'q' of a mime-type when compared
    against the media-ranges in ranges. For example:

    >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    'ranges' is a comma-separated string of media ranges. Blank segments
    (an empty string, or stray/trailing commas) are skipped, so an empty
    'ranges' matches nothing and yields a quality of 0.
    """
    parsed_ranges = [parse_media_range(r)
                     for r in ranges.split(",") if r.strip()]
    return quality_parsed(mime_type, parsed_ranges)
---|
108 | |
---|
def best_match(supported, header):
    """Take a list of supported mime-types and find the best
    match for all the media-ranges listed in header. The value of
    header must be a string that conforms to the format of the
    HTTP Accept: header. The value of 'supported' is a list of
    mime-types.

    >>> best_match(['application/xbel+xml', 'text/xml'], 'text/*;q=0.5,*/*; q=0.1')
    'text/xml'

    Candidates are ranked by the quality ('q') of their best matching
    media range first, then by the fitness of that match, and finally by
    the mime-type string itself so that ties resolve deterministically.
    Returns '' when 'supported' is empty or when the winning candidate has
    a quality of 0 (nothing acceptable matched). Blank segments in
    'header' (stray or trailing commas) are ignored.
    """
    parsed_header = [parse_media_range(r)
                     for r in header.split(",") if r.strip()]
    best = None
    for mime_type in supported:
        fitness, q = fitness_and_quality_parsed(mime_type, parsed_header)
        # Quality comes first in the key: the client's stated preference
        # outranks how specifically a range happened to match.
        candidate = (q, fitness, mime_type)
        if best is None or candidate > best:
            best = candidate
    if best is None or not best[0]:
        return ""
    return best[2]
---|
124 | |
---|
if __name__ == "__main__":
    # Self-test: running this module as a script executes the unit tests.
    import unittest

    class TestMimeParsing(unittest.TestCase):
        """Unit tests for the parsing and matching functions of this module."""

        def test_parse_media_range(self):
            # assertEqual replaces the deprecated assert_ alias, which newer
            # unittest versions no longer provide.
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q=1'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q='))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
            # an out-of-range q falls back to the default of '1'
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))
            # Java URLConnection class sends an Accept header that includes a single *
            self.assertEqual(('*', '*', {'q': '.2'}), parse_media_range(" *; q=.2"))

        def test_rfc_2616_example(self):
            accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
            self.assertEqual(1, quality("text/html;level=1", accept))
            self.assertEqual(0.7, quality("text/html", accept))
            self.assertEqual(0.3, quality("text/plain", accept))
            self.assertEqual(0.5, quality("image/jpeg", accept))
            self.assertEqual(0.4, quality("text/html;level=2", accept))
            self.assertEqual(0.7, quality("text/html;level=3", accept))

        def test_best_match(self):
            mime_types_supported = ['application/xbel+xml', 'application/xml']
            # direct match
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml')
            # direct match with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml')
            # direct match of our second choice with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml')
            # match using a subtype wildcard
            self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml')
            # match using a type wildcard
            self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml')

            mime_types_supported = ['application/xbel+xml', 'text/xml']
            # match using a type versus a lower weighted subtype
            self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml')
            # fail to match anything
            self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')

            # common AJAX scenario
            mime_types_supported = ['application/json', 'text/html']
            self.assertEqual(best_match(mime_types_supported, 'application/json, text/javascript, */*'), 'application/json')
            # verify fitness ordering
            self.assertEqual(best_match(mime_types_supported, 'application/json, text/html;q=0.9'), 'application/json')

        def test_support_wildcards(self):
            mime_types_supported = ['image/*', 'application/xml']
            # match a supported subtype wildcard against a concrete request
            self.assertEqual(best_match(mime_types_supported, 'image/png'), 'image/*')
            # match using a wildcard for both requested and supported
            self.assertEqual(best_match(mime_types_supported, 'image/*'), 'image/*')

    unittest.main()
---|