1 | """MIME-Type Parser |
---|
2 | |
---|
3 | This module provides basic functions for handling mime-types. It can handle |
---|
4 | matching mime-types against a list of media-ranges. See section 14.1 of |
---|
5 | the HTTP specification [RFC 2616] for a complete explanation. |
---|
6 | |
---|
7 | http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1 |
---|
8 | |
---|
9 | Contents: |
---|
10 | - parse_mime_type(): Parses a mime-type into its component parts. |
---|
11 | - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter. |
---|
12 | - quality(): Determines the quality ('q') of a mime-type when compared against a list of media-ranges. |
---|
13 | - quality_parsed(): Just like quality() except the second parameter must be pre-parsed. |
---|
14 | - best_match(): Choose the mime-type with the highest quality ('q') from a list of candidates. |
---|
15 | - desired_matches(): Provide a list in order of server-desired priorities from a list of candidates. |
---|
16 | """ |
---|
17 | |
---|
18 | __version__ = "0.1.1" |
---|
19 | __author__ = 'Joe Gregorio' |
---|
20 | __email__ = "joe@bitworking.org" |
---|
21 | __credits__ = "" |
---|
22 | |
---|
def parse_mime_type(mime_type):
    """Split a mime-type into its (type, subtype, params) components.

    'params' is a dictionary of all the parameters that follow the
    type/subtype pair.  For example, 'application/xhtml;q=0.5' is
    parsed into:

       ('application', 'xhtml', {'q': '0.5'})

    Parsing rules:

    - Surrounding whitespace is stripped from the type, the subtype and
      every parameter name and value.
    - A parameter is split on its first '=' only, so a value that itself
      contains '=' is kept intact.
    - A parameter without '=' is stored with an empty string value.
    - Blank segments (such as the one left by a trailing ';') are ignored.
    - A bare '*' is treated as the full wildcard '*/*'.

    Raises ValueError if the part before the first ';' is not exactly one
    type and one subtype separated by a single '/'.
    """
    parts = mime_type.split(";")

    params = {}
    for param in parts[1:]:
        if not param.strip():
            # Blank segment, e.g. the trailing ';' in 'text/html;'.
            continue
        pieces = param.split("=", 1)
        name = pieces[0].strip()
        if len(pieces) == 2:
            params[name] = pieces[1].strip()
        else:
            params[name] = ""

    full_type = parts[0].strip()
    if full_type == "*":
        # A lone '*' has no '/', so the unpacking below would fail;
        # interpret it as the wildcard for every type and subtype.
        full_type = "*/*"
    (main_type, subtype) = full_type.split("/")
    return (main_type.strip(), subtype.strip(), params)
---|
37 | |
---|
def parse_media_range(range):
    """Split a media range into its (type, subtype, params) components.

    'params' is a dictionary of all the parameters for the media range.
    For example, the media range ``application/*;q=0.5`` is parsed
    into::

       ('application', '*', {'q': '0.5'})

    In addition this function guarantees that 'params' always contains
    a usable 'q' value.  The value is replaced by the default '1' when
    it is missing, empty, not a number, or outside the range 0..1.

    A valid 'q' of 0 is preserved: per the HTTP specification it marks
    the range as "not acceptable" and must not be promoted to 1.
    """
    (main_type, subtype, params) = parse_mime_type(range)
    try:
        q = float(params["q"])
    except (KeyError, ValueError):
        # 'q' is absent, empty, or not a number.
        q = None
    # The chained comparison is also False for NaN, which falls back to
    # the default as well.
    if q is None or not (0 <= q <= 1):
        params["q"] = "1"
    return (main_type, subtype, params)
---|
58 | |
---|
def quality_parsed(mime_type, parsed_ranges):
    """Return the quality of a mime_type against pre-parsed media ranges.

    'parsed_ranges' must be a list of (type, subtype, params) tuples as
    produced by parse_media_range().  This function behaves the same as
    quality() except that the ranges are already parsed.

    Each range whose type and subtype match 'mime_type' (exactly or via
    the '*' wildcard) is scored:

    - 100 for an exact type match,
    - 10 for an exact subtype match,
    - 1 for every parameter of 'mime_type' other than 'q' that appears
      in the range with the same value.

    The 'q' of the highest scoring range is returned as a float; on a
    tie the earliest range in the list wins.  Returns 0.0 if no range
    matches.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = \
        parse_media_range(mime_type)

    for (range_type, range_subtype, range_params) in parsed_ranges:
        type_is_exact = (range_type == target_type)
        subtype_is_exact = (range_subtype == target_subtype)
        if not (type_is_exact or range_type == "*"):
            continue
        if not (subtype_is_exact or range_subtype == "*"):
            continue

        fitness = 0
        if type_is_exact:
            fitness += 100
        if subtype_is_exact:
            fitness += 10
        for (key, value) in target_params.items():
            if key != "q" and key in range_params \
                    and value == range_params[key]:
                fitness += 1

        # Strictly greater: an equally specific later range does not
        # displace an earlier one.
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params["q"]

    return float(best_fit_q)
---|
86 | |
---|
def quality(mime_type, ranges):
    """Return the quality 'q' of a mime_type against a list of ranges.

    'ranges' is a string of comma separated media ranges in the format
    of the HTTP Accept: header.  For example:

    >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    Empty list elements (for instance from a trailing or doubled comma)
    are ignored.  If 'ranges' contains no media range at all the result
    is 0.0.
    """
    # Skip blank elements so that they are not handed to the parser,
    # which cannot split them into type/subtype.
    parsed_ranges = [parse_media_range(r) for r in ranges.split(",")
                     if r.strip()]
    return quality_parsed(mime_type, parsed_ranges)
---|
97 | |
---|
def best_match(supported, header):
    """Return the supported mime-type that best matches an Accept header.

    'supported' is a list of mime-types.  'header' must be a string
    that conforms to the format of the HTTP Accept: header.

    >>> best_match(['application/xbel+xml', 'text/xml'], 'text/*;q=0.5,*/*; q=0.1')
    'text/xml'

    The candidates are ordered by (quality, mime-type), so when several
    candidates share the highest quality the one that sorts last as a
    string is returned.  Returns '' when nothing matches (the best
    quality is 0) or when 'supported' is empty.  Empty elements in
    'header' (for instance from a trailing comma) are ignored.
    """
    # Skip blank elements so that they are not handed to the parser,
    # which cannot split them into type/subtype.
    parsed_header = [parse_media_range(r) for r in header.split(",")
                     if r.strip()]
    weighted_matches = [(quality_parsed(mime_type, parsed_header), mime_type)
                        for mime_type in supported]
    if not weighted_matches:
        # Nothing to choose from; avoid indexing an empty list below.
        return ''
    weighted_matches.sort()
    (best_quality, best_type) = weighted_matches[-1]
    if best_quality:
        return best_type
    return ''
---|
113 | |
---|
def desired_matches(desired, header):
    """Return the desired mime-types that the Accept header allows.

    'desired' is a list of mime-types in the order the server prefers to
    send them, regardless of the browser's preference.  'header' is a
    string in the format of the HTTP Accept: header.

    Browsers (such as Firefox) technically want XML over HTML depending on
    how one reads the specification.  This function is provided for a
    server to declare a set of desired mime-types it supports, and returns
    the subset of the desired list, in the same order, whose entries are
    accepted by the browser (that is, have a non-zero quality).

    >>> desired_matches(['text/html', 'application/xml'],
    ...     'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png')
    ['text/html', 'application/xml']
    >>> desired_matches(['text/html', 'application/xml'], 'application/xml,application/json')
    ['application/xml']

    Empty elements in 'header' (for instance from a trailing comma) are
    ignored.
    """
    # Skip blank elements so that they are not handed to the parser,
    # which cannot split them into type/subtype.
    parsed_ranges = [parse_media_range(r) for r in header.split(",")
                     if r.strip()]
    return [mimetype for mimetype in desired
            if quality_parsed(mimetype, parsed_ranges)]
---|
136 | |
---|
if __name__ == "__main__":
    # Self-test: running this module as a script executes the unit tests.
    import unittest

    class TestMimeParsing(unittest.TestCase):
        """Unit tests for the media-range parsing and matching functions."""

        def test_parse_media_range(self):
            # An explicit, missing, empty or out-of-range 'q' all end up as '1'.
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q=1'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q='))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))

        def test_rfc_2616_example(self):
            # The Accept header used as the example in RFC 2616 section 14.1.
            accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
            self.assertEqual(1, quality("text/html;level=1", accept))
            self.assertEqual(0.7, quality("text/html", accept))
            self.assertEqual(0.3, quality("text/plain", accept))
            self.assertEqual(0.5, quality("image/jpeg", accept))
            self.assertEqual(0.4, quality("text/html;level=2", accept))
            self.assertEqual(0.7, quality("text/html;level=3", accept))

        def test_best_match(self):
            mime_types_supported = ['application/xbel+xml', 'application/xml']
            # direct match
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml')
            # direct match with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml')
            # direct match of our second choice with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml')
            # match using a subtype wildcard
            self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml')
            # match using a type wildcard
            self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml')

            mime_types_supported = ['application/xbel+xml', 'text/xml']
            # match using a type versus a lower weighted subtype
            self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml')
            # fail to match anything
            self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')

        def test_desired_matches(self):
            desired = ['text/html', 'application/xml']
            # every desired type is accepted; the server's order is kept
            self.assertEqual(
                desired_matches(desired, 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png'),
                ['text/html', 'application/xml'])
            # only the accepted subset is returned
            self.assertEqual(
                desired_matches(desired, 'application/xml,application/json'),
                ['application/xml'])

    unittest.main()
---|