| 1 | """MIME-Type Parser |
|---|
| 2 | |
|---|
| 3 | This module provides basic functions for handling mime-types. It can handle |
|---|
| 4 | matching mime-types against a list of media-ranges. See section 14.1 of |
|---|
| 5 | the HTTP specification [RFC 2616] for a complete explanation. |
|---|
| 6 | |
|---|
| 7 | http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1 |
|---|
| 8 | |
|---|
| 9 | Contents: |
|---|
| 10 | - parse_mime_type(): Parses a mime-type into its component parts. |
|---|
| 11 | - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter. |
|---|
| 12 | - quality(): Determines the quality ('q') of a mime-type when compared against a list of media-ranges. |
|---|
| 13 | - quality_parsed(): Just like quality() except the second parameter must be pre-parsed. |
|---|
| 14 | - best_match(): Choose the mime-type with the highest quality ('q') from a list of candidates. |
|---|
| 15 | - desired_matches(): Provide a list in order of server-desired priorities from a list of candidates. |
|---|
| 16 | """ |
|---|
| 17 | |
|---|
# Module metadata: release version, author contact details and credits.
__version__ = "0.1.1"
__author__ = "Joe Gregorio"
__email__ = "joe@bitworking.org"
__credits__ = ""
|---|
| 22 | |
|---|
def parse_mime_type(mime_type):
    """Split a mime-type into its (type, subtype, params) components.

    'params' is a dictionary of the ``key=value`` parameters that follow
    the ``type/subtype`` part; keys and values are whitespace-stripped
    strings.  For example, the mime-type 'application/xhtml;q=0.5' is
    parsed into:

        ('application', 'xhtml', {'q': '0.5'})

    A bare '*' is treated as the full wildcard '*/*'.  Parameter segments
    that contain no '=' (including the empty segment left by a trailing
    ';') are ignored.

    Raises ValueError if the part before the first ';' does not contain
    exactly one '/'.
    """
    parts = mime_type.split(";")

    params = {}
    for segment in parts[1:]:
        # Split on the first '=' only, so a value that itself contains
        # '=' (e.g. a quoted string) is kept intact.
        key, separator, value = segment.partition("=")
        if not separator:
            # Not a key=value pair: nothing sensible to record.
            continue
        params[key.strip()] = value.strip()

    full_type = parts[0].strip()
    if full_type == "*":
        # A lone '*' is not a legal media range; normalise it to the
        # wildcard form so the split below succeeds.
        full_type = "*/*"

    main_type, subtype = full_type.split("/")
    return (main_type.strip(), subtype.strip(), params)
|---|
| 37 | |
|---|
def parse_media_range(range):
    """Split a media range into its (type, subtype, params) components.

    'params' is a dictionary of all the parameters of the media range.
    For example, the media range ``application/*;q=0.5`` is parsed
    into::

        ('application', '*', {'q': '0.5'})

    In addition this function guarantees that 'params' always holds a
    usable value for 'q': if 'q' is missing, empty, not a number, or
    outside the range 0..1, it is replaced by the default '1'.  An
    explicit ``q=0`` is kept as given.
    """
    (main_type, subtype, params) = parse_mime_type(range)

    raw_q = params.get('q')
    try:
        # bool(raw_q) rejects both a missing and an empty 'q'; the chained
        # comparison also rejects NaN, which compares false to everything.
        q_is_valid = bool(raw_q) and 0 <= float(raw_q) <= 1
    except ValueError:
        # float() could not parse the value at all.
        q_is_valid = False

    if not q_is_valid:
        params['q'] = '1'
    return (main_type, subtype, params)
|---|
| 58 | |
|---|
def quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a given mime_type against a list of
    media ranges that have already been parsed by parse_media_range().

    Returns the 'q' quality parameter of the best match as a float, or
    0.0 if no range matches.  This function behaves the same as
    quality() except that 'parsed_ranges' must be a list of parsed
    media ranges, i.e. (type, subtype, params) tuples whose 'params'
    contain a 'q' entry.

    A range matches when its type and subtype each equal the target's
    or are the wildcard '*'.  Among matching ranges the most specific
    one wins: 100 points for an exact type, 10 for an exact subtype and
    1 for every non-'q' parameter shared with the target.  On equal
    scores the earliest range in the list is kept.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = \
        parse_media_range(mime_type)

    for (range_type, range_subtype, range_params) in parsed_ranges:
        type_is_exact = range_type == target_type
        subtype_is_exact = range_subtype == target_subtype
        if not (type_is_exact or range_type == '*'):
            continue
        if not (subtype_is_exact or range_subtype == '*'):
            continue

        # Count the target's parameters (other than 'q') that the range
        # repeats with an identical value.
        param_matches = sum(
            1 for (key, value) in target_params.items()
            if key != 'q'
            and key in range_params
            and range_params[key] == value)

        fitness = param_matches
        if type_is_exact:
            fitness += 100
        if subtype_is_exact:
            fitness += 10

        # Strict comparison: the first range reaching a score keeps it.
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params['q']

    return float(best_fit_q)
|---|
| 86 | |
|---|
def quality(mime_type, ranges):
    """Return the quality 'q' of a mime_type when compared against the
    media-ranges in 'ranges'.

    'ranges' is a comma-separated string of media ranges.  Empty
    segments (for instance from a trailing comma or an empty string)
    are skipped; if nothing is left to match against the result is 0.0.
    For example:

    >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    """
    # Blank segments carry no type/subtype and would make the parser
    # raise, so they are dropped before parsing.
    parsed_ranges = [parse_media_range(media_range)
                     for media_range in ranges.split(",")
                     if media_range.strip()]
    return quality_parsed(mime_type, parsed_ranges)
|---|
| 97 | |
|---|
def best_match(supported, header):
    """Take a list of supported mime-types and find the best match for
    all the media-ranges listed in header.

    The value of 'header' must be a string that conforms to the format
    of the HTTP Accept: header.  The value of 'supported' is a list of
    mime-types.  Returns the supported mime-type with the highest
    quality, or '' when 'supported' is empty or nothing in it is
    acceptable (quality 0).  When several candidates share the highest
    quality, the one that sorts last as a string is returned.

    >>> best_match(['application/xbel+xml', 'text/xml'], 'text/*;q=0.5,*/*; q=0.1')
    'text/xml'
    """
    # Blank segments (trailing comma, empty header) carry no
    # type/subtype and would make the parser raise, so they are dropped.
    parsed_header = [parse_media_range(media_range)
                     for media_range in header.split(",")
                     if media_range.strip()]
    weighted_matches = [(quality_parsed(mime_type, parsed_header), mime_type)
                        for mime_type in supported]
    if not weighted_matches:
        # Nothing on offer, so nothing can match.
        return ''

    # Tuples compare by quality first and by mime-type string second.
    (best_quality, best_type) = max(weighted_matches)
    if not best_quality:
        return ''
    return best_type
|---|
| 113 | |
|---|
def desired_matches(desired, header):
    """Take a list of desired mime-types in the order the server prefers
    to send them, regardless of the browser's preference.

    Browsers (such as Firefox) technically want XML over HTML depending
    on how one reads the specification.  This function is provided for a
    server to declare a set of desired mime-types it supports; it returns
    the subset of the desired list, in the same order, that the 'header'
    (an HTTP Accept: header value) accepts with a non-zero quality.

    >>> desired_matches(['text/html', 'application/xml'],
    ...     'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png')
    ['text/html', 'application/xml']
    >>> desired_matches(['text/html', 'application/xml'], 'application/xml,application/json')
    ['application/xml']
    """
    # Blank segments (trailing comma, empty header) carry no
    # type/subtype and would make the parser raise, so they are dropped.
    parsed_ranges = [parse_media_range(media_range)
                     for media_range in header.split(",")
                     if media_range.strip()]
    return [mimetype for mimetype in desired
            if quality_parsed(mimetype, parsed_ranges)]
|---|
| 136 | |
|---|
if __name__ == "__main__":
    # Self-test: running the module as a script executes the unit tests.
    import unittest

    class TestMimeParsing(unittest.TestCase):
        """Unit tests for parse_media_range(), quality() and best_match()."""

        def test_parse_media_range(self):
            # A missing, empty or out-of-range 'q' must come back as '1'.
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q=1'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q='))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))

        def test_rfc_2616_example(self):
            # The worked example from RFC 2616, section 14.1.
            accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
            self.assertEqual(1, quality("text/html;level=1", accept))
            self.assertEqual(0.7, quality("text/html", accept))
            self.assertEqual(0.3, quality("text/plain", accept))
            self.assertEqual(0.5, quality("image/jpeg", accept))
            self.assertEqual(0.4, quality("text/html;level=2", accept))
            self.assertEqual(0.7, quality("text/html;level=3", accept))

        def test_best_match(self):
            mime_types_supported = ['application/xbel+xml', 'application/xml']
            # direct match
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml')
            # direct match with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml')
            # direct match of our second choice with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml')
            # match using a subtype wildcard
            self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml')
            # match using a type wildcard
            self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml')

            mime_types_supported = ['application/xbel+xml', 'text/xml']
            # match using a type versus a lower weighted subtype
            self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml')
            # fail to match anything
            self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')

    unittest.main()
|---|