"""MIME-Type Parser

This module provides basic functions for handling mime-types. It can handle
matching mime-types against a list of media-ranges. See section 14.1 of
the HTTP specification [RFC 2616] for a complete explanation.

   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1

Contents:
    - parse_mime_type():   Parses a mime-type into its component parts.
    - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter.
    - fitness_and_quality_parsed(): Returns the fitness and quality ('q') of the best-matching pre-parsed media-range.
    - quality():           Determines the quality ('q') of a mime-type when compared against a list of media-ranges.
    - quality_parsed():    Just like quality() except the second parameter must be pre-parsed.
    - best_match():        Chooses the mime-type with the highest quality ('q') from a list of candidates.
"""
|---|
| 16 |  | 
|---|
# Module metadata: release version and author contact details.
__version__ = "0.1.2"
__author__ = 'Joe Gregorio'
__email__ = "joe@bitworking.org"
__credits__ = ""
|---|
| 21 |  | 
|---|
def parse_mime_type(mime_type):
    """Split a mime-type string into its component parts.

    Returns a tuple ``(type, subtype, params)`` where ``params`` is a
    dictionary of all the parameters attached to the mime-type.
    For example, 'application/xhtml;q=0.5' is parsed into:

        ('application', 'xhtml', {'q': '0.5'})

    Each parameter is split on its first '=' only, so a value that itself
    contains '=' is preserved intact.  Segments without any '=' (for
    example the empty segment left behind by a trailing ';') are ignored.

    Raises ValueError if the text before the first ';' is not of the
    form 'type/subtype'.  A lone '*' is accepted and treated as '*/*'.
    """
    parts = mime_type.split(";")
    params = {}
    for param in parts[1:]:
        key, sep, value = param.partition("=")
        # No '=' at all: not a key/value pair, so skip it rather than
        # letting a malformed segment abort the whole parse.
        if sep:
            params[key.strip()] = value.strip()
    full_type = parts[0].strip()
    # Java URLConnection class sends an Accept header that includes a
    # single "*".  Turn it into a legal wildcard.
    if full_type == "*":
        full_type = "*/*"
    (main_type, subtype) = full_type.split("/")
    return (main_type.strip(), subtype.strip(), params)
|---|
| 40 |  | 
|---|
def parse_media_range(range):
    """Split a media range into its component parts.

    Returns a tuple ``(type, subtype, params)`` where ``params`` is a
    dictionary of all the parameters for the media range.
    For example, the media range 'application/*;q=0.5' is parsed into:

        ('application', '*', {'q': '0.5'})

    In addition this function guarantees that there is a usable value
    for 'q' in the params dictionary: if 'q' is missing, empty, not a
    number, or outside the range 0..1 it is replaced with the default
    '1'.  A valid 'q' of 0 is kept as given.
    """
    (main_type, subtype, params) = parse_mime_type(range)
    try:
        q = float(params["q"])
    except (KeyError, ValueError):
        # Missing, empty or non-numeric quality value.
        q = None
    # The chained comparison is also False for NaN, so NaN is defaulted too.
    if q is None or not 0 <= q <= 1:
        params["q"] = "1"
    return (main_type, subtype, params)
|---|
| 60 |  | 
|---|
def fitness_and_quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a mime-type among pre-parsed media ranges.

    'parsed_ranges' must be a list of media ranges that have already
    been parsed by parse_media_range().  'mime_type' is a string and is
    parsed here.

    Returns a tuple ``(fitness, q)`` for the best-fitting range, where
    'q' is that range's quality parameter as a float, or ``(-1, 0.0)``
    if no range matches.  Fitness is 100 for an exact type match, plus
    10 for an exact subtype match, plus 1 for every non-'q' parameter of
    'mime_type' that the range carries with the same value.  When
    several ranges share the best fitness, the first one wins.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = \
        parse_media_range(mime_type)
    for (range_type, range_subtype, range_params) in parsed_ranges:
        # A wildcard on either side counts as a match for that component.
        type_ok = (range_type == target_type
                   or range_type == "*" or target_type == "*")
        subtype_ok = (range_subtype == target_subtype
                      or range_subtype == "*" or target_subtype == "*")
        if not (type_ok and subtype_ok):
            continue
        param_matches = sum(
            1 for (key, value) in target_params.items()
            if key != "q" and key in range_params
            and value == range_params[key])
        fitness = param_matches
        if range_type == target_type:
            fitness += 100
        if range_subtype == target_subtype:
            fitness += 10
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params["q"]

    return best_fitness, float(best_fit_q)
|---|
| 87 |  | 
|---|
def quality_parsed(mime_type, parsed_ranges):
    """Return the quality 'q' of the best match for a mime-type.

    'parsed_ranges' must be a list of media ranges that have already
    been parsed by parse_media_range().  The result is the 'q' quality
    parameter of the best-matching range, or 0 if no range matches.
    This function behaves the same as quality() except that the ranges
    are supplied pre-parsed.
    """
    _fitness, best_q = fitness_and_quality_parsed(mime_type, parsed_ranges)
    return best_q
|---|
| 97 |  | 
|---|
def quality(mime_type, ranges):
    """Return the quality 'q' of a mime-type when compared against
    the media-ranges in 'ranges'.  For example:

    >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    'ranges' is a comma-separated string of media ranges.  Blank
    entries (an empty string, or the gaps left by stray commas) are
    ignored; if nothing remains, no range matches and the result is 0.
    """
    # Drop blank segments so they never reach the parser, which rejects
    # an empty media range.
    parsed_ranges = [parse_media_range(r)
                     for r in ranges.split(",") if r.strip()]
    return quality_parsed(mime_type, parsed_ranges)
|---|
| 108 |  | 
|---|
def best_match(supported, header):
    """Pick the supported mime-type that best matches an Accept header.

    'supported' is a list of mime-types.  'header' must be a string in
    the format of the HTTP Accept: header; blank entries in it (an
    empty string, or the gaps left by stray commas) are ignored.

    Candidates are ranked by fitness first, then by quality 'q', then
    by the mime-type string itself.  Returns the winning mime-type, or
    '' if 'supported' is empty or the winner's quality is 0 (which
    includes the case where nothing matched).

    >>> best_match(['application/xbel+xml', 'text/xml'], 'text/*;q=0.5,*/*; q=0.1')
    'text/xml'
    """
    parsed_header = [parse_media_range(r)
                     for r in header.split(",") if r.strip()]
    weighted_matches = [
        (fitness_and_quality_parsed(mime_type, parsed_header), mime_type)
        for mime_type in supported]
    if not weighted_matches:
        # Nothing on offer, so nothing can match.
        return ""
    # Tuples compare element-wise, so max() selects the same entry that
    # sorting and taking the last element would.
    (_fitness, best_q), best_type = max(weighted_matches)
    if best_q:
        return best_type
    return ""
|---|
| 124 |  | 
|---|
if __name__ == "__main__":
    import unittest

    class TestMimeParsing(unittest.TestCase):
        """Self-tests, run when this module is executed as a script."""

        def test_parse_media_range(self):
            """'q' is defaulted to '1' when missing, empty or out of range."""
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q=1'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml'))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml;q='))
            self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
            self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))
            # Java URLConnection class sends an Accept header that includes a single *
            self.assertEqual(('*', '*', {'q': '.2'}), parse_media_range(" *; q=.2"))

        def test_rfc_2616_example(self):
            """The Accept header example from RFC 2616 section 14.1."""
            accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
            self.assertEqual(1, quality("text/html;level=1", accept))
            self.assertEqual(0.7, quality("text/html", accept))
            self.assertEqual(0.3, quality("text/plain", accept))
            self.assertEqual(0.5, quality("image/jpeg", accept))
            self.assertEqual(0.4, quality("text/html;level=2", accept))
            self.assertEqual(0.7, quality("text/html;level=3", accept))

        def test_best_match(self):
            """best_match() with direct matches, wildcards and no match."""
            mime_types_supported = ['application/xbel+xml', 'application/xml']
            # direct match
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml'), 'application/xbel+xml')
            # direct match with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xbel+xml; q=1'), 'application/xbel+xml')
            # direct match of our second choice with a q parameter
            self.assertEqual(best_match(mime_types_supported, 'application/xml; q=1'), 'application/xml')
            # match using a subtype wildcard
            self.assertEqual(best_match(mime_types_supported, 'application/*; q=1'), 'application/xml')
            # match using a type wildcard
            self.assertEqual(best_match(mime_types_supported, '*/*'), 'application/xml')

            mime_types_supported = ['application/xbel+xml', 'text/xml']
            # match using a type versus a lower weighted subtype
            self.assertEqual(best_match(mime_types_supported, 'text/*;q=0.5,*/*; q=0.1'), 'text/xml')
            # fail to match anything
            self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')

            # common AJAX scenario
            mime_types_supported = ['application/json', 'text/html']
            self.assertEqual(best_match(mime_types_supported, 'application/json, text/javascript, */*'), 'application/json')
            # verify fitness ordering
            self.assertEqual(best_match(mime_types_supported, 'application/json, text/html;q=0.9'), 'application/json')

        def test_support_wildcards(self):
            """Wildcards are allowed in the supported list as well."""
            mime_types_supported = ['image/*', 'application/xml']
            # match using a type wildcard
            self.assertEqual(best_match(mime_types_supported, 'image/png'), 'image/*')
            # match using a wildcard for both requested and supported
            self.assertEqual(best_match(mime_types_supported, 'image/*'), 'image/*')

    unittest.main()
|---|