[3] | 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
| 3 | |
---|
| 4 | """ |
---|
| 5 | Creates a human-readable identifier, using letters and digits, |
---|
| 6 | avoiding ambiguous numbers and letters. hash_identifier can be used |
---|
| 7 | to create compact representations that are unique for a certain string |
---|
| 8 | (or concatenation of strings) |
---|
| 9 | """ |
---|
| 10 | |
---|
try:
    import md5
except ImportError:
    # The standalone md5 module was removed in Python 3; hashlib
    # provides the md5 constructor there.
    from hashlib import md5
---|
| 12 | |
---|
# Alphabet used for identifiers: the digits 2-9 plus the lowercase
# letters, leaving out 0, 1, i, l, o and s (the module docstring
# describes the omitted characters as ambiguous).
good_characters = "23456789abcdefghjkmnpqrtuvwxyz"

# Radix of the encoding: one digit value per character of the alphabet.
base = len(good_characters)
---|
| 16 | |
---|
def make_identifier(number):
    """
    Encodes a non-negative integer as an identifier.

    The number is written in base ``len(good_characters)``, using the
    characters of ``good_characters`` as digits, least-significant
    digit first.  ``0`` encodes to the empty string.

    Raises ``ValueError`` if ``number`` is not an integer or is
    negative.
    """
    try:
        integer_types = (int, long)  # Python 2 has a separate long type
    except NameError:
        integer_types = (int,)  # Python 3 only has int
    if not isinstance(number, integer_types):
        # Wrap the value in a 1-tuple so that a tuple argument is shown
        # with %r instead of being unpacked as the format arguments
        # (which would raise TypeError rather than ValueError).
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % number)
    result = []
    while number:
        # divmod() floors the quotient, so this does not depend on the
        # interpreter's ``/`` performing integer division.
        number, digit = divmod(number, base)
        result.append(good_characters[digit])
    return ''.join(result)
---|
| 36 | |
---|
def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes ``s`` and turns that hash into an identifier of the given
    length (using modulo to reduce the length of the identifier).

    ``s`` is hashed as bytes: unicode text is encoded as UTF-8, and
    any other non-string object is converted with ``str()`` first.

    ``hasher`` is either an object with a ``new(data)`` function (such
    as the old ``md5`` and ``sha`` modules) or a callable that takes
    the data and returns a hash object (such as ``hashlib.md5``).

    If ``pad`` is False, then the minimum-length identifier will be
    used; otherwise the identifier is padded on the left, up to
    ``length``, with the first character of ``good_characters`` (the
    zero digit of the encoding).

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  Groups are counted from the right, so any short group
    comes first; e.g., ``group=4`` with ``length=10`` gives an
    identifier like ``cy-2dr6-rg46``.  Grouping occurs before the
    prefix.  ``upper`` upper-cases the identifier but not the prefix.

    Raises ``ValueError`` if ``length`` is greater than 26 with the
    default md5 hasher, or if ``group`` is negative.
    """
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % length)
    if group is not None and group < 0:
        # A negative width never shortens ``ident`` in the grouping
        # loop below, so reject it instead of looping forever.
        raise ValueError(
            "group must not be negative (you gave %r)" % (group,))
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    if not isinstance(s, (bytes, text_type)):
        s = str(s)
    if isinstance(s, text_type):
        s = s.encode('utf-8')
    # Prefer the module-style ``new`` constructor when there is one;
    # otherwise treat ``hasher`` itself as the constructor.
    new_hash = getattr(hasher, 'new', hasher)
    bin_hash = new_hash(s).digest()
    modulo = base ** length
    number = 0
    # bytearray yields integer byte values on both Python 2 and 3.
    for byte in bytearray(bin_hash):
        number = (number * 256 + byte) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Peel groups off the right-hand end, then restore their order.
        parts = []
        while ident:
            parts.append(ident[-group:])
            ident = ident[:-group]
        parts.reverse()
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident
---|
| 76 | |
---|
| 77 | # doctest tests: |
---|
# doctest treats each string in ``__test__`` as an extra docstring to
# search for examples; the keys only name the tests in the report.
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}
---|
| 113 | |
---|
if __name__ == '__main__':
    # When executed as a script, run this module's doctests (including
    # the examples stored in ``__test__``).
    import doctest
    doctest.testmod()
---|
| 117 | |
---|