1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
3 | |
---|
4 | """ |
---|
5 | Creates a human-readable identifier, using letters and digits, |
---|
6 | avoiding ambiguous numbers and letters. hash_identifier can be used |
---|
7 | to create compact representations that are unique for a certain string |
---|
8 | (or concatenation of strings) |
---|
9 | """ |
---|
10 | |
---|
11 | import md5 |
---|
12 | |
---|
# Alphabet used for identifiers: digits followed by lowercase letters,
# leaving out the easily confused characters 0, 1, i, l, o and s.
good_characters = "23456789" "abcdefghjkmnpqrtuvwxyz"

# Radix of the encoding: one "digit" per character of the alphabet.
base = len(good_characters)
---|
16 | |
---|
def make_identifier(number):
    """
    Encodes a non-negative integer as an identifier.

    The number is written in base ``len(good_characters)`` using the
    characters of ``good_characters`` as digits, least significant
    digit first (so ``1000`` becomes ``'c53'``).  Zero encodes to the
    empty string.

    Raises ValueError if ``number`` is not an integer or is negative.
    """
    try:
        integer_types = (int, long)
    except NameError:
        # This interpreter has no separate ``long`` type; ``int``
        # already covers arbitrarily large integers.
        integer_types = (int,)
    if not isinstance(number, integer_types):
        # Wrap the value in a 1-tuple so that a tuple argument is
        # reported in the message instead of being unpacked as
        # formatting arguments (which would raise TypeError).
        raise ValueError(
            "You can only make identifiers out of integers (not %r)"
            % (number,))
    if number < 0:
        raise ValueError(
            "You cannot make identifiers out of negative numbers: %r"
            % (number,))
    result = []
    while number:
        # divmod() floors explicitly, so this does not depend on how
        # the ``/`` operator treats integers.
        number, digit = divmod(number, base)
        result.append(good_characters[digit])
    return ''.join(result)
---|
36 | |
---|
def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
                    group=None, upper=False):
    """
    Hashes the string (with the given hashing module), then turns that
    hash into an identifier of the given length (using modulo to
    reduce the length of the identifier).  If ``pad`` is False, then
    the minimum-length identifier will be used; otherwise the
    identifier will be padded on the left with the first character of
    ``good_characters`` until it is ``length`` characters long.

    ``s`` does not have to be a string: unicode objects are encoded as
    UTF-8 and anything else is passed through ``str()`` before hashing.

    ``hasher`` is a module-like object with a ``new(data)`` function
    whose result has a ``digest()`` method (such as the ``md5``
    module).

    ``prefix`` will be added last, and does not count towards the
    target length.  ``group`` will group the characters with ``-`` in
    the given lengths, and also does not count towards the target
    length.  E.g., ``group=4`` will cause an identifier like
    ``a5f3-hgk3-abdf``.  Groups are counted from the right, so a
    shorter leftover group comes first.  Grouping occurs before the
    prefix.  ``upper`` upper-cases the identifier, but not the prefix.

    Raises ValueError if ``length`` is greater than 26 while using the
    md5 hasher, or if ``group`` is negative.
    """
    if length > 26 and hasher is md5:
        raise ValueError(
            "md5 cannot create hashes longer than 26 characters in "
            "length (you gave %s)" % (length,))
    if group and group < 0:
        # A negative group size would never shrink ``ident`` in the
        # grouping loop below, so reject it instead of looping forever.
        raise ValueError(
            "group must be a positive number of characters (not %r)"
            % (group,))
    if isinstance(s, unicode):
        s = s.encode('utf-8')
    h = hasher.new(str(s))
    bin_hash = h.digest()
    modulo = base ** length
    # Fold the digest bytes into one big integer, reducing as we go so
    # the result always fits in ``length`` identifier characters.
    number = 0
    for c in bin_hash:
        number = (number * 256 + ord(c)) % modulo
    ident = make_identifier(number)
    if pad:
        ident = good_characters[0] * (length - len(ident)) + ident
    if group:
        # Peel groups off the right-hand end so that any shorter
        # leftover group ends up at the front.
        parts = []
        while ident:
            parts.insert(0, ident[-group:])
            ident = ident[:-group]
        ident = '-'.join(parts)
    if upper:
        ident = ident.upper()
    return prefix + ident
---|
76 | |
---|
# doctest tests:
# ``__test__`` maps a test name to a string of doctest examples; the
# ``doctest.testmod()`` call at the bottom of this file runs them.
# The expected identifiers are the characters of ``good_characters``
# with the least significant digit first.
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}
---|
113 | |
---|
if __name__ == '__main__':
    # Executed as a script: run this module's doctests (the examples
    # registered in ``__test__``).
    from doctest import testmod
    testmod()
---|
117 | |
---|