1 | # -*- coding: iso-8859-15 -*- |
---|
2 | """Immutable integer set type. |
---|
3 | |
---|
4 | Integer set class. |
---|
5 | |
---|
6 | Copyright (C) 2006, Heiko Wundram. |
---|
7 | Released under the MIT license. |
---|
8 | """ |
---|
9 | |
---|
10 | # Version information |
---|
11 | # ------------------- |
---|
12 | |
---|
13 | __author__ = "Heiko Wundram <me@modelnine.org>" |
---|
14 | __version__ = "0.2" |
---|
15 | __revision__ = "6" |
---|
16 | __date__ = "2006-01-20" |
---|
17 | |
---|
18 | |
---|
19 | # Utility classes |
---|
20 | # --------------- |
---|
21 | |
---|
22 | class _Infinity(object): |
---|
23 | """Internal type used to represent infinity values.""" |
---|
24 | |
---|
25 | __slots__ = ["_neg"] |
---|
26 | |
---|
27 | def __init__(self,neg): |
---|
28 | self._neg = neg |
---|
29 | |
---|
30 | def __lt__(self,value): |
---|
31 | if not isinstance(value,(int,long,_Infinity)): |
---|
32 | return NotImplemented |
---|
33 | return ( self._neg and |
---|
34 | not ( isinstance(value,_Infinity) and value._neg ) ) |
---|
35 | |
---|
36 | def __le__(self,value): |
---|
37 | if not isinstance(value,(int,long,_Infinity)): |
---|
38 | return NotImplemented |
---|
39 | return self._neg |
---|
40 | |
---|
41 | def __gt__(self,value): |
---|
42 | if not isinstance(value,(int,long,_Infinity)): |
---|
43 | return NotImplemented |
---|
44 | return not ( self._neg or |
---|
45 | ( isinstance(value,_Infinity) and not value._neg ) ) |
---|
46 | |
---|
47 | def __ge__(self,value): |
---|
48 | if not isinstance(value,(int,long,_Infinity)): |
---|
49 | return NotImplemented |
---|
50 | return not self._neg |
---|
51 | |
---|
52 | def __eq__(self,value): |
---|
53 | if not isinstance(value,(int,long,_Infinity)): |
---|
54 | return NotImplemented |
---|
55 | return isinstance(value,_Infinity) and self._neg == value._neg |
---|
56 | |
---|
57 | def __ne__(self,value): |
---|
58 | if not isinstance(value,(int,long,_Infinity)): |
---|
59 | return NotImplemented |
---|
60 | return not isinstance(value,_Infinity) or self._neg <> value._neg |
---|
61 | |
---|
62 | def __repr__(self): |
---|
63 | return "None" |
---|
64 | |
---|
65 | |
---|
66 | # Constants |
---|
67 | # --------- |
---|
68 | |
---|
69 | _MININF = _Infinity(True) |
---|
70 | _MAXINF = _Infinity(False) |
---|
71 | |
---|
72 | |
---|
73 | # Integer set class |
---|
74 | # ----------------- |
---|
75 | |
---|
76 | class IntSet(object): |
---|
77 | """Integer set class with efficient storage in a RLE format of ranges. |
---|
78 | Supports minus and plus infinity in the range.""" |
---|
79 | |
---|
80 | __slots__ = ["_ranges","_min","_max","_hash"] |
---|
81 | |
---|
82 | def __init__(self,*args,**kwargs): |
---|
83 | """Initialize an integer set. The constructor accepts an unlimited |
---|
84 | number of arguments that may either be tuples in the form of |
---|
85 | (start,stop) where either start or stop may be a number or None to |
---|
86 | represent maximum/minimum in that direction. The range specified by |
---|
87 | (start,stop) is always inclusive (differing from the builtin range |
---|
88 | operator). |
---|
89 | |
---|
90 | Keyword arguments that can be passed to an integer set are min and |
---|
91 | max, which specify the minimum and maximum number in the set, |
---|
92 | respectively. You can also pass None here to represent minus or plus |
---|
93 | infinity, which is also the default. |
---|
94 | """ |
---|
95 | |
---|
96 | # Special case copy constructor. |
---|
97 | if len(args) == 1 and isinstance(args[0],IntSet): |
---|
98 | if kwargs: |
---|
99 | raise ValueError("No keyword arguments for copy constructor.") |
---|
100 | self._min = args[0]._min |
---|
101 | self._max = args[0]._max |
---|
102 | self._ranges = args[0]._ranges |
---|
103 | self._hash = args[0]._hash |
---|
104 | return |
---|
105 | |
---|
106 | # Initialize set. |
---|
107 | self._ranges = [] |
---|
108 | |
---|
109 | # Process keyword arguments. |
---|
110 | self._min = kwargs.pop("min",_MININF) |
---|
111 | self._max = kwargs.pop("max",_MAXINF) |
---|
112 | if self._min is None: |
---|
113 | self._min = _MININF |
---|
114 | if self._max is None: |
---|
115 | self._max = _MAXINF |
---|
116 | |
---|
117 | # Check keyword arguments. |
---|
118 | if kwargs: |
---|
119 | raise ValueError("Invalid keyword argument.") |
---|
120 | if not ( isinstance(self._min,(int,long)) or self._min is _MININF ): |
---|
121 | raise TypeError("Invalid type of min argument.") |
---|
122 | if not ( isinstance(self._max,(int,long)) or self._max is _MAXINF ): |
---|
123 | raise TypeError("Invalid type of max argument.") |
---|
124 | if ( self._min is not _MININF and self._max is not _MAXINF and |
---|
125 | self._min > self._max ): |
---|
126 | raise ValueError("Minimum is not smaller than maximum.") |
---|
127 | if isinstance(self._max,(int,long)): |
---|
128 | self._max += 1 |
---|
129 | |
---|
130 | # Process arguments. |
---|
131 | for arg in args: |
---|
132 | if isinstance(arg,(int,long)): |
---|
133 | start, stop = arg, arg+1 |
---|
134 | elif isinstance(arg,tuple): |
---|
135 | if len(arg) <> 2: |
---|
136 | raise ValueError("Invalid tuple, must be (start,stop).") |
---|
137 | |
---|
138 | # Process argument. |
---|
139 | start, stop = arg |
---|
140 | if start is None: |
---|
141 | start = self._min |
---|
142 | if stop is None: |
---|
143 | stop = self._max |
---|
144 | |
---|
145 | # Check arguments. |
---|
146 | if not ( isinstance(start,(int,long)) or start is _MININF ): |
---|
147 | raise TypeError("Invalid type of tuple start.") |
---|
148 | if not ( isinstance(stop,(int,long)) or stop is _MAXINF ): |
---|
149 | raise TypeError("Invalid type of tuple stop.") |
---|
150 | if ( start is not _MININF and stop is not _MAXINF and |
---|
151 | start > stop ): |
---|
152 | continue |
---|
153 | if isinstance(stop,(int,long)): |
---|
154 | stop += 1 |
---|
155 | else: |
---|
156 | raise TypeError("Invalid argument.") |
---|
157 | |
---|
158 | if start > self._max: |
---|
159 | continue |
---|
160 | elif start < self._min: |
---|
161 | start = self._min |
---|
162 | if stop < self._min: |
---|
163 | continue |
---|
164 | elif stop > self._max: |
---|
165 | stop = self._max |
---|
166 | self._ranges.append((start,stop)) |
---|
167 | |
---|
168 | # Normalize set. |
---|
169 | self._normalize() |
---|
170 | |
---|
171 | # Utility functions for set operations |
---|
172 | # ------------------------------------ |
---|
173 | |
---|
174 | def _iterranges(self,r1,r2,minval=_MININF,maxval=_MAXINF): |
---|
175 | curval = minval |
---|
176 | curstates = {"r1":False,"r2":False} |
---|
177 | imax, jmax = 2*len(r1), 2*len(r2) |
---|
178 | i, j = 0, 0 |
---|
179 | while i < imax or j < jmax: |
---|
180 | if i < imax and ( ( j < jmax and |
---|
181 | r1[i>>1][i&1] < r2[j>>1][j&1] ) or |
---|
182 | j == jmax ): |
---|
183 | cur_r, newname, newstate = r1[i>>1][i&1], "r1", not (i&1) |
---|
184 | i += 1 |
---|
185 | else: |
---|
186 | cur_r, newname, newstate = r2[j>>1][j&1], "r2", not (j&1) |
---|
187 | j += 1 |
---|
188 | if curval < cur_r: |
---|
189 | if cur_r > maxval: |
---|
190 | break |
---|
191 | yield curstates, (curval,cur_r) |
---|
192 | curval = cur_r |
---|
193 | curstates[newname] = newstate |
---|
194 | if curval < maxval: |
---|
195 | yield curstates, (curval,maxval) |
---|
196 | |
---|
197 | def _normalize(self): |
---|
198 | self._ranges.sort() |
---|
199 | i = 1 |
---|
200 | while i < len(self._ranges): |
---|
201 | if self._ranges[i][0] < self._ranges[i-1][1]: |
---|
202 | self._ranges[i-1] = (self._ranges[i-1][0], |
---|
203 | max(self._ranges[i-1][1], |
---|
204 | self._ranges[i][1])) |
---|
205 | del self._ranges[i] |
---|
206 | else: |
---|
207 | i += 1 |
---|
208 | self._ranges = tuple(self._ranges) |
---|
209 | self._hash = hash(self._ranges) |
---|
210 | |
---|
211 | def __coerce__(self,other): |
---|
212 | if isinstance(other,IntSet): |
---|
213 | return self, other |
---|
214 | elif isinstance(other,(int,long,tuple)): |
---|
215 | try: |
---|
216 | return self, self.__class__(other) |
---|
217 | except TypeError: |
---|
218 | # Catch a type error, in that case the structure specified by |
---|
219 | # other is something we can't coerce, return NotImplemented. |
---|
220 | # ValueErrors are not caught, they signal that the data was |
---|
221 | # invalid for the constructor. This is appropriate to signal |
---|
222 | # as a ValueError to the caller. |
---|
223 | return NotImplemented |
---|
224 | elif isinstance(other,list): |
---|
225 | try: |
---|
226 | return self, self.__class__(*other) |
---|
227 | except TypeError: |
---|
228 | # See above. |
---|
229 | return NotImplemented |
---|
230 | return NotImplemented |
---|
231 | |
---|
232 | # Set function definitions |
---|
233 | # ------------------------ |
---|
234 | |
---|
235 | def _make_function(name,type,doc,pall,pany=None): |
---|
236 | """Makes a function to match two ranges. Accepts two types: either |
---|
237 | 'set', which defines a function which returns a set with all ranges |
---|
238 | matching pall (pany is ignored), or 'bool', which returns True if pall |
---|
239 | matches for all ranges and pany matches for any one range. doc is the |
---|
240 | dostring to give this function. pany may be none to ignore the any |
---|
241 | match. |
---|
242 | |
---|
243 | The predicates get a dict with two keys, 'r1', 'r2', which denote |
---|
244 | whether the current range is present in range1 (self) and/or range2 |
---|
245 | (other) or none of the two, respectively.""" |
---|
246 | |
---|
247 | if type == "set": |
---|
248 | def f(self,other): |
---|
249 | coerced = self.__coerce__(other) |
---|
250 | if coerced is NotImplemented: |
---|
251 | return NotImplemented |
---|
252 | other = coerced[1] |
---|
253 | newset = self.__class__.__new__(self.__class__) |
---|
254 | newset._min = min(self._min,other._min) |
---|
255 | newset._max = max(self._max,other._max) |
---|
256 | newset._ranges = [] |
---|
257 | for states, (start,stop) in \ |
---|
258 | self._iterranges(self._ranges,other._ranges, |
---|
259 | newset._min,newset._max): |
---|
260 | if pall(states): |
---|
261 | if newset._ranges and newset._ranges[-1][1] == start: |
---|
262 | newset._ranges[-1] = (newset._ranges[-1][0],stop) |
---|
263 | else: |
---|
264 | newset._ranges.append((start,stop)) |
---|
265 | newset._ranges = tuple(newset._ranges) |
---|
266 | newset._hash = hash(self._ranges) |
---|
267 | return newset |
---|
268 | elif type == "bool": |
---|
269 | def f(self,other): |
---|
270 | coerced = self.__coerce__(other) |
---|
271 | if coerced is NotImplemented: |
---|
272 | return NotImplemented |
---|
273 | other = coerced[1] |
---|
274 | _min = min(self._min,other._min) |
---|
275 | _max = max(self._max,other._max) |
---|
276 | found = not pany |
---|
277 | for states, (start,stop) in \ |
---|
278 | self._iterranges(self._ranges,other._ranges,_min,_max): |
---|
279 | if not pall(states): |
---|
280 | return False |
---|
281 | found = found or pany(states) |
---|
282 | return found |
---|
283 | else: |
---|
284 | raise ValueError("Invalid type of function to create.") |
---|
285 | try: |
---|
286 | f.func_name = name |
---|
287 | except TypeError: |
---|
288 | pass |
---|
289 | f.func_doc = doc |
---|
290 | return f |
---|
291 | |
---|
292 | # Intersection. |
---|
293 | __and__ = _make_function("__and__","set", |
---|
294 | "Intersection of two sets as a new set.", |
---|
295 | lambda s: s["r1"] and s["r2"]) |
---|
296 | __rand__ = _make_function("__rand__","set", |
---|
297 | "Intersection of two sets as a new set.", |
---|
298 | lambda s: s["r1"] and s["r2"]) |
---|
299 | intersection = _make_function("intersection","set", |
---|
300 | "Intersection of two sets as a new set.", |
---|
301 | lambda s: s["r1"] and s["r2"]) |
---|
302 | |
---|
303 | # Union. |
---|
304 | __or__ = _make_function("__or__","set", |
---|
305 | "Union of two sets as a new set.", |
---|
306 | lambda s: s["r1"] or s["r2"]) |
---|
307 | __ror__ = _make_function("__ror__","set", |
---|
308 | "Union of two sets as a new set.", |
---|
309 | lambda s: s["r1"] or s["r2"]) |
---|
310 | union = _make_function("union","set", |
---|
311 | "Union of two sets as a new set.", |
---|
312 | lambda s: s["r1"] or s["r2"]) |
---|
313 | |
---|
314 | # Difference. |
---|
315 | __sub__ = _make_function("__sub__","set", |
---|
316 | "Difference of two sets as a new set.", |
---|
317 | lambda s: s["r1"] and not s["r2"]) |
---|
318 | __rsub__ = _make_function("__rsub__","set", |
---|
319 | "Difference of two sets as a new set.", |
---|
320 | lambda s: s["r2"] and not s["r1"]) |
---|
321 | difference = _make_function("difference","set", |
---|
322 | "Difference of two sets as a new set.", |
---|
323 | lambda s: s["r1"] and not s["r2"]) |
---|
324 | |
---|
325 | # Symmetric difference. |
---|
326 | __xor__ = _make_function("__xor__","set", |
---|
327 | "Symmetric difference of two sets as a new set.", |
---|
328 | lambda s: s["r1"] ^ s["r2"]) |
---|
329 | __rxor__ = _make_function("__rxor__","set", |
---|
330 | "Symmetric difference of two sets as a new set.", |
---|
331 | lambda s: s["r1"] ^ s["r2"]) |
---|
332 | symmetric_difference = _make_function("symmetric_difference","set", |
---|
333 | "Symmetric difference of two sets as a new set.", |
---|
334 | lambda s: s["r1"] ^ s["r2"]) |
---|
335 | |
---|
336 | # Containership testing. |
---|
337 | __contains__ = _make_function("__contains__","bool", |
---|
338 | "Returns true if self is superset of other.", |
---|
339 | lambda s: s["r1"] or not s["r2"]) |
---|
340 | issubset = _make_function("issubset","bool", |
---|
341 | "Returns true if self is subset of other.", |
---|
342 | lambda s: s["r2"] or not s["r1"]) |
---|
343 | istruesubset = _make_function("istruesubset","bool", |
---|
344 | "Returns true if self is true subset of other.", |
---|
345 | lambda s: s["r2"] or not s["r1"], |
---|
346 | lambda s: s["r2"] and not s["r1"]) |
---|
347 | issuperset = _make_function("issuperset","bool", |
---|
348 | "Returns true if self is superset of other.", |
---|
349 | lambda s: s["r1"] or not s["r2"]) |
---|
350 | istruesuperset = _make_function("istruesuperset","bool", |
---|
351 | "Returns true if self is true superset of other.", |
---|
352 | lambda s: s["r1"] or not s["r2"], |
---|
353 | lambda s: s["r1"] and not s["r2"]) |
---|
354 | overlaps = _make_function("overlaps","bool", |
---|
355 | "Returns true if self overlaps with other.", |
---|
356 | lambda s: True, |
---|
357 | lambda s: s["r1"] and s["r2"]) |
---|
358 | |
---|
359 | # Comparison. |
---|
360 | __eq__ = _make_function("__eq__","bool", |
---|
361 | "Returns true if self is equal to other.", |
---|
362 | lambda s: not ( s["r1"] ^ s["r2"] )) |
---|
363 | __ne__ = _make_function("__ne__","bool", |
---|
364 | "Returns true if self is different to other.", |
---|
365 | lambda s: True, |
---|
366 | lambda s: s["r1"] ^ s["r2"]) |
---|
367 | |
---|
368 | # Clean up namespace. |
---|
369 | del _make_function |
---|
370 | |
---|
371 | # Define other functions. |
---|
372 | def inverse(self): |
---|
373 | """Inverse of set as a new set.""" |
---|
374 | |
---|
375 | newset = self.__class__.__new__(self.__class__) |
---|
376 | newset._min = self._min |
---|
377 | newset._max = self._max |
---|
378 | newset._ranges = [] |
---|
379 | laststop = self._min |
---|
380 | for r in self._ranges: |
---|
381 | if laststop < r[0]: |
---|
382 | newset._ranges.append((laststop,r[0])) |
---|
383 | laststop = r[1] |
---|
384 | if laststop < self._max: |
---|
385 | newset._ranges.append((laststop,self._max)) |
---|
386 | return newset |
---|
387 | |
---|
388 | __invert__ = inverse |
---|
389 | |
---|
390 | # Hashing |
---|
391 | # ------- |
---|
392 | |
---|
393 | def __hash__(self): |
---|
394 | """Returns a hash value representing this integer set. As the set is |
---|
395 | always stored normalized, the hash value is guaranteed to match for |
---|
396 | matching ranges.""" |
---|
397 | |
---|
398 | return self._hash |
---|
399 | |
---|
400 | # Iterating |
---|
401 | # --------- |
---|
402 | |
---|
403 | def __len__(self): |
---|
404 | """Get length of this integer set. In case the length is larger than |
---|
405 | 2**31 (including infinitely sized integer sets), it raises an |
---|
406 | OverflowError. This is due to len() restricting the size to |
---|
407 | 0 <= len < 2**31.""" |
---|
408 | |
---|
409 | if not self._ranges: |
---|
410 | return 0 |
---|
411 | if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF: |
---|
412 | raise OverflowError("Infinitely sized integer set.") |
---|
413 | rlen = 0 |
---|
414 | for r in self._ranges: |
---|
415 | rlen += r[1]-r[0] |
---|
416 | if rlen >= 2**31: |
---|
417 | raise OverflowError("Integer set bigger than 2**31.") |
---|
418 | return rlen |
---|
419 | |
---|
420 | def len(self): |
---|
421 | """Returns the length of this integer set as an integer. In case the |
---|
422 | length is infinite, returns -1. This function exists because of a |
---|
423 | limitation of the builtin len() function which expects values in |
---|
424 | the range 0 <= len < 2**31. Use this function in case your integer |
---|
425 | set might be larger.""" |
---|
426 | |
---|
427 | if not self._ranges: |
---|
428 | return 0 |
---|
429 | if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF: |
---|
430 | return -1 |
---|
431 | rlen = 0 |
---|
432 | for r in self._ranges: |
---|
433 | rlen += r[1]-r[0] |
---|
434 | return rlen |
---|
435 | |
---|
436 | def __nonzero__(self): |
---|
437 | """Returns true if this integer set contains at least one item.""" |
---|
438 | |
---|
439 | return bool(self._ranges) |
---|
440 | |
---|
441 | def __iter__(self): |
---|
442 | """Iterate over all values in this integer set. Iteration always starts |
---|
443 | by iterating from lowest to highest over the ranges that are bounded. |
---|
444 | After processing these, all ranges that are unbounded (maximum 2) are |
---|
445 | yielded intermixed.""" |
---|
446 | |
---|
447 | ubranges = [] |
---|
448 | for r in self._ranges: |
---|
449 | if r[0] is _MININF: |
---|
450 | if r[1] is _MAXINF: |
---|
451 | ubranges.extend(([0,1],[-1,-1])) |
---|
452 | else: |
---|
453 | ubranges.append([r[1]-1,-1]) |
---|
454 | elif r[1] is _MAXINF: |
---|
455 | ubranges.append([r[0],1]) |
---|
456 | else: |
---|
457 | for val in xrange(r[0],r[1]): |
---|
458 | yield val |
---|
459 | if ubranges: |
---|
460 | while True: |
---|
461 | for ubrange in ubranges: |
---|
462 | yield ubrange[0] |
---|
463 | ubrange[0] += ubrange[1] |
---|
464 | |
---|
465 | # Printing |
---|
466 | # -------- |
---|
467 | |
---|
468 | def __repr__(self): |
---|
469 | """Return a representation of this integer set. The representation is |
---|
470 | executable to get an equal integer set.""" |
---|
471 | |
---|
472 | rv = [] |
---|
473 | for start, stop in self._ranges: |
---|
474 | if ( isinstance(start,(int,long)) and isinstance(stop,(int,long)) |
---|
475 | and stop-start == 1 ): |
---|
476 | rv.append("%r" % start) |
---|
477 | elif isinstance(stop,(int,long)): |
---|
478 | rv.append("(%r,%r)" % (start,stop-1)) |
---|
479 | else: |
---|
480 | rv.append("(%r,%r)" % (start,stop)) |
---|
481 | if self._min is not _MININF: |
---|
482 | rv.append("min=%r" % self._min) |
---|
483 | if self._max is not _MAXINF: |
---|
484 | rv.append("max=%r" % self._max) |
---|
485 | return "%s(%s)" % (self.__class__.__name__,",".join(rv)) |
---|
486 | |
---|
487 | if __name__ == "__main__": |
---|
488 | # Little test script demonstrating functionality. |
---|
489 | x = IntSet((10,20),30) |
---|
490 | y = IntSet((10,20)) |
---|
491 | z = IntSet((10,20),30,(15,19),min=0,max=40) |
---|
492 | print x |
---|
493 | print x&110 |
---|
494 | print x|110 |
---|
495 | print x^(15,25) |
---|
496 | print x-12 |
---|
497 | print 12 in x |
---|
498 | print x.issubset(x) |
---|
499 | print y.issubset(x) |
---|
500 | print x.istruesubset(x) |
---|
501 | print y.istruesubset(x) |
---|
502 | for val in x: |
---|
503 | print val |
---|
504 | print x.inverse() |
---|
505 | print x == z |
---|
506 | print x == y |
---|
507 | print x <> y |
---|
508 | print hash(x) |
---|
509 | print hash(z) |
---|
510 | print len(x) |
---|
511 | print x.len() |
---|