| 1 | # topological.py |
|---|
| 2 | # Copyright (C) 2005, 2006, 2007, 2008, 2009 Michael Bayer mike_mp@zzzcomputing.com |
|---|
| 3 | # |
|---|
| 4 | # This module is part of SQLAlchemy and is released under |
|---|
| 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
|---|
| 6 | |
|---|
| 7 | """Topological sorting algorithms. |
|---|
| 8 | |
|---|
| 9 | The topological sort is an algorithm that receives this list of |
|---|
| 10 | dependencies as a *partial ordering*, that is a list of pairs which |
|---|
| 11 | might say, *X is dependent on Y*, *Q is dependent on Z*, but does not |
|---|
| 12 | necessarily tell you anything about Q being dependent on X. Therefore, |
|---|
| 13 | its not a straight sort where every element can be compared to |
|---|
| 14 | another... only some of the elements have any sorting preference, and |
|---|
| 15 | then only towards just some of the other elements. For a particular |
|---|
| 16 | partial ordering, there can be many possible sorts that satisfy the |
|---|
| 17 | conditions. |
|---|
| 18 | |
|---|
| 19 | """ |
|---|
| 20 | |
|---|
| 21 | from sqlalchemy.exc import CircularDependencyError |
|---|
| 22 | from sqlalchemy import util |
|---|
| 23 | |
|---|
| 24 | __all__ = ['sort', 'sort_with_cycles', 'sort_as_tree'] |
|---|
| 25 | |
|---|
| 26 | def sort(tuples, allitems): |
|---|
| 27 | """sort the given list of items by dependency. |
|---|
| 28 | |
|---|
| 29 | 'tuples' is a list of tuples representing a partial ordering. |
|---|
| 30 | """ |
|---|
| 31 | |
|---|
| 32 | return [n.item for n in _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=True)] |
|---|
| 33 | |
|---|
| 34 | def sort_with_cycles(tuples, allitems): |
|---|
| 35 | """sort the given list of items by dependency, cutting out cycles. |
|---|
| 36 | |
|---|
| 37 | returns results as an iterable of 2-tuples, containing the item, |
|---|
| 38 | and a list containing items involved in a cycle with this item, if any. |
|---|
| 39 | |
|---|
| 40 | 'tuples' is a list of tuples representing a partial ordering. |
|---|
| 41 | """ |
|---|
| 42 | |
|---|
| 43 | return [(n.item, [n.item for n in n.cycles or []]) for n in _sort(tuples, allitems, allow_cycles=True)] |
|---|
| 44 | |
|---|
| 45 | def sort_as_tree(tuples, allitems, with_cycles=False): |
|---|
| 46 | """sort the given list of items by dependency, and return results |
|---|
| 47 | as a hierarchical tree structure. |
|---|
| 48 | |
|---|
| 49 | returns results as an iterable of 3-tuples, containing the item, |
|---|
| 50 | a list containing items involved in a cycle with this item, if any, |
|---|
| 51 | and a list of child tuples. |
|---|
| 52 | |
|---|
| 53 | if with_cycles is False, the returned structure is of the same form |
|---|
| 54 | but the second element of each tuple, i.e. the 'cycles', is an empty list. |
|---|
| 55 | |
|---|
| 56 | 'tuples' is a list of tuples representing a partial ordering. |
|---|
| 57 | """ |
|---|
| 58 | |
|---|
| 59 | return _organize_as_tree(_sort(tuples, allitems, allow_cycles=with_cycles)) |
|---|
| 60 | |
|---|
| 61 | |
|---|
| 62 | class _Node(object): |
|---|
| 63 | """Represent each item in the sort.""" |
|---|
| 64 | |
|---|
| 65 | def __init__(self, item): |
|---|
| 66 | self.item = item |
|---|
| 67 | self.dependencies = set() |
|---|
| 68 | self.children = [] |
|---|
| 69 | self.cycles = None |
|---|
| 70 | |
|---|
| 71 | def __str__(self): |
|---|
| 72 | return self.safestr() |
|---|
| 73 | |
|---|
| 74 | def safestr(self, indent=0): |
|---|
| 75 | return (' ' * indent * 2) + \ |
|---|
| 76 | str(self.item) + \ |
|---|
| 77 | (self.cycles is not None and (" (cycles: " + repr([x for x in self.cycles]) + ")") or "") + \ |
|---|
| 78 | "\n" + \ |
|---|
| 79 | ''.join(str(n) for n in self.children) |
|---|
| 80 | |
|---|
| 81 | def __repr__(self): |
|---|
| 82 | return "%s" % (str(self.item)) |
|---|
| 83 | |
|---|
| 84 | def all_deps(self): |
|---|
| 85 | """Return a set of dependencies for this node and all its cycles.""" |
|---|
| 86 | |
|---|
| 87 | deps = set(self.dependencies) |
|---|
| 88 | if self.cycles is not None: |
|---|
| 89 | for c in self.cycles: |
|---|
| 90 | deps.update(c.dependencies) |
|---|
| 91 | return deps |
|---|
| 92 | |
|---|
| 93 | class _EdgeCollection(object): |
|---|
| 94 | """A collection of directed edges.""" |
|---|
| 95 | |
|---|
| 96 | def __init__(self): |
|---|
| 97 | self.parent_to_children = util.defaultdict(set) |
|---|
| 98 | self.child_to_parents = util.defaultdict(set) |
|---|
| 99 | |
|---|
| 100 | def add(self, edge): |
|---|
| 101 | """Add an edge to this collection.""" |
|---|
| 102 | |
|---|
| 103 | parentnode, childnode = edge |
|---|
| 104 | self.parent_to_children[parentnode].add(childnode) |
|---|
| 105 | self.child_to_parents[childnode].add(parentnode) |
|---|
| 106 | parentnode.dependencies.add(childnode) |
|---|
| 107 | |
|---|
| 108 | def remove(self, edge): |
|---|
| 109 | """Remove an edge from this collection. |
|---|
| 110 | |
|---|
| 111 | Return the childnode if it has no other parents. |
|---|
| 112 | """ |
|---|
| 113 | |
|---|
| 114 | (parentnode, childnode) = edge |
|---|
| 115 | self.parent_to_children[parentnode].remove(childnode) |
|---|
| 116 | self.child_to_parents[childnode].remove(parentnode) |
|---|
| 117 | if not self.child_to_parents[childnode]: |
|---|
| 118 | return childnode |
|---|
| 119 | else: |
|---|
| 120 | return None |
|---|
| 121 | |
|---|
| 122 | def has_parents(self, node): |
|---|
| 123 | return node in self.child_to_parents and bool(self.child_to_parents[node]) |
|---|
| 124 | |
|---|
| 125 | def edges_by_parent(self, node): |
|---|
| 126 | if node in self.parent_to_children: |
|---|
| 127 | return [(node, child) for child in self.parent_to_children[node]] |
|---|
| 128 | else: |
|---|
| 129 | return [] |
|---|
| 130 | |
|---|
| 131 | def get_parents(self): |
|---|
| 132 | return self.parent_to_children.keys() |
|---|
| 133 | |
|---|
| 134 | def pop_node(self, node): |
|---|
| 135 | """Remove all edges where the given node is a parent. |
|---|
| 136 | |
|---|
| 137 | Return the collection of all nodes which were children of the |
|---|
| 138 | given node, and have no further parents. |
|---|
| 139 | """ |
|---|
| 140 | |
|---|
| 141 | children = self.parent_to_children.pop(node, None) |
|---|
| 142 | if children is not None: |
|---|
| 143 | for child in children: |
|---|
| 144 | self.child_to_parents[child].remove(node) |
|---|
| 145 | if not self.child_to_parents[child]: |
|---|
| 146 | yield child |
|---|
| 147 | |
|---|
| 148 | def __len__(self): |
|---|
| 149 | return sum(len(x) for x in self.parent_to_children.values()) |
|---|
| 150 | |
|---|
| 151 | def __iter__(self): |
|---|
| 152 | for parent, children in self.parent_to_children.iteritems(): |
|---|
| 153 | for child in children: |
|---|
| 154 | yield (parent, child) |
|---|
| 155 | |
|---|
| 156 | def __repr__(self): |
|---|
| 157 | return repr(list(self)) |
|---|
| 158 | |
|---|
| 159 | def _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=False): |
|---|
| 160 | nodes = {} |
|---|
| 161 | edges = _EdgeCollection() |
|---|
| 162 | |
|---|
| 163 | for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]: |
|---|
| 164 | if id(item) not in nodes: |
|---|
| 165 | node = _Node(item) |
|---|
| 166 | nodes[item] = node |
|---|
| 167 | |
|---|
| 168 | for t in tuples: |
|---|
| 169 | if t[0] is t[1]: |
|---|
| 170 | if allow_cycles: |
|---|
| 171 | n = nodes[t[0]] |
|---|
| 172 | n.cycles = set([n]) |
|---|
| 173 | elif not ignore_self_cycles: |
|---|
| 174 | raise CircularDependencyError("Self-referential dependency detected " + repr(t)) |
|---|
| 175 | continue |
|---|
| 176 | childnode = nodes[t[1]] |
|---|
| 177 | parentnode = nodes[t[0]] |
|---|
| 178 | edges.add((parentnode, childnode)) |
|---|
| 179 | |
|---|
| 180 | queue = [] |
|---|
| 181 | for n in nodes.values(): |
|---|
| 182 | if not edges.has_parents(n): |
|---|
| 183 | queue.append(n) |
|---|
| 184 | |
|---|
| 185 | output = [] |
|---|
| 186 | while nodes: |
|---|
| 187 | if not queue: |
|---|
| 188 | # edges remain but no edgeless nodes to remove; this indicates |
|---|
| 189 | # a cycle |
|---|
| 190 | if allow_cycles: |
|---|
| 191 | for cycle in _find_cycles(edges): |
|---|
| 192 | lead = cycle[0][0] |
|---|
| 193 | lead.cycles = set() |
|---|
| 194 | for edge in cycle: |
|---|
| 195 | n = edges.remove(edge) |
|---|
| 196 | lead.cycles.add(edge[0]) |
|---|
| 197 | lead.cycles.add(edge[1]) |
|---|
| 198 | if n is not None: |
|---|
| 199 | queue.append(n) |
|---|
| 200 | for n in lead.cycles: |
|---|
| 201 | if n is not lead: |
|---|
| 202 | n._cyclical = True |
|---|
| 203 | for (n, k) in list(edges.edges_by_parent(n)): |
|---|
| 204 | edges.add((lead, k)) |
|---|
| 205 | edges.remove((n, k)) |
|---|
| 206 | continue |
|---|
| 207 | else: |
|---|
| 208 | # long cycles not allowed |
|---|
| 209 | raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue)) |
|---|
| 210 | node = queue.pop() |
|---|
| 211 | if not hasattr(node, '_cyclical'): |
|---|
| 212 | output.append(node) |
|---|
| 213 | del nodes[node.item] |
|---|
| 214 | for childnode in edges.pop_node(node): |
|---|
| 215 | queue.append(childnode) |
|---|
| 216 | return output |
|---|
| 217 | |
|---|
| 218 | def _organize_as_tree(nodes): |
|---|
| 219 | """Given a list of nodes from a topological sort, organize the |
|---|
| 220 | nodes into a tree structure, with as many non-dependent nodes |
|---|
| 221 | set as siblings to each other as possible. |
|---|
| 222 | |
|---|
| 223 | returns nodes as 3-tuples (item, cycles, children). |
|---|
| 224 | """ |
|---|
| 225 | |
|---|
| 226 | if not nodes: |
|---|
| 227 | return None |
|---|
| 228 | # a list of all currently independent subtrees as a tuple of |
|---|
| 229 | # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree) |
|---|
| 230 | # order of the list has no semantics for the algorithmic |
|---|
| 231 | independents = [] |
|---|
| 232 | # in reverse topological order |
|---|
| 233 | for node in reversed(nodes): |
|---|
| 234 | # nodes subtree and cycles contain the node itself |
|---|
| 235 | subtree = set([node]) |
|---|
| 236 | if node.cycles is not None: |
|---|
| 237 | cycles = set(node.cycles) |
|---|
| 238 | else: |
|---|
| 239 | cycles = set() |
|---|
| 240 | # get a set of dependent nodes of node and its cycles |
|---|
| 241 | nodealldeps = node.all_deps() |
|---|
| 242 | if nodealldeps: |
|---|
| 243 | # iterate over independent node indexes in reverse order so we can efficiently remove them |
|---|
| 244 | for index in xrange(len(independents) - 1, -1, -1): |
|---|
| 245 | child, childsubtree, childcycles = independents[index] |
|---|
| 246 | # if there is a dependency between this node and an independent node |
|---|
| 247 | if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)): |
|---|
| 248 | # prepend child to nodes children |
|---|
| 249 | # (append should be fine, but previous implemetation used prepend) |
|---|
| 250 | node.children[0:0] = [(child.item, [n.item for n in child.cycles or []], child.children)] |
|---|
| 251 | # merge childs subtree and cycles |
|---|
| 252 | subtree.update(childsubtree) |
|---|
| 253 | cycles.update(childcycles) |
|---|
| 254 | # remove the child from list of independent subtrees |
|---|
| 255 | independents[index:index+1] = [] |
|---|
| 256 | # add node as a new independent subtree |
|---|
| 257 | independents.append((node, subtree, cycles)) |
|---|
| 258 | # choose an arbitrary node from list of all independent subtrees |
|---|
| 259 | head = independents.pop()[0] |
|---|
| 260 | # add all other independent subtrees as a child of the chosen root |
|---|
| 261 | # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation |
|---|
| 262 | head.children[0:0] = [(i[0].item, [n.item for n in i[0].cycles or []], i[0].children) for i in independents] |
|---|
| 263 | return (head.item, [n.item for n in head.cycles or []], head.children) |
|---|
| 264 | |
|---|
| 265 | def _find_cycles(edges): |
|---|
| 266 | involved_in_cycles = set() |
|---|
| 267 | cycles = {} |
|---|
| 268 | def traverse(node, goal=None, cycle=None): |
|---|
| 269 | if goal is None: |
|---|
| 270 | goal = node |
|---|
| 271 | cycle = [] |
|---|
| 272 | elif node is goal: |
|---|
| 273 | return True |
|---|
| 274 | |
|---|
| 275 | for (n, key) in edges.edges_by_parent(node): |
|---|
| 276 | if key in cycle: |
|---|
| 277 | continue |
|---|
| 278 | cycle.append(key) |
|---|
| 279 | if traverse(key, goal, cycle): |
|---|
| 280 | cycset = set(cycle) |
|---|
| 281 | for x in cycle: |
|---|
| 282 | involved_in_cycles.add(x) |
|---|
| 283 | if x in cycles: |
|---|
| 284 | existing_set = cycles[x] |
|---|
| 285 | [existing_set.add(y) for y in cycset] |
|---|
| 286 | for y in existing_set: |
|---|
| 287 | cycles[y] = existing_set |
|---|
| 288 | cycset = existing_set |
|---|
| 289 | else: |
|---|
| 290 | cycles[x] = cycset |
|---|
| 291 | cycle.pop() |
|---|
| 292 | |
|---|
| 293 | for parent in edges.get_parents(): |
|---|
| 294 | traverse(parent) |
|---|
| 295 | |
|---|
| 296 | # sets are not hashable, so uniquify with id |
|---|
| 297 | unique_cycles = dict((id(s), s) for s in cycles.values()).values() |
|---|
| 298 | for cycle in unique_cycles: |
|---|
| 299 | edgecollection = [edge for edge in edges |
|---|
| 300 | if edge[0] in cycle and edge[1] in cycle] |
|---|
| 301 | yield edgecollection |
|---|