1 | # topological.py |
---|
2 | # Copyright (C) 2005, 2006, 2007, 2008, 2009 Michael Bayer mike_mp@zzzcomputing.com |
---|
3 | # |
---|
4 | # This module is part of SQLAlchemy and is released under |
---|
5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php |
---|
6 | |
---|
7 | """Topological sorting algorithms. |
---|
8 | |
---|
9 | The topological sort is an algorithm that receives this list of |
---|
10 | dependencies as a *partial ordering*, that is a list of pairs which |
---|
11 | might say, *X is dependent on Y*, *Q is dependent on Z*, but does not |
---|
12 | necessarily tell you anything about Q being dependent on X. Therefore, |
---|
13 | its not a straight sort where every element can be compared to |
---|
14 | another... only some of the elements have any sorting preference, and |
---|
15 | then only towards just some of the other elements. For a particular |
---|
16 | partial ordering, there can be many possible sorts that satisfy the |
---|
17 | conditions. |
---|
18 | |
---|
19 | """ |
---|
20 | |
---|
21 | from sqlalchemy.exc import CircularDependencyError |
---|
22 | from sqlalchemy import util |
---|
23 | |
---|
24 | __all__ = ['sort', 'sort_with_cycles', 'sort_as_tree'] |
---|
25 | |
---|
26 | def sort(tuples, allitems): |
---|
27 | """sort the given list of items by dependency. |
---|
28 | |
---|
29 | 'tuples' is a list of tuples representing a partial ordering. |
---|
30 | """ |
---|
31 | |
---|
32 | return [n.item for n in _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=True)] |
---|
33 | |
---|
34 | def sort_with_cycles(tuples, allitems): |
---|
35 | """sort the given list of items by dependency, cutting out cycles. |
---|
36 | |
---|
37 | returns results as an iterable of 2-tuples, containing the item, |
---|
38 | and a list containing items involved in a cycle with this item, if any. |
---|
39 | |
---|
40 | 'tuples' is a list of tuples representing a partial ordering. |
---|
41 | """ |
---|
42 | |
---|
43 | return [(n.item, [n.item for n in n.cycles or []]) for n in _sort(tuples, allitems, allow_cycles=True)] |
---|
44 | |
---|
45 | def sort_as_tree(tuples, allitems, with_cycles=False): |
---|
46 | """sort the given list of items by dependency, and return results |
---|
47 | as a hierarchical tree structure. |
---|
48 | |
---|
49 | returns results as an iterable of 3-tuples, containing the item, |
---|
50 | a list containing items involved in a cycle with this item, if any, |
---|
51 | and a list of child tuples. |
---|
52 | |
---|
53 | if with_cycles is False, the returned structure is of the same form |
---|
54 | but the second element of each tuple, i.e. the 'cycles', is an empty list. |
---|
55 | |
---|
56 | 'tuples' is a list of tuples representing a partial ordering. |
---|
57 | """ |
---|
58 | |
---|
59 | return _organize_as_tree(_sort(tuples, allitems, allow_cycles=with_cycles)) |
---|
60 | |
---|
61 | |
---|
62 | class _Node(object): |
---|
63 | """Represent each item in the sort.""" |
---|
64 | |
---|
65 | def __init__(self, item): |
---|
66 | self.item = item |
---|
67 | self.dependencies = set() |
---|
68 | self.children = [] |
---|
69 | self.cycles = None |
---|
70 | |
---|
71 | def __str__(self): |
---|
72 | return self.safestr() |
---|
73 | |
---|
74 | def safestr(self, indent=0): |
---|
75 | return (' ' * indent * 2) + \ |
---|
76 | str(self.item) + \ |
---|
77 | (self.cycles is not None and (" (cycles: " + repr([x for x in self.cycles]) + ")") or "") + \ |
---|
78 | "\n" + \ |
---|
79 | ''.join(str(n) for n in self.children) |
---|
80 | |
---|
81 | def __repr__(self): |
---|
82 | return "%s" % (str(self.item)) |
---|
83 | |
---|
84 | def all_deps(self): |
---|
85 | """Return a set of dependencies for this node and all its cycles.""" |
---|
86 | |
---|
87 | deps = set(self.dependencies) |
---|
88 | if self.cycles is not None: |
---|
89 | for c in self.cycles: |
---|
90 | deps.update(c.dependencies) |
---|
91 | return deps |
---|
92 | |
---|
93 | class _EdgeCollection(object): |
---|
94 | """A collection of directed edges.""" |
---|
95 | |
---|
96 | def __init__(self): |
---|
97 | self.parent_to_children = util.defaultdict(set) |
---|
98 | self.child_to_parents = util.defaultdict(set) |
---|
99 | |
---|
100 | def add(self, edge): |
---|
101 | """Add an edge to this collection.""" |
---|
102 | |
---|
103 | parentnode, childnode = edge |
---|
104 | self.parent_to_children[parentnode].add(childnode) |
---|
105 | self.child_to_parents[childnode].add(parentnode) |
---|
106 | parentnode.dependencies.add(childnode) |
---|
107 | |
---|
108 | def remove(self, edge): |
---|
109 | """Remove an edge from this collection. |
---|
110 | |
---|
111 | Return the childnode if it has no other parents. |
---|
112 | """ |
---|
113 | |
---|
114 | (parentnode, childnode) = edge |
---|
115 | self.parent_to_children[parentnode].remove(childnode) |
---|
116 | self.child_to_parents[childnode].remove(parentnode) |
---|
117 | if not self.child_to_parents[childnode]: |
---|
118 | return childnode |
---|
119 | else: |
---|
120 | return None |
---|
121 | |
---|
122 | def has_parents(self, node): |
---|
123 | return node in self.child_to_parents and bool(self.child_to_parents[node]) |
---|
124 | |
---|
125 | def edges_by_parent(self, node): |
---|
126 | if node in self.parent_to_children: |
---|
127 | return [(node, child) for child in self.parent_to_children[node]] |
---|
128 | else: |
---|
129 | return [] |
---|
130 | |
---|
131 | def get_parents(self): |
---|
132 | return self.parent_to_children.keys() |
---|
133 | |
---|
134 | def pop_node(self, node): |
---|
135 | """Remove all edges where the given node is a parent. |
---|
136 | |
---|
137 | Return the collection of all nodes which were children of the |
---|
138 | given node, and have no further parents. |
---|
139 | """ |
---|
140 | |
---|
141 | children = self.parent_to_children.pop(node, None) |
---|
142 | if children is not None: |
---|
143 | for child in children: |
---|
144 | self.child_to_parents[child].remove(node) |
---|
145 | if not self.child_to_parents[child]: |
---|
146 | yield child |
---|
147 | |
---|
148 | def __len__(self): |
---|
149 | return sum(len(x) for x in self.parent_to_children.values()) |
---|
150 | |
---|
151 | def __iter__(self): |
---|
152 | for parent, children in self.parent_to_children.iteritems(): |
---|
153 | for child in children: |
---|
154 | yield (parent, child) |
---|
155 | |
---|
156 | def __repr__(self): |
---|
157 | return repr(list(self)) |
---|
158 | |
---|
159 | def _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=False): |
---|
160 | nodes = {} |
---|
161 | edges = _EdgeCollection() |
---|
162 | |
---|
163 | for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]: |
---|
164 | if id(item) not in nodes: |
---|
165 | node = _Node(item) |
---|
166 | nodes[item] = node |
---|
167 | |
---|
168 | for t in tuples: |
---|
169 | if t[0] is t[1]: |
---|
170 | if allow_cycles: |
---|
171 | n = nodes[t[0]] |
---|
172 | n.cycles = set([n]) |
---|
173 | elif not ignore_self_cycles: |
---|
174 | raise CircularDependencyError("Self-referential dependency detected " + repr(t)) |
---|
175 | continue |
---|
176 | childnode = nodes[t[1]] |
---|
177 | parentnode = nodes[t[0]] |
---|
178 | edges.add((parentnode, childnode)) |
---|
179 | |
---|
180 | queue = [] |
---|
181 | for n in nodes.values(): |
---|
182 | if not edges.has_parents(n): |
---|
183 | queue.append(n) |
---|
184 | |
---|
185 | output = [] |
---|
186 | while nodes: |
---|
187 | if not queue: |
---|
188 | # edges remain but no edgeless nodes to remove; this indicates |
---|
189 | # a cycle |
---|
190 | if allow_cycles: |
---|
191 | for cycle in _find_cycles(edges): |
---|
192 | lead = cycle[0][0] |
---|
193 | lead.cycles = set() |
---|
194 | for edge in cycle: |
---|
195 | n = edges.remove(edge) |
---|
196 | lead.cycles.add(edge[0]) |
---|
197 | lead.cycles.add(edge[1]) |
---|
198 | if n is not None: |
---|
199 | queue.append(n) |
---|
200 | for n in lead.cycles: |
---|
201 | if n is not lead: |
---|
202 | n._cyclical = True |
---|
203 | for (n, k) in list(edges.edges_by_parent(n)): |
---|
204 | edges.add((lead, k)) |
---|
205 | edges.remove((n, k)) |
---|
206 | continue |
---|
207 | else: |
---|
208 | # long cycles not allowed |
---|
209 | raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue)) |
---|
210 | node = queue.pop() |
---|
211 | if not hasattr(node, '_cyclical'): |
---|
212 | output.append(node) |
---|
213 | del nodes[node.item] |
---|
214 | for childnode in edges.pop_node(node): |
---|
215 | queue.append(childnode) |
---|
216 | return output |
---|
217 | |
---|
218 | def _organize_as_tree(nodes): |
---|
219 | """Given a list of nodes from a topological sort, organize the |
---|
220 | nodes into a tree structure, with as many non-dependent nodes |
---|
221 | set as siblings to each other as possible. |
---|
222 | |
---|
223 | returns nodes as 3-tuples (item, cycles, children). |
---|
224 | """ |
---|
225 | |
---|
226 | if not nodes: |
---|
227 | return None |
---|
228 | # a list of all currently independent subtrees as a tuple of |
---|
229 | # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree) |
---|
230 | # order of the list has no semantics for the algorithmic |
---|
231 | independents = [] |
---|
232 | # in reverse topological order |
---|
233 | for node in reversed(nodes): |
---|
234 | # nodes subtree and cycles contain the node itself |
---|
235 | subtree = set([node]) |
---|
236 | if node.cycles is not None: |
---|
237 | cycles = set(node.cycles) |
---|
238 | else: |
---|
239 | cycles = set() |
---|
240 | # get a set of dependent nodes of node and its cycles |
---|
241 | nodealldeps = node.all_deps() |
---|
242 | if nodealldeps: |
---|
243 | # iterate over independent node indexes in reverse order so we can efficiently remove them |
---|
244 | for index in xrange(len(independents) - 1, -1, -1): |
---|
245 | child, childsubtree, childcycles = independents[index] |
---|
246 | # if there is a dependency between this node and an independent node |
---|
247 | if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)): |
---|
248 | # prepend child to nodes children |
---|
249 | # (append should be fine, but previous implemetation used prepend) |
---|
250 | node.children[0:0] = [(child.item, [n.item for n in child.cycles or []], child.children)] |
---|
251 | # merge childs subtree and cycles |
---|
252 | subtree.update(childsubtree) |
---|
253 | cycles.update(childcycles) |
---|
254 | # remove the child from list of independent subtrees |
---|
255 | independents[index:index+1] = [] |
---|
256 | # add node as a new independent subtree |
---|
257 | independents.append((node, subtree, cycles)) |
---|
258 | # choose an arbitrary node from list of all independent subtrees |
---|
259 | head = independents.pop()[0] |
---|
260 | # add all other independent subtrees as a child of the chosen root |
---|
261 | # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation |
---|
262 | head.children[0:0] = [(i[0].item, [n.item for n in i[0].cycles or []], i[0].children) for i in independents] |
---|
263 | return (head.item, [n.item for n in head.cycles or []], head.children) |
---|
264 | |
---|
265 | def _find_cycles(edges): |
---|
266 | involved_in_cycles = set() |
---|
267 | cycles = {} |
---|
268 | def traverse(node, goal=None, cycle=None): |
---|
269 | if goal is None: |
---|
270 | goal = node |
---|
271 | cycle = [] |
---|
272 | elif node is goal: |
---|
273 | return True |
---|
274 | |
---|
275 | for (n, key) in edges.edges_by_parent(node): |
---|
276 | if key in cycle: |
---|
277 | continue |
---|
278 | cycle.append(key) |
---|
279 | if traverse(key, goal, cycle): |
---|
280 | cycset = set(cycle) |
---|
281 | for x in cycle: |
---|
282 | involved_in_cycles.add(x) |
---|
283 | if x in cycles: |
---|
284 | existing_set = cycles[x] |
---|
285 | [existing_set.add(y) for y in cycset] |
---|
286 | for y in existing_set: |
---|
287 | cycles[y] = existing_set |
---|
288 | cycset = existing_set |
---|
289 | else: |
---|
290 | cycles[x] = cycset |
---|
291 | cycle.pop() |
---|
292 | |
---|
293 | for parent in edges.get_parents(): |
---|
294 | traverse(parent) |
---|
295 | |
---|
296 | # sets are not hashable, so uniquify with id |
---|
297 | unique_cycles = dict((id(s), s) for s in cycles.values()).values() |
---|
298 | for cycle in unique_cycles: |
---|
299 | edgecollection = [edge for edge in edges |
---|
300 | if edge[0] in cycle and edge[1] in cycle] |
---|
301 | yield edgecollection |
---|