root/galaxy-central/eggs/SQLAlchemy-0.5.6_dev_r6498-py2.6.egg/sqlalchemy/topological.py

リビジョン 3, 11.0 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# topological.py
2# Copyright (C) 2005, 2006, 2007, 2008, 2009 Michael Bayer mike_mp@zzzcomputing.com
3#
4# This module is part of SQLAlchemy and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
6
7"""Topological sorting algorithms.
8
9The topological sort is an algorithm that receives this list of
10dependencies as a *partial ordering*, that is a list of pairs which
11might say, *X is dependent on Y*, *Q is dependent on Z*, but does not
12necessarily tell you anything about Q being dependent on X. Therefore,
13its not a straight sort where every element can be compared to
14another... only some of the elements have any sorting preference, and
15then only towards just some of the other elements.  For a particular
16partial ordering, there can be many possible sorts that satisfy the
17conditions.
18
19"""
20
21from sqlalchemy.exc import CircularDependencyError
22from sqlalchemy import util
23
24__all__ = ['sort', 'sort_with_cycles', 'sort_as_tree']
25
26def sort(tuples, allitems):
27    """sort the given list of items by dependency.
28   
29    'tuples' is a list of tuples representing a partial ordering.
30    """
31   
32    return [n.item for n in _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=True)]
33
34def sort_with_cycles(tuples, allitems):
35    """sort the given list of items by dependency, cutting out cycles.
36   
37    returns results as an iterable of 2-tuples, containing the item,
38    and a list containing items involved in a cycle with this item, if any.
39   
40    'tuples' is a list of tuples representing a partial ordering.
41    """
42   
43    return [(n.item, [n.item for n in n.cycles or []]) for n in _sort(tuples, allitems, allow_cycles=True)]
44   
45def sort_as_tree(tuples, allitems, with_cycles=False):
46    """sort the given list of items by dependency, and return results
47    as a hierarchical tree structure.
48   
49    returns results as an iterable of 3-tuples, containing the item,
50    a list containing items involved in a cycle with this item, if any,
51    and a list of child tuples. 
52   
53    if with_cycles is False, the returned structure is of the same form
54    but the second element of each tuple, i.e. the 'cycles', is an empty list.
55   
56    'tuples' is a list of tuples representing a partial ordering.
57    """
58
59    return _organize_as_tree(_sort(tuples, allitems, allow_cycles=with_cycles))
60
61
62class _Node(object):
63    """Represent each item in the sort."""
64
65    def __init__(self, item):
66        self.item = item
67        self.dependencies = set()
68        self.children = []
69        self.cycles = None
70
71    def __str__(self):
72        return self.safestr()
73   
74    def safestr(self, indent=0):
75        return (' ' * indent * 2) + \
76            str(self.item) + \
77            (self.cycles is not None and (" (cycles: " + repr([x for x in self.cycles]) + ")") or "") + \
78            "\n" + \
79            ''.join(str(n) for n in self.children)
80
81    def __repr__(self):
82        return "%s" % (str(self.item))
83
84    def all_deps(self):
85        """Return a set of dependencies for this node and all its cycles."""
86
87        deps = set(self.dependencies)
88        if self.cycles is not None:
89            for c in self.cycles:
90                deps.update(c.dependencies)
91        return deps
92
93class _EdgeCollection(object):
94    """A collection of directed edges."""
95
96    def __init__(self):
97        self.parent_to_children = util.defaultdict(set)
98        self.child_to_parents = util.defaultdict(set)
99
100    def add(self, edge):
101        """Add an edge to this collection."""
102
103        parentnode, childnode = edge
104        self.parent_to_children[parentnode].add(childnode)
105        self.child_to_parents[childnode].add(parentnode)
106        parentnode.dependencies.add(childnode)
107
108    def remove(self, edge):
109        """Remove an edge from this collection.
110
111        Return the childnode if it has no other parents.
112        """
113
114        (parentnode, childnode) = edge
115        self.parent_to_children[parentnode].remove(childnode)
116        self.child_to_parents[childnode].remove(parentnode)
117        if not self.child_to_parents[childnode]:
118            return childnode
119        else:
120            return None
121
122    def has_parents(self, node):
123        return node in self.child_to_parents and bool(self.child_to_parents[node])
124
125    def edges_by_parent(self, node):
126        if node in self.parent_to_children:
127            return [(node, child) for child in self.parent_to_children[node]]
128        else:
129            return []
130
131    def get_parents(self):
132        return self.parent_to_children.keys()
133
134    def pop_node(self, node):
135        """Remove all edges where the given node is a parent.
136
137        Return the collection of all nodes which were children of the
138        given node, and have no further parents.
139        """
140
141        children = self.parent_to_children.pop(node, None)
142        if children is not None:
143            for child in children:
144                self.child_to_parents[child].remove(node)
145                if not self.child_to_parents[child]:
146                    yield child
147
148    def __len__(self):
149        return sum(len(x) for x in self.parent_to_children.values())
150
151    def __iter__(self):
152        for parent, children in self.parent_to_children.iteritems():
153            for child in children:
154                yield (parent, child)
155
156    def __repr__(self):
157        return repr(list(self))
158
159def _sort(tuples, allitems, allow_cycles=False, ignore_self_cycles=False):
160    nodes = {}
161    edges = _EdgeCollection()
162
163    for item in list(allitems) + [t[0] for t in tuples] + [t[1] for t in tuples]:
164        if id(item) not in nodes:
165            node = _Node(item)
166            nodes[item] = node
167
168    for t in tuples:
169        if t[0] is t[1]:
170            if allow_cycles:
171                n = nodes[t[0]]
172                n.cycles = set([n])
173            elif not ignore_self_cycles:
174                raise CircularDependencyError("Self-referential dependency detected " + repr(t))
175            continue
176        childnode = nodes[t[1]]
177        parentnode = nodes[t[0]]
178        edges.add((parentnode, childnode))
179
180    queue = []
181    for n in nodes.values():
182        if not edges.has_parents(n):
183            queue.append(n)
184
185    output = []
186    while nodes:
187        if not queue:
188            # edges remain but no edgeless nodes to remove; this indicates
189            # a cycle
190            if allow_cycles:
191                for cycle in _find_cycles(edges):
192                    lead = cycle[0][0]
193                    lead.cycles = set()
194                    for edge in cycle:
195                        n = edges.remove(edge)
196                        lead.cycles.add(edge[0])
197                        lead.cycles.add(edge[1])
198                        if n is not None:
199                            queue.append(n)
200                    for n in lead.cycles:
201                        if n is not lead:
202                            n._cyclical = True
203                            for (n, k) in list(edges.edges_by_parent(n)):
204                                edges.add((lead, k))
205                                edges.remove((n, k))
206                continue
207            else:
208                # long cycles not allowed
209                raise CircularDependencyError("Circular dependency detected " + repr(edges) + repr(queue))
210        node = queue.pop()
211        if not hasattr(node, '_cyclical'):
212            output.append(node)
213        del nodes[node.item]
214        for childnode in edges.pop_node(node):
215            queue.append(childnode)
216    return output
217
218def _organize_as_tree(nodes):
219    """Given a list of nodes from a topological sort, organize the
220    nodes into a tree structure, with as many non-dependent nodes
221    set as siblings to each other as possible.
222   
223    returns nodes as 3-tuples (item, cycles, children).
224    """
225
226    if not nodes:
227        return None
228    # a list of all currently independent subtrees as a tuple of
229    # (root_node, set_of_all_tree_nodes, set_of_all_cycle_nodes_in_tree)
230    # order of the list has no semantics for the algorithmic
231    independents = []
232    # in reverse topological order
233    for node in reversed(nodes):
234        # nodes subtree and cycles contain the node itself
235        subtree = set([node])
236        if node.cycles is not None:
237            cycles = set(node.cycles)
238        else:
239            cycles = set()
240        # get a set of dependent nodes of node and its cycles
241        nodealldeps = node.all_deps()
242        if nodealldeps:
243            # iterate over independent node indexes in reverse order so we can efficiently remove them
244            for index in xrange(len(independents) - 1, -1, -1):
245                child, childsubtree, childcycles = independents[index]
246                # if there is a dependency between this node and an independent node
247                if (childsubtree.intersection(nodealldeps) or childcycles.intersection(node.dependencies)):
248                    # prepend child to nodes children
249                    # (append should be fine, but previous implemetation used prepend)
250                    node.children[0:0] = [(child.item, [n.item for n in child.cycles or []], child.children)]
251                    # merge childs subtree and cycles
252                    subtree.update(childsubtree)
253                    cycles.update(childcycles)
254                    # remove the child from list of independent subtrees
255                    independents[index:index+1] = []
256        # add node as a new independent subtree
257        independents.append((node, subtree, cycles))
258    # choose an arbitrary node from list of all independent subtrees
259    head = independents.pop()[0]
260    # add all other independent subtrees as a child of the chosen root
261    # used prepend [0:0] instead of extend to maintain exact behaviour of previous implementation
262    head.children[0:0] = [(i[0].item, [n.item for n in i[0].cycles or []], i[0].children) for i in independents]
263    return (head.item, [n.item for n in head.cycles or []], head.children)
264
265def _find_cycles(edges):
266    involved_in_cycles = set()
267    cycles = {}
268    def traverse(node, goal=None, cycle=None):
269        if goal is None:
270            goal = node
271            cycle = []
272        elif node is goal:
273            return True
274
275        for (n, key) in edges.edges_by_parent(node):
276            if key in cycle:
277                continue
278            cycle.append(key)
279            if traverse(key, goal, cycle):
280                cycset = set(cycle)
281                for x in cycle:
282                    involved_in_cycles.add(x)
283                    if x in cycles:
284                        existing_set = cycles[x]
285                        [existing_set.add(y) for y in cycset]
286                        for y in existing_set:
287                            cycles[y] = existing_set
288                        cycset = existing_set
289                    else:
290                        cycles[x] = cycset
291            cycle.pop()
292
293    for parent in edges.get_parents():
294        traverse(parent)
295
296    # sets are not hashable, so uniquify with id
297    unique_cycles = dict((id(s), s) for s in cycles.values()).values()
298    for cycle in unique_cycles:
299        edgecollection = [edge for edge in edges
300                          if edge[0] in cycle and edge[1] in cycle]
301        yield edgecollection
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。