root/galaxy-central/eggs/docutils-0.4-py2.6.egg/docutils/statemachine.py

リビジョン 3, 54.1 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# Author: David Goodger
2# Contact: goodger@users.sourceforge.net
3# Revision: $Revision: 4152 $
4# Date: $Date: 2005-12-08 00:46:30 +0100 (Thu, 08 Dec 2005) $
5# Copyright: This module has been placed in the public domain.
6
7"""
8A finite state machine specialized for regular-expression-based text filters,
9this module defines the following classes:
10
11- `StateMachine`, a state machine
12- `State`, a state superclass
13- `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
14- `StateWS`, a state superclass for use with `StateMachineWS`
15- `SearchStateMachine`, uses `re.search()` instead of `re.match()`
16- `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
17- `ViewList`, extends standard Python lists.
18- `StringList`, string-specific ViewList.
19
20Exception classes:
21
22- `StateMachineError`
23- `UnknownStateError`
24- `DuplicateStateError`
25- `UnknownTransitionError`
26- `DuplicateTransitionError`
27- `TransitionPatternNotFound`
28- `TransitionMethodNotFound`
29- `UnexpectedIndentationError`
30- `TransitionCorrection`: Raised to switch to another transition.
31- `StateCorrection`: Raised to switch to another state & transition.
32
33Functions:
34
35- `string2lines()`: split a multi-line string into a list of one-line strings
36
37
38How To Use This Module
39======================
40(See the individual classes, methods, and attributes for details.)
41
421. Import it: ``import statemachine`` or ``from statemachine import ...``.
43   You will also need to ``import re``.
44
452. Derive a subclass of `State` (or `StateWS`) for each state in your state
46   machine::
47
48       class MyState(statemachine.State):
49
50   Within the state's class definition:
51
52   a) Include a pattern for each transition, in `State.patterns`::
53
54          patterns = {'atransition': r'pattern', ...}
55
56   b) Include a list of initial transitions to be set up automatically, in
57      `State.initial_transitions`::
58
59          initial_transitions = ['atransition', ...]
60
61   c) Define a method for each transition, with the same name as the
62      transition pattern::
63
64          def atransition(self, match, context, next_state):
65              # do something
66              result = [...]  # a list
67              return context, next_state, result
68              # context, next_state may be altered
69
70      Transition methods may raise an `EOFError` to cut processing short.
71
72   d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
73      transition methods, which handle the beginning- and end-of-file.
74
75   e) In order to handle nested processing, you may wish to override the
76      attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
77
78      If you are using `StateWS` as a base class, in order to handle nested
79      indented blocks, you may wish to:
80
81      - override the attributes `StateWS.indent_sm`,
82        `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
83        `StateWS.known_indent_sm_kwargs`;
84      - override the `StateWS.blank()` method; and/or
85      - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
86        and/or `StateWS.firstknown_indent()` methods.
87
883. Create a state machine object::
89
90       sm = StateMachine(state_classes=[MyState, ...],
91                         initial_state='MyState')
92
934. Obtain the input text, which needs to be converted into a tab-free list of
94   one-line strings. For example, to read text from a file called
95   'inputfile'::
96
97       input_string = open('inputfile').read()
98       input_lines = statemachine.string2lines(input_string)
99
1005. Run the state machine on the input text and collect the results, a list::
101
102       results = sm.run(input_lines)
103
1046. Remove any lingering circular references::
105
106       sm.unlink()
107"""
108
109__docformat__ = 'restructuredtext'
110
111import sys
112import re
113import types
114import unicodedata
115
116
117class StateMachine:
118
119    """
120    A finite state machine for text filters using regular expressions.
121
122    The input is provided in the form of a list of one-line strings (no
123    newlines). States are subclasses of the `State` class. Transitions consist
124    of regular expression patterns and transition methods, and are defined in
125    each state.
126
127    The state machine is started with the `run()` method, which returns the
128    results of processing in a list.
129    """
130
131    def __init__(self, state_classes, initial_state, debug=0):
132        """
133        Initialize a `StateMachine` object; add state objects.
134
135        Parameters:
136
137        - `state_classes`: a list of `State` (sub)classes.
138        - `initial_state`: a string, the class name of the initial state.
139        - `debug`: a boolean; produce verbose output if true (nonzero).
140        """
141
142        self.input_lines = None
143        """`StringList` of input lines (without newlines).
144        Filled by `self.run()`."""
145
146        self.input_offset = 0
147        """Offset of `self.input_lines` from the beginning of the file."""
148
149        self.line = None
150        """Current input line."""
151
152        self.line_offset = -1
153        """Current input line offset from beginning of `self.input_lines`."""
154
155        self.debug = debug
156        """Debugging mode on/off."""
157
158        self.initial_state = initial_state
159        """The name of the initial state (key to `self.states`)."""
160
161        self.current_state = initial_state
162        """The name of the current state (key to `self.states`)."""
163
164        self.states = {}
165        """Mapping of {state_name: State_object}."""
166
167        self.add_states(state_classes)
168
169        self.observers = []
170        """List of bound methods or functions to call whenever the current
171        line changes.  Observers are called with one argument, ``self``.
172        Cleared at the end of `run()`."""
173
174    def unlink(self):
175        """Remove circular references to objects no longer required."""
176        for state in self.states.values():
177            state.unlink()
178        self.states = None
179
180    def run(self, input_lines, input_offset=0, context=None,
181            input_source=None):
182        """
183        Run the state machine on `input_lines`. Return results (a list).
184
185        Reset `self.line_offset` and `self.current_state`. Run the
186        beginning-of-file transition. Input one line at a time and check for a
187        matching transition. If a match is found, call the transition method
188        and possibly change the state. Store the context returned by the
189        transition method to be passed on to the next transition matched.
190        Accumulate the results returned by the transition methods in a list.
191        Run the end-of-file transition. Finally, return the accumulated
192        results.
193
194        Parameters:
195
196        - `input_lines`: a list of strings without newlines, or `StringList`.
197        - `input_offset`: the line offset of `input_lines` from the beginning
198          of the file.
199        - `context`: application-specific storage.
200        - `input_source`: name or path of source of `input_lines`.
201        """
202        self.runtime_init()
203        if isinstance(input_lines, StringList):
204            self.input_lines = input_lines
205        else:
206            self.input_lines = StringList(input_lines, source=input_source)
207        self.input_offset = input_offset
208        self.line_offset = -1
209        self.current_state = self.initial_state
210        if self.debug:
211            print >>sys.stderr, (
212                '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
213                % (self.line_offset, '\n| '.join(self.input_lines)))
214        transitions = None
215        results = []
216        state = self.get_state()
217        try:
218            if self.debug:
219                print >>sys.stderr, ('\nStateMachine.run: bof transition')
220            context, result = state.bof(context)
221            results.extend(result)
222            while 1:
223                try:
224                    try:
225                        self.next_line()
226                        if self.debug:
227                            source, offset = self.input_lines.info(
228                                self.line_offset)
229                            print >>sys.stderr, (
230                                '\nStateMachine.run: line (source=%r, '
231                                'offset=%r):\n| %s'
232                                % (source, offset, self.line))
233                        context, next_state, result = self.check_line(
234                            context, state, transitions)
235                    except EOFError:
236                        if self.debug:
237                            print >>sys.stderr, (
238                                '\nStateMachine.run: %s.eof transition'
239                                % state.__class__.__name__)
240                        result = state.eof(context)
241                        results.extend(result)
242                        break
243                    else:
244                        results.extend(result)
245                except TransitionCorrection, exception:
246                    self.previous_line() # back up for another try
247                    transitions = (exception.args[0],)
248                    if self.debug:
249                        print >>sys.stderr, (
250                              '\nStateMachine.run: TransitionCorrection to '
251                              'state "%s", transition %s.'
252                              % (state.__class__.__name__, transitions[0]))
253                    continue
254                except StateCorrection, exception:
255                    self.previous_line() # back up for another try
256                    next_state = exception.args[0]
257                    if len(exception.args) == 1:
258                        transitions = None
259                    else:
260                        transitions = (exception.args[1],)
261                    if self.debug:
262                        print >>sys.stderr, (
263                              '\nStateMachine.run: StateCorrection to state '
264                              '"%s", transition %s.'
265                              % (next_state, transitions[0]))
266                else:
267                    transitions = None
268                state = self.get_state(next_state)
269        except:
270            if self.debug:
271                self.error()
272            raise
273        self.observers = []
274        return results
275
276    def get_state(self, next_state=None):
277        """
278        Return current state object; set it first if `next_state` given.
279
280        Parameter `next_state`: a string, the name of the next state.
281
282        Exception: `UnknownStateError` raised if `next_state` unknown.
283        """
284        if next_state:
285            if self.debug and next_state != self.current_state:
286                print >>sys.stderr, \
287                      ('\nStateMachine.get_state: Changing state from '
288                       '"%s" to "%s" (input line %s).'
289                       % (self.current_state, next_state,
290                          self.abs_line_number()))
291            self.current_state = next_state
292        try:
293            return self.states[self.current_state]
294        except KeyError:
295            raise UnknownStateError(self.current_state)
296
297    def next_line(self, n=1):
298        """Load `self.line` with the `n`'th next line and return it."""
299        try:
300            try:
301                self.line_offset += n
302                self.line = self.input_lines[self.line_offset]
303            except IndexError:
304                self.line = None
305                raise EOFError
306            return self.line
307        finally:
308            self.notify_observers()
309
310    def is_next_line_blank(self):
311        """Return 1 if the next line is blank or non-existant."""
312        try:
313            return not self.input_lines[self.line_offset + 1].strip()
314        except IndexError:
315            return 1
316
317    def at_eof(self):
318        """Return 1 if the input is at or past end-of-file."""
319        return self.line_offset >= len(self.input_lines) - 1
320
321    def at_bof(self):
322        """Return 1 if the input is at or before beginning-of-file."""
323        return self.line_offset <= 0
324
325    def previous_line(self, n=1):
326        """Load `self.line` with the `n`'th previous line and return it."""
327        self.line_offset -= n
328        if self.line_offset < 0:
329            self.line = None
330        else:
331            self.line = self.input_lines[self.line_offset]
332        self.notify_observers()
333        return self.line
334
335    def goto_line(self, line_offset):
336        """Jump to absolute line offset `line_offset`, load and return it."""
337        try:
338            try:
339                self.line_offset = line_offset - self.input_offset
340                self.line = self.input_lines[self.line_offset]
341            except IndexError:
342                self.line = None
343                raise EOFError
344            return self.line
345        finally:
346            self.notify_observers()
347
348    def get_source(self, line_offset):
349        """Return source of line at absolute line offset `line_offset`."""
350        return self.input_lines.source(line_offset - self.input_offset)
351
352    def abs_line_offset(self):
353        """Return line offset of current line, from beginning of file."""
354        return self.line_offset + self.input_offset
355
356    def abs_line_number(self):
357        """Return line number of current line (counting from 1)."""
358        return self.line_offset + self.input_offset + 1
359
360    def insert_input(self, input_lines, source):
361        self.input_lines.insert(self.line_offset + 1, '',
362                                source='internal padding')
363        self.input_lines.insert(self.line_offset + 1, '',
364                                source='internal padding')
365        self.input_lines.insert(self.line_offset + 2,
366                                StringList(input_lines, source))
367
368    def get_text_block(self, flush_left=0):
369        """
370        Return a contiguous block of text.
371
372        If `flush_left` is true, raise `UnexpectedIndentationError` if an
373        indented line is encountered before the text block ends (with a blank
374        line).
375        """
376        try:
377            block = self.input_lines.get_text_block(self.line_offset,
378                                                    flush_left)
379            self.next_line(len(block) - 1)
380            return block
381        except UnexpectedIndentationError, error:
382            block, source, lineno = error
383            self.next_line(len(block) - 1) # advance to last line of block
384            raise
385
386    def check_line(self, context, state, transitions=None):
387        """
388        Examine one line of input for a transition match & execute its method.
389
390        Parameters:
391
392        - `context`: application-dependent storage.
393        - `state`: a `State` object, the current state.
394        - `transitions`: an optional ordered list of transition names to try,
395          instead of ``state.transition_order``.
396
397        Return the values returned by the transition method:
398
399        - context: possibly modified from the parameter `context`;
400        - next state name (`State` subclass name);
401        - the result output of the transition, a list.
402
403        When there is no match, ``state.no_match()`` is called and its return
404        value is returned.
405        """
406        if transitions is None:
407            transitions =  state.transition_order
408        state_correction = None
409        if self.debug:
410            print >>sys.stderr, (
411                  '\nStateMachine.check_line: state="%s", transitions=%r.'
412                  % (state.__class__.__name__, transitions))
413        for name in transitions:
414            pattern, method, next_state = state.transitions[name]
415            match = self.match(pattern)
416            if match:
417                if self.debug:
418                    print >>sys.stderr, (
419                          '\nStateMachine.check_line: Matched transition '
420                          '"%s" in state "%s".'
421                          % (name, state.__class__.__name__))
422                return method(match, context, next_state)
423        else:
424            if self.debug:
425                print >>sys.stderr, (
426                      '\nStateMachine.check_line: No match in state "%s".'
427                      % state.__class__.__name__)
428            return state.no_match(context, transitions)
429
430    def match(self, pattern):
431        """
432        Return the result of a regular expression match.
433
434        Parameter `pattern`: an `re` compiled regular expression.
435        """
436        return pattern.match(self.line)
437
438    def add_state(self, state_class):
439        """
440        Initialize & add a `state_class` (`State` subclass) object.
441
442        Exception: `DuplicateStateError` raised if `state_class` was already
443        added.
444        """
445        statename = state_class.__name__
446        if self.states.has_key(statename):
447            raise DuplicateStateError(statename)
448        self.states[statename] = state_class(self, self.debug)
449
450    def add_states(self, state_classes):
451        """
452        Add `state_classes` (a list of `State` subclasses).
453        """
454        for state_class in state_classes:
455            self.add_state(state_class)
456
457    def runtime_init(self):
458        """
459        Initialize `self.states`.
460        """
461        for state in self.states.values():
462            state.runtime_init()
463
464    def error(self):
465        """Report error details."""
466        type, value, module, line, function = _exception_data()
467        print >>sys.stderr, '%s: %s' % (type, value)
468        print >>sys.stderr, 'input line %s' % (self.abs_line_number())
469        print >>sys.stderr, ('module %s, line %s, function %s'
470                             % (module, line, function))
471
472    def attach_observer(self, observer):
473        """
474        The `observer` parameter is a function or bound method which takes two
475        arguments, the source and offset of the current line.
476        """
477        self.observers.append(observer)
478
479    def detach_observer(self, observer):
480        self.observers.remove(observer)
481
482    def notify_observers(self):
483        for observer in self.observers:
484            try:
485                info = self.input_lines.info(self.line_offset)
486            except IndexError:
487                info = (None, None)
488            observer(*info)
489
490
491class State:
492
493    """
494    State superclass. Contains a list of transitions, and transition methods.
495
496    Transition methods all have the same signature. They take 3 parameters:
497
498    - An `re` match object. ``match.string`` contains the matched input line,
499      ``match.start()`` gives the start index of the match, and
500      ``match.end()`` gives the end index.
501    - A context object, whose meaning is application-defined (initial value
502      ``None``). It can be used to store any information required by the state
503      machine, and the retured context is passed on to the next transition
504      method unchanged.
505    - The name of the next state, a string, taken from the transitions list;
506      normally it is returned unchanged, but it may be altered by the
507      transition method if necessary.
508
509    Transition methods all return a 3-tuple:
510
511    - A context object, as (potentially) modified by the transition method.
512    - The next state name (a return value of ``None`` means no state change).
513    - The processing result, a list, which is accumulated by the state
514      machine.
515
516    Transition methods may raise an `EOFError` to cut processing short.
517
518    There are two implicit transitions, and corresponding transition methods
519    are defined: `bof()` handles the beginning-of-file, and `eof()` handles
520    the end-of-file. These methods have non-standard signatures and return
521    values. `bof()` returns the initial context and results, and may be used
522    to return a header string, or do any other processing needed. `eof()`
523    should handle any remaining context and wrap things up; it returns the
524    final processing result.
525
526    Typical applications need only subclass `State` (or a subclass), set the
527    `patterns` and `initial_transitions` class attributes, and provide
528    corresponding transition methods. The default object initialization will
529    take care of constructing the list of transitions.
530    """
531
532    patterns = None
533    """
534    {Name: pattern} mapping, used by `make_transition()`. Each pattern may
535    be a string or a compiled `re` pattern. Override in subclasses.
536    """
537
538    initial_transitions = None
539    """
540    A list of transitions to initialize when a `State` is instantiated.
541    Each entry is either a transition name string, or a (transition name, next
542    state name) pair. See `make_transitions()`. Override in subclasses.
543    """
544
545    nested_sm = None
546    """
547    The `StateMachine` class for handling nested processing.
548
549    If left as ``None``, `nested_sm` defaults to the class of the state's
550    controlling state machine. Override it in subclasses to avoid the default.
551    """
552
553    nested_sm_kwargs = None
554    """
555    Keyword arguments dictionary, passed to the `nested_sm` constructor.
556
557    Two keys must have entries in the dictionary:
558
559    - Key 'state_classes' must be set to a list of `State` classes.
560    - Key 'initial_state' must be set to the name of the initial state class.
561
562    If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
563    class of the current state, and 'initial_state' defaults to the name of
564    the class of the current state. Override in subclasses to avoid the
565    defaults.
566    """
567
568    def __init__(self, state_machine, debug=0):
569        """
570        Initialize a `State` object; make & add initial transitions.
571
572        Parameters:
573
574        - `statemachine`: the controlling `StateMachine` object.
575        - `debug`: a boolean; produce verbose output if true (nonzero).
576        """
577
578        self.transition_order = []
579        """A list of transition names in search order."""
580
581        self.transitions = {}
582        """
583        A mapping of transition names to 3-tuples containing
584        (compiled_pattern, transition_method, next_state_name). Initialized as
585        an instance attribute dynamically (instead of as a class attribute)
586        because it may make forward references to patterns and methods in this
587        or other classes.
588        """
589
590        self.add_initial_transitions()
591
592        self.state_machine = state_machine
593        """A reference to the controlling `StateMachine` object."""
594
595        self.debug = debug
596        """Debugging mode on/off."""
597
598        if self.nested_sm is None:
599            self.nested_sm = self.state_machine.__class__
600        if self.nested_sm_kwargs is None:
601            self.nested_sm_kwargs = {'state_classes': [self.__class__],
602                                     'initial_state': self.__class__.__name__}
603
604    def runtime_init(self):
605        """
606        Initialize this `State` before running the state machine; called from
607        `self.state_machine.run()`.
608        """
609        pass
610
611    def unlink(self):
612        """Remove circular references to objects no longer required."""
613        self.state_machine = None
614
615    def add_initial_transitions(self):
616        """Make and add transitions listed in `self.initial_transitions`."""
617        if self.initial_transitions:
618            names, transitions = self.make_transitions(
619                  self.initial_transitions)
620            self.add_transitions(names, transitions)
621
622    def add_transitions(self, names, transitions):
623        """
624        Add a list of transitions to the start of the transition list.
625
626        Parameters:
627
628        - `names`: a list of transition names.
629        - `transitions`: a mapping of names to transition tuples.
630
631        Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
632        """
633        for name in names:
634            if self.transitions.has_key(name):
635                raise DuplicateTransitionError(name)
636            if not transitions.has_key(name):
637                raise UnknownTransitionError(name)
638        self.transition_order[:0] = names
639        self.transitions.update(transitions)
640
641    def add_transition(self, name, transition):
642        """
643        Add a transition to the start of the transition list.
644
645        Parameter `transition`: a ready-made transition 3-tuple.
646
647        Exception: `DuplicateTransitionError`.
648        """
649        if self.transitions.has_key(name):
650            raise DuplicateTransitionError(name)
651        self.transition_order[:0] = [name]
652        self.transitions[name] = transition
653
654    def remove_transition(self, name):
655        """
656        Remove a transition by `name`.
657
658        Exception: `UnknownTransitionError`.
659        """
660        try:
661            del self.transitions[name]
662            self.transition_order.remove(name)
663        except:
664            raise UnknownTransitionError(name)
665
666    def make_transition(self, name, next_state=None):
667        """
668        Make & return a transition tuple based on `name`.
669
670        This is a convenience function to simplify transition creation.
671
672        Parameters:
673
674        - `name`: a string, the name of the transition pattern & method. This
675          `State` object must have a method called '`name`', and a dictionary
676          `self.patterns` containing a key '`name`'.
677        - `next_state`: a string, the name of the next `State` object for this
678          transition. A value of ``None`` (or absent) implies no state change
679          (i.e., continue with the same state).
680
681        Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
682        """
683        if next_state is None:
684            next_state = self.__class__.__name__
685        try:
686            pattern = self.patterns[name]
687            if not hasattr(pattern, 'match'):
688                pattern = re.compile(pattern)
689        except KeyError:
690            raise TransitionPatternNotFound(
691                  '%s.patterns[%r]' % (self.__class__.__name__, name))
692        try:
693            method = getattr(self, name)
694        except AttributeError:
695            raise TransitionMethodNotFound(
696                  '%s.%s' % (self.__class__.__name__, name))
697        return (pattern, method, next_state)
698
699    def make_transitions(self, name_list):
700        """
701        Return a list of transition names and a transition mapping.
702
703        Parameter `name_list`: a list, where each entry is either a transition
704        name string, or a 1- or 2-tuple (transition name, optional next state
705        name).
706        """
707        stringtype = type('')
708        names = []
709        transitions = {}
710        for namestate in name_list:
711            if type(namestate) is stringtype:
712                transitions[namestate] = self.make_transition(namestate)
713                names.append(namestate)
714            else:
715                transitions[namestate[0]] = self.make_transition(*namestate)
716                names.append(namestate[0])
717        return names, transitions
718
719    def no_match(self, context, transitions):
720        """
721        Called when there is no match from `StateMachine.check_line()`.
722
723        Return the same values returned by transition methods:
724
725        - context: unchanged;
726        - next state name: ``None``;
727        - empty result list.
728
729        Override in subclasses to catch this event.
730        """
731        return context, None, []
732
733    def bof(self, context):
734        """
735        Handle beginning-of-file. Return unchanged `context`, empty result.
736
737        Override in subclasses.
738
739        Parameter `context`: application-defined storage.
740        """
741        return context, []
742
743    def eof(self, context):
744        """
745        Handle end-of-file. Return empty result.
746
747        Override in subclasses.
748
749        Parameter `context`: application-defined storage.
750        """
751        return []
752
753    def nop(self, match, context, next_state):
754        """
755        A "do nothing" transition method.
756
757        Return unchanged `context` & `next_state`, empty result. Useful for
758        simple state changes (actionless transitions).
759        """
760        return context, next_state, []
761
762
763class StateMachineWS(StateMachine):
764
765    """
766    `StateMachine` subclass specialized for whitespace recognition.
767
768    There are three methods provided for extracting indented text blocks:
769   
770    - `get_indented()`: use when the indent is unknown.
771    - `get_known_indented()`: use when the indent is known for all lines.
772    - `get_first_known_indented()`: use when only the first line's indent is
773      known.
774    """
775
776    def get_indented(self, until_blank=0, strip_indent=1):
777        """
778        Return a block of indented lines of text, and info.
779
780        Extract an indented block where the indent is unknown for all lines.
781
782        :Parameters:
783            - `until_blank`: Stop collecting at the first blank line if true
784              (1).
785            - `strip_indent`: Strip common leading indent if true (1,
786              default).
787
788        :Return:
789            - the indented block (a list of lines of text),
790            - its indent,
791            - its first line offset from BOF, and
792            - whether or not it finished with a blank line.
793        """
794        offset = self.abs_line_offset()
795        indented, indent, blank_finish = self.input_lines.get_indented(
796              self.line_offset, until_blank, strip_indent)
797        if indented:
798            self.next_line(len(indented) - 1) # advance to last indented line
799        while indented and not indented[0].strip():
800            indented.trim_start()
801            offset += 1
802        return indented, indent, offset, blank_finish
803
804    def get_known_indented(self, indent, until_blank=0, strip_indent=1):
805        """
806        Return an indented block and info.
807
808        Extract an indented block where the indent is known for all lines.
809        Starting with the current line, extract the entire text block with at
810        least `indent` indentation (which must be whitespace, except for the
811        first line).
812
813        :Parameters:
814            - `indent`: The number of indent columns/characters.
815            - `until_blank`: Stop collecting at the first blank line if true
816              (1).
817            - `strip_indent`: Strip `indent` characters of indentation if true
818              (1, default).
819
820        :Return:
821            - the indented block,
822            - its first line offset from BOF, and
823            - whether or not it finished with a blank line.
824        """
825        offset = self.abs_line_offset()
826        indented, indent, blank_finish = self.input_lines.get_indented(
827              self.line_offset, until_blank, strip_indent,
828              block_indent=indent)
829        self.next_line(len(indented) - 1) # advance to last indented line
830        while indented and not indented[0].strip():
831            indented.trim_start()
832            offset += 1
833        return indented, offset, blank_finish
834
835    def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
836                                 strip_top=1):
837        """
838        Return an indented block and info.
839
840        Extract an indented block where the indent is known for the first line
841        and unknown for all other lines.
842
843        :Parameters:
844            - `indent`: The first line's indent (# of columns/characters).
845            - `until_blank`: Stop collecting at the first blank line if true
846              (1).
847            - `strip_indent`: Strip `indent` characters of indentation if true
848              (1, default).
849            - `strip_top`: Strip blank lines from the beginning of the block.
850
851        :Return:
852            - the indented block,
853            - its indent,
854            - its first line offset from BOF, and
855            - whether or not it finished with a blank line.
856        """
857        offset = self.abs_line_offset()
858        indented, indent, blank_finish = self.input_lines.get_indented(
859              self.line_offset, until_blank, strip_indent,
860              first_indent=indent)
861        self.next_line(len(indented) - 1) # advance to last indented line
862        if strip_top:
863            while indented and not indented[0].strip():
864                indented.trim_start()
865                offset += 1
866        return indented, indent, offset, blank_finish
867
868
869class StateWS(State):
870
871    """
872    State superclass specialized for whitespace (blank lines & indents).
873
874    Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
875    lines) and 'indent' (for indented text blocks) are added automatically,
876    before any other transitions.  The transition method `blank()` handles
877    blank lines and `indent()` handles nested indented blocks.  Indented
878    blocks trigger a new state machine to be created by `indent()` and run.
879    The class of the state machine to be created is in `indent_sm`, and the
880    constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
881
882    The methods `known_indent()` and `firstknown_indent()` are provided for
883    indented blocks where the indent (all lines' and first line's only,
884    respectively) is known to the transition method, along with the attributes
885    `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
886    is triggered automatically.
887    """
888
889    indent_sm = None
890    """
891    The `StateMachine` class handling indented text blocks.
892
893    If left as ``None``, `indent_sm` defaults to the value of
894    `State.nested_sm`.  Override it in subclasses to avoid the default.
895    """
896
897    indent_sm_kwargs = None
898    """
899    Keyword arguments dictionary, passed to the `indent_sm` constructor.
900
901    If left as ``None``, `indent_sm_kwargs` defaults to the value of
902    `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
903    """
904
905    known_indent_sm = None
906    """
907    The `StateMachine` class handling known-indented text blocks.
908
909    If left as ``None``, `known_indent_sm` defaults to the value of
910    `indent_sm`.  Override it in subclasses to avoid the default.
911    """
912
913    known_indent_sm_kwargs = None
914    """
915    Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
916
917    If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
918    `indent_sm_kwargs`. Override it in subclasses to avoid the default.
919    """
920
921    ws_patterns = {'blank': ' *$',
922                   'indent': ' +'}
923    """Patterns for default whitespace transitions.  May be overridden in
924    subclasses."""
925
926    ws_initial_transitions = ('blank', 'indent')
927    """Default initial whitespace transitions, added before those listed in
928    `State.initial_transitions`.  May be overridden in subclasses."""
929
930    def __init__(self, state_machine, debug=0):
931        """
932        Initialize a `StateSM` object; extends `State.__init__()`.
933
934        Check for indent state machine attributes, set defaults if not set.
935        """
936        State.__init__(self, state_machine, debug)
937        if self.indent_sm is None:
938            self.indent_sm = self.nested_sm
939        if self.indent_sm_kwargs is None:
940            self.indent_sm_kwargs = self.nested_sm_kwargs
941        if self.known_indent_sm is None:
942            self.known_indent_sm = self.indent_sm
943        if self.known_indent_sm_kwargs is None:
944            self.known_indent_sm_kwargs = self.indent_sm_kwargs
945
946    def add_initial_transitions(self):
947        """
948        Add whitespace-specific transitions before those defined in subclass.
949
950        Extends `State.add_initial_transitions()`.
951        """
952        State.add_initial_transitions(self)
953        if self.patterns is None:
954            self.patterns = {}
955        self.patterns.update(self.ws_patterns)
956        names, transitions = self.make_transitions(
957            self.ws_initial_transitions)
958        self.add_transitions(names, transitions)
959
960    def blank(self, match, context, next_state):
961        """Handle blank lines. Does nothing. Override in subclasses."""
962        return self.nop(match, context, next_state)
963
964    def indent(self, match, context, next_state):
965        """
966        Handle an indented text block. Extend or override in subclasses.
967
968        Recursively run the registered state machine for indented blocks
969        (`self.indent_sm`).
970        """
971        indented, indent, line_offset, blank_finish = \
972              self.state_machine.get_indented()
973        sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
974        results = sm.run(indented, input_offset=line_offset)
975        return context, next_state, results
976
977    def known_indent(self, match, context, next_state):
978        """
979        Handle a known-indent text block. Extend or override in subclasses.
980
981        Recursively run the registered state machine for known-indent indented
982        blocks (`self.known_indent_sm`). The indent is the length of the
983        match, ``match.end()``.
984        """
985        indented, line_offset, blank_finish = \
986              self.state_machine.get_known_indented(match.end())
987        sm = self.known_indent_sm(debug=self.debug,
988                                 **self.known_indent_sm_kwargs)
989        results = sm.run(indented, input_offset=line_offset)
990        return context, next_state, results
991
992    def first_known_indent(self, match, context, next_state):
993        """
994        Handle an indented text block (first line's indent known).
995
996        Extend or override in subclasses.
997
998        Recursively run the registered state machine for known-indent indented
999        blocks (`self.known_indent_sm`). The indent is the length of the
1000        match, ``match.end()``.
1001        """
1002        indented, line_offset, blank_finish = \
1003              self.state_machine.get_first_known_indented(match.end())
1004        sm = self.known_indent_sm(debug=self.debug,
1005                                 **self.known_indent_sm_kwargs)
1006        results = sm.run(indented, input_offset=line_offset)
1007        return context, next_state, results
1008
1009
1010class _SearchOverride:
1011
1012    """
1013    Mix-in class to override `StateMachine` regular expression behavior.
1014
1015    Changes regular expression matching, from the default `re.match()`
1016    (succeeds only if the pattern matches at the start of `self.line`) to
1017    `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1018    When subclassing a `StateMachine`, list this class **first** in the
1019    inheritance list of the class definition.
1020    """
1021
1022    def match(self, pattern):
1023        """
1024        Return the result of a regular expression search.
1025
1026        Overrides `StateMachine.match()`.
1027
1028        Parameter `pattern`: `re` compiled regular expression.
1029        """
1030        return pattern.search(self.line)
1031
1032
1033class SearchStateMachine(_SearchOverride, StateMachine):
1034    """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1035    pass
1036
1037
1038class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1039    """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1040    pass
1041
1042
1043class ViewList:
1044
1045    """
1046    List with extended functionality: slices of ViewList objects are child
1047    lists, linked to their parents. Changes made to a child list also affect
1048    the parent list.  A child list is effectively a "view" (in the SQL sense)
1049    of the parent list.  Changes to parent lists, however, do *not* affect
1050    active child lists.  If a parent list is changed, any active child lists
1051    should be recreated.
1052
1053    The start and end of the slice can be trimmed using the `trim_start()` and
1054    `trim_end()` methods, without affecting the parent list.  The link between
1055    child and parent lists can be broken by calling `disconnect()` on the
1056    child list.
1057
1058    Also, ViewList objects keep track of the source & offset of each item.
1059    This information is accessible via the `source()`, `offset()`, and
1060    `info()` methods.
1061    """
1062
1063    def __init__(self, initlist=None, source=None, items=None,
1064                 parent=None, parent_offset=None):
1065        self.data = []
1066        """The actual list of data, flattened from various sources."""
1067
1068        self.items = []
1069        """A list of (source, offset) pairs, same length as `self.data`: the
1070        source of each line and the offset of each line from the beginning of
1071        its source."""
1072
1073        self.parent = parent
1074        """The parent list."""
1075
1076        self.parent_offset = parent_offset
1077        """Offset of this list from the beginning of the parent list."""
1078
1079        if isinstance(initlist, ViewList):
1080            self.data = initlist.data[:]
1081            self.items = initlist.items[:]
1082        elif initlist is not None:
1083            self.data = list(initlist)
1084            if items:
1085                self.items = items
1086            else:
1087                self.items = [(source, i) for i in range(len(initlist))]
1088        assert len(self.data) == len(self.items), 'data mismatch'
1089
1090    def __str__(self):
1091        return str(self.data)
1092
1093    def __repr__(self):
1094        return '%s(%s, items=%s)' % (self.__class__.__name__,
1095                                     self.data, self.items)
1096
1097    def __lt__(self, other): return self.data <  self.__cast(other)
1098    def __le__(self, other): return self.data <= self.__cast(other)
1099    def __eq__(self, other): return self.data == self.__cast(other)
1100    def __ne__(self, other): return self.data != self.__cast(other)
1101    def __gt__(self, other): return self.data >  self.__cast(other)
1102    def __ge__(self, other): return self.data >= self.__cast(other)
1103    def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1104
1105    def __cast(self, other):
1106        if isinstance(other, ViewList):
1107            return other.data
1108        else:
1109            return other
1110
1111    def __contains__(self, item): return item in self.data
1112    def __len__(self): return len(self.data)
1113
1114    # The __getitem__()/__setitem__() methods check whether the index
1115    # is a slice first, since native list objects start supporting
1116    # them directly in Python 2.3 (no exception is raised when
1117    # indexing a list with a slice object; they just work).
1118
1119    def __getitem__(self, i):
1120        if isinstance(i, types.SliceType):
1121            assert i.step in (None, 1),  'cannot handle slice with stride'
1122            return self.__class__(self.data[i.start:i.stop],
1123                                  items=self.items[i.start:i.stop],
1124                                  parent=self, parent_offset=i.start)
1125        else:
1126            return self.data[i]
1127
1128    def __setitem__(self, i, item):
1129        if isinstance(i, types.SliceType):
1130            assert i.step in (None, 1), 'cannot handle slice with stride'
1131            if not isinstance(item, ViewList):
1132                raise TypeError('assigning non-ViewList to ViewList slice')
1133            self.data[i.start:i.stop] = item.data
1134            self.items[i.start:i.stop] = item.items
1135            assert len(self.data) == len(self.items), 'data mismatch'
1136            if self.parent:
1137                self.parent[i.start + self.parent_offset
1138                            : i.stop + self.parent_offset] = item
1139        else:
1140            self.data[i] = item
1141            if self.parent:
1142                self.parent[i + self.parent_offset] = item
1143
1144    def __delitem__(self, i):
1145        try:
1146            del self.data[i]
1147            del self.items[i]
1148            if self.parent:
1149                del self.parent[i + self.parent_offset]
1150        except TypeError:
1151            assert i.step is None, 'cannot handle slice with stride'
1152            del self.data[i.start:i.stop]
1153            del self.items[i.start:i.stop]
1154            if self.parent:
1155                del self.parent[i.start + self.parent_offset
1156                                : i.stop + self.parent_offset]
1157
1158    def __add__(self, other):
1159        if isinstance(other, ViewList):
1160            return self.__class__(self.data + other.data,
1161                                  items=(self.items + other.items))
1162        else:
1163            raise TypeError('adding non-ViewList to a ViewList')
1164
1165    def __radd__(self, other):
1166        if isinstance(other, ViewList):
1167            return self.__class__(other.data + self.data,
1168                                  items=(other.items + self.items))
1169        else:
1170            raise TypeError('adding ViewList to a non-ViewList')
1171
1172    def __iadd__(self, other):
1173        if isinstance(other, ViewList):
1174            self.data += other.data
1175        else:
1176            raise TypeError('argument to += must be a ViewList')
1177        return self
1178
1179    def __mul__(self, n):
1180        return self.__class__(self.data * n, items=(self.items * n))
1181
1182    __rmul__ = __mul__
1183
1184    def __imul__(self, n):
1185        self.data *= n
1186        self.items *= n
1187        return self
1188
1189    def extend(self, other):
1190        if not isinstance(other, ViewList):
1191            raise TypeError('extending a ViewList with a non-ViewList')
1192        if self.parent:
1193            self.parent.insert(len(self.data) + self.parent_offset, other)
1194        self.data.extend(other.data)
1195        self.items.extend(other.items)
1196
1197    def append(self, item, source=None, offset=0):
1198        if source is None:
1199            self.extend(item)
1200        else:
1201            if self.parent:
1202                self.parent.insert(len(self.data) + self.parent_offset, item,
1203                                   source, offset)
1204            self.data.append(item)
1205            self.items.append((source, offset))
1206
1207    def insert(self, i, item, source=None, offset=0):
1208        if source is None:
1209            if not isinstance(item, ViewList):
1210                raise TypeError('inserting non-ViewList with no source given')
1211            self.data[i:i] = item.data
1212            self.items[i:i] = item.items
1213            if self.parent:
1214                index = (len(self.data) + i) % len(self.data)
1215                self.parent.insert(index + self.parent_offset, item)
1216        else:
1217            self.data.insert(i, item)
1218            self.items.insert(i, (source, offset))
1219            if self.parent:
1220                index = (len(self.data) + i) % len(self.data)
1221                self.parent.insert(index + self.parent_offset, item,
1222                                   source, offset)
1223
1224    def pop(self, i=-1):
1225        if self.parent:
1226            index = (len(self.data) + i) % len(self.data)
1227            self.parent.pop(index + self.parent_offset)
1228        self.items.pop(i)
1229        return self.data.pop(i)
1230
1231    def trim_start(self, n=1):
1232        """
1233        Remove items from the start of the list, without touching the parent.
1234        """
1235        if n > len(self.data):
1236            raise IndexError("Size of trim too large; can't trim %s items "
1237                             "from a list of size %s." % (n, len(self.data)))
1238        elif n < 0:
1239            raise IndexError('Trim size must be >= 0.')
1240        del self.data[:n]
1241        del self.items[:n]
1242        if self.parent:
1243            self.parent_offset += n
1244
1245    def trim_end(self, n=1):
1246        """
1247        Remove items from the end of the list, without touching the parent.
1248        """
1249        if n > len(self.data):
1250            raise IndexError("Size of trim too large; can't trim %s items "
1251                             "from a list of size %s." % (n, len(self.data)))
1252        elif n < 0:
1253            raise IndexError('Trim size must be >= 0.')
1254        del self.data[-n:]
1255        del self.items[-n:]
1256
1257    def remove(self, item):
1258        index = self.index(item)
1259        del self[index]
1260
1261    def count(self, item): return self.data.count(item)
1262    def index(self, item): return self.data.index(item)
1263
1264    def reverse(self):
1265        self.data.reverse()
1266        self.items.reverse()
1267        self.parent = None
1268
1269    def sort(self, *args):
1270        tmp = zip(self.data, self.items)
1271        tmp.sort(*args)
1272        self.data = [entry[0] for entry in tmp]
1273        self.items = [entry[1] for entry in tmp]
1274        self.parent = None
1275
1276    def info(self, i):
1277        """Return source & offset for index `i`."""
1278        try:
1279            return self.items[i]
1280        except IndexError:
1281            if i == len(self.data):     # Just past the end
1282                return self.items[i - 1][0], None
1283            else:
1284                raise
1285
1286    def source(self, i):
1287        """Return source for index `i`."""
1288        return self.info(i)[0]
1289
1290    def offset(self, i):
1291        """Return offset for index `i`."""
1292        return self.info(i)[1]
1293
1294    def disconnect(self):
1295        """Break link between this list and parent list."""
1296        self.parent = None
1297
1298
1299class StringList(ViewList):
1300
1301    """A `ViewList` with string-specific methods."""
1302
1303    def trim_left(self, length, start=0, end=sys.maxint):
1304        """
1305        Trim `length` characters off the beginning of each item, in-place,
1306        from index `start` to `end`.  No whitespace-checking is done on the
1307        trimmed text.  Does not affect slice parent.
1308        """
1309        self.data[start:end] = [line[length:]
1310                                for line in self.data[start:end]]
1311
1312    def get_text_block(self, start, flush_left=0):
1313        """
1314        Return a contiguous block of text.
1315
1316        If `flush_left` is true, raise `UnexpectedIndentationError` if an
1317        indented line is encountered before the text block ends (with a blank
1318        line).
1319        """
1320        end = start
1321        last = len(self.data)
1322        while end < last:
1323            line = self.data[end]
1324            if not line.strip():
1325                break
1326            if flush_left and (line[0] == ' '):
1327                source, offset = self.info(end)
1328                raise UnexpectedIndentationError(self[start:end], source,
1329                                                 offset + 1)
1330            end += 1
1331        return self[start:end]
1332
1333    def get_indented(self, start=0, until_blank=0, strip_indent=1,
1334                     block_indent=None, first_indent=None):
1335        """
1336        Extract and return a StringList of indented lines of text.
1337
1338        Collect all lines with indentation, determine the minimum indentation,
1339        remove the minimum indentation from all indented lines (unless
1340        `strip_indent` is false), and return them. All lines up to but not
1341        including the first unindented line will be returned.
1342
1343        :Parameters:
1344          - `start`: The index of the first line to examine.
1345          - `until_blank`: Stop collecting at the first blank line if true.
1346          - `strip_indent`: Strip common leading indent if true (default).
1347          - `block_indent`: The indent of the entire block, if known.
1348          - `first_indent`: The indent of the first line, if known.
1349
1350        :Return:
1351          - a StringList of indented lines with mininum indent removed;
1352          - the amount of the indent;
1353          - a boolean: did the indented block finish with a blank line or EOF?
1354        """
1355        indent = block_indent           # start with None if unknown
1356        end = start
1357        if block_indent is not None and first_indent is None:
1358            first_indent = block_indent
1359        if first_indent is not None:
1360            end += 1
1361        last = len(self.data)
1362        while end < last:
1363            line = self.data[end]
1364            if line and (line[0] != ' '
1365                         or (block_indent is not None
1366                             and line[:block_indent].strip())):
1367                # Line not indented or insufficiently indented.
1368                # Block finished properly iff the last indented line blank:
1369                blank_finish = ((end > start)
1370                                and not self.data[end - 1].strip())
1371                break
1372            stripped = line.lstrip()
1373            if not stripped:            # blank line
1374                if until_blank:
1375                    blank_finish = 1
1376                    break
1377            elif block_indent is None:
1378                line_indent = len(line) - len(stripped)
1379                if indent is None:
1380                    indent = line_indent
1381                else:
1382                    indent = min(indent, line_indent)
1383            end += 1
1384        else:
1385            blank_finish = 1            # block ends at end of lines
1386        block = self[start:end]
1387        if first_indent is not None and block:
1388            block.data[0] = block.data[0][first_indent:]
1389        if indent and strip_indent:
1390            block.trim_left(indent, start=(first_indent is not None))
1391        return block, indent or 0, blank_finish
1392
1393    def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1394        block = self[top:bottom]
1395        indent = right
1396        for i in range(len(block.data)):
1397            block.data[i] = line = block.data[i][left:right].rstrip()
1398            if line:
1399                indent = min(indent, len(line) - len(line.lstrip()))
1400        if strip_indent and 0 < indent < right:
1401            block.data = [line[indent:] for line in block.data]
1402        return block
1403
1404    def pad_double_width(self, pad_char):
1405        """
1406        Pad all double-width characters in self by appending `pad_char` to each.
1407        For East Asian language support.
1408        """
1409        if hasattr(unicodedata, 'east_asian_width'):
1410            east_asian_width = unicodedata.east_asian_width
1411        else:
1412            return                      # new in Python 2.4
1413        for i in range(len(self.data)):
1414            line = self.data[i]
1415            if isinstance(line, types.UnicodeType):
1416                new = []
1417                for char in line:
1418                    new.append(char)
1419                    if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1420                        new.append(pad_char)
1421                self.data[i] = ''.join(new)
1422
1423    def replace(self, old, new):
1424        """Replace all occurrences of substring `old` with `new`."""
1425        for i in range(len(self.data)):
1426            self.data[i] = self.data[i].replace(old, new)
1427
1428
1429class StateMachineError(Exception): pass
1430class UnknownStateError(StateMachineError): pass
1431class DuplicateStateError(StateMachineError): pass
1432class UnknownTransitionError(StateMachineError): pass
1433class DuplicateTransitionError(StateMachineError): pass
1434class TransitionPatternNotFound(StateMachineError): pass
1435class TransitionMethodNotFound(StateMachineError): pass
1436class UnexpectedIndentationError(StateMachineError): pass
1437
1438
1439class TransitionCorrection(Exception):
1440
1441    """
1442    Raise from within a transition method to switch to another transition.
1443
1444    Raise with one argument, the new transition name.
1445    """
1446
1447
1448class StateCorrection(Exception):
1449
1450    """
1451    Raise from within a transition method to switch to another state.
1452
1453    Raise with one or two arguments: new state name, and an optional new
1454    transition name.
1455    """
1456
1457
1458def string2lines(astring, tab_width=8, convert_whitespace=0,
1459                 whitespace=re.compile('[\v\f]')):
1460    """
1461    Return a list of one-line strings with tabs expanded, no newlines, and
1462    trailing whitespace stripped.
1463
1464    Each tab is expanded with between 1 and `tab_width` spaces, so that the
1465    next character's index becomes a multiple of `tab_width` (8 by default).
1466
1467    Parameters:
1468
1469    - `astring`: a multi-line string.
1470    - `tab_width`: the number of columns between tab stops.
1471    - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1472    """
1473    if convert_whitespace:
1474        astring = whitespace.sub(' ', astring)
1475    return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1476
1477def _exception_data():
1478    """
1479    Return exception information:
1480
1481    - the exception's class name;
1482    - the exception object;
1483    - the name of the file containing the offending code;
1484    - the line number of the offending code;
1485    - the function name of the offending code.
1486    """
1487    type, value, traceback = sys.exc_info()
1488    while traceback.tb_next:
1489        traceback = traceback.tb_next
1490    code = traceback.tb_frame.f_code
1491    return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1492            code.co_name)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。