root/galaxy-central/eggs/WebHelpers-0.2-py2.6.egg/webhelpers/markdown.py

リビジョン 3, 58.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/env python
2
3SPEED_TEST = 0
4
5"""
6====================================================================
7IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
8====================================================================
9
10Python-Markdown
11===============
12
13Converts Markdown to HTML.  Basic usage as a module:
14
15    import markdown
16    html = markdown.markdown(your_text_string)
17
18Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
19maintained  by [Yuri Takhteyev](http://www.freewisdom.org).
20
21Project website: http://www.freewisdom.org/projects/python-markdown
22Contact: yuri [at] freewisdom.org
23
24License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
25
26Version: 1.5 (May 15, 2006)
27
28For changelog, see end of file
29"""
30
31import re, sys, os, random
32
33# set debug level: 3 none, 2 critical, 1 informative, 0 all
# Message levels, in increasing order of severity:
# VERBOSE = 0, INFO = 1, CRITICAL = 2, NONE = 3.
(VERBOSE, INFO, CRITICAL, NONE) = range(4)

# Only messages whose level is at least this value are printed.
MESSAGE_THRESHOLD = CRITICAL

def message(level, text) :
    """Print text to stdout when level reaches MESSAGE_THRESHOLD.

       @param level: one of VERBOSE, INFO, CRITICAL or NONE
       @param text: the message to print
       @returns: None """
    if level >= MESSAGE_THRESHOLD :
        # The parenthesised single-argument form produces the same
        # output as the Python 2 print statement and is also valid
        # as a Python 3 function call.
        print(text)
41
42
43# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
44
# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
# when true, "{@name=value}" markers found in text nodes are applied as
# attributes of the enclosing element while the tree is serialised
ENABLE_ATTRIBUTES = 1
# when true, the "_" that opens an _emphasis_ span must not follow a
# non-space character and must be followed by a non-space character
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# NOTE(review): not referenced in the visible part of this file;
# presumably used by the footnote extension -- confirm
FN_BACKLINK_TEXT = "zz1337820767766393qq"
# a template for html placeholders
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
# the "%d" slot receives the index of the stashed html segment
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
56
# Tag names treated as block level.  Header tags ("h" followed by a
# digit) are recognised separately by is_block_level().
BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/']

def is_block_level (tag) :
    """Tell whether tag names a block-level HTML element.

       @param tag: a tag name without angle brackets
       @returns: True if tag is listed in BLOCK_LEVEL_ELEMENTS or is
                 "h" followed by a digit, False otherwise """
    if tag in BLOCK_LEVEL_ELEMENTS :
        return True
    # The length guard keeps "" and one-character names such as "h"
    # from raising IndexError on the index lookups below.
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in "0123456789"
65
66"""
67======================================================================
68========================== NANODOM ===================================
69======================================================================
70
71The three classes below implement some of the most basic DOM
72methods.  I use this instead of minidom because I need a simpler
73functionality and do not want to require additional libraries.
74
75Importantly, NanoDom does not do normalization, which is what we
76want. It also adds extra white space when converting DOM to string
77"""
78
79
class Document :
    """Root of a NanoDom tree and factory for the nodes that go in it."""

    def __init__ (self) :
        # Both attributes used to appear only once appendChild() had
        # run, so creating an entity reference on a fresh document
        # raised AttributeError.  Define them up front.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child) :
        """Make child the root element and start a fresh entity cache."""
        self.documentElement = child
        child.parent = self
        self.entities = {}

    def createElement(self, tag, textNode=None) :
        """Create an Element that belongs to this document.

           @param tag: the element name
           @param textNode: optional text; when non-empty it is wrapped
                            in a TextNode and appended to the element
           @returns: the new Element """
        el = Element(tag)
        el.doc = self
        if textNode :
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text) :
        """Create a TextNode that belongs to this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the EntityReference for entity, creating it on first
           use.  Repeated requests for one name share a single node."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml (self) :
        """Serialise the tree starting at the root element."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text) :
        """Escape <, > and the double quote in text.

           The ampersand pair is commented out, so "&" is passed
           through unchanged.

           @param text: the string to escape
           @returns: the escaped string """

        pairs = [ #("&", "&"),
                  ("<", "&lt;"),
                  (">", "&gt;"),
                  ("\"", "&quot;")]

        for old, new in pairs :
            text = text.replace(old, new)
        return text

    def find(self, test) :
        """Return the descendants of the root element that pass test."""
        return self.documentElement.find(test)

    def unlink(self) :
        """Unlink the tree under the root element and drop the root."""
        self.documentElement.unlink()
        self.documentElement = None
124
125
class Element :
    """A NanoDom element: a tag name, ordered attributes and children."""

    type = "element"

    def __init__ (self, tag) :
        self.nodeName = tag
        self.attributes = []         # attribute names, in insertion order
        self.attribute_values = {}   # attribute name -> value
        self.childNodes = []

    def unlink(self) :
        """Recursively discard the child lists of this subtree."""
        for node in self.childNodes :
            if node.type == "element" :
                node.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value) :
        """Set attr to value, remembering the order of first assignment."""
        if attr not in self.attributes :
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child) :
        """Insert child at the given index and adopt it."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child) :
        """Remove child from the child list."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild) :
        """Put newChild at the position currently held by oldChild."""
        slot = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(slot, newChild)

    def appendChild(self, child) :
        """Add child at the end of the child list and adopt it."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self) :
        """Elements carry no attribute markers of their own."""
        pass

    def find(self, test, depth=0) :
        """ Returns a list of descendants that pass the test function """
        hits = []
        for node in self.childNodes :
            if test(node) :
                hits.append(node)
            if node.type == "element" :
                hits.extend(node.find(test, depth+1))
        return hits

    def toxml(self):
        """Serialise this element and its subtree to a string."""
        # Children get a chance to move {@name=value} markers onto this
        # element before its attributes are written out.
        if ENABLE_ATTRIBUTES :
            for node in self.childNodes:
                node.handleAttributes()

        name = self.nodeName
        pieces = []

        # leading white space for headers and list items
        if name in ['h1', 'h2', 'h3', 'h4'] :
            pieces.append("\n")
        elif name in ['li'] :
            pieces.append("\n ")

        pieces.append("<" + name)
        for attr in self.attributes :
            escaped = self.doc.normalizeEntities(self.attribute_values[attr])
            pieces.append(' %s="%s"' % (attr, escaped))

        # a blockquote is always written with a closing tag
        if self.childNodes or name in ['blockquote']:
            pieces.append(">")
            for node in self.childNodes :
                pieces.append(node.toxml())
            if name == 'p' :
                pieces.append("\n")
            elif name == 'li' :
                pieces.append("\n ")
            pieces.append("</%s>" % name)
        else :
            pieces.append("/>")

        if name in ['p', 'li', 'ul', 'ol',
                    'h1', 'h2', 'h3', 'h4'] :
            pieces.append("\n")

        return "".join(pieces)
208
209
class TextNode :
    """A NanoDom leaf node holding a run of text."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}

    def __init__ (self, text) :
        self.value = text

    def attributeCallback(self, match) :
        """Copy one {@name=value} marker onto the parent element."""
        name, value = match.group(1), match.group(2)
        self.parent.setAttribute(name, value)

    def handleAttributes(self) :
        """Run attributeCallback over every {@name=value} marker in
           the text and store the substituted result as the new value."""
        self.value = self.attrRegExp.sub(self.attributeCallback, self.value)

    def toxml(self) :
        """Return the text, indented to suit its parent and escaped."""
        text = self.value
        # html placeholders must come through without added indentation
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            owner = self.parent
            if owner.nodeName == "p" :
                text = text.replace("\n", "\n   ")
            elif owner.nodeName == "li" and owner.childNodes[0]==self :
                # only the first child of a list item gets the indent
                indent = "\n     "
                text = indent + text.replace("\n", indent)
        return self.doc.normalizeEntities(text)
234
235
class EntityReference:
    """A NanoDom node that is written out as an entity (&name;)."""

    type = "entity_ref"

    def __init__(self, entity):
        self.entity = entity

    def handleAttributes(self):
        """Entity references carry no attribute markers."""
        pass

    def toxml(self):
        """Return the entity wrapped in "&" and ";"."""
        return "".join(["&", self.entity, ";"])
248
249
250"""
251======================================================================
252========================== PRE-PROCESSORS ============================
253======================================================================
254
255Preprocessors munge source text before we start doing anything too
256complicated.
257
258Each preprocessor implements a "run" method that takes a pointer to
259a list of lines of the document, modifies it as necessary and
260returns either the same pointer or a pointer to a new list.
261"""
262
class HeaderPreprocessor :

    """
       Replaces underlined headers with hashed headers to avoid
       the need for lookahead later.
    """

    def run (self, lines) :
        """Rewrite underlined headers and pad hashed ones.

           A line underlined with "=" becomes "# line", one underlined
           with "-" becomes "## line", and the underline is blanked.
           A "\\n" entry is inserted after every line that already
           starts with "#".

           @param lines: a list of source lines; modified in place
           @returns: the same list """

        i = 0
        # The list grows whenever a line is inserted, so its length is
        # re-read on every pass.  A range() computed up front would
        # stop short and leave the last lines unprocessed.
        while i < len(lines) :
            line = lines[i]
            if not line :
                i += 1
                continue

            if line.startswith("#") :
                lines.insert(i+1, "\n")

            # Strict "<": the last line has no successor to inspect.
            if (i+1 < len(lines)
                  and lines[i+1]
                  and lines[i+1][0] in ['-', '=']) :

                underline = lines[i+1].strip()

                if underline == "="*len(underline) :
                    lines[i] = "# " + line.strip()
                    lines[i+1] = ""
                elif underline == "-"*len(underline) :
                    lines[i] = "## " + line.strip()
                    lines[i+1] = ""

            i += 1

        return lines

HEADER_PREPROCESSOR = HeaderPreprocessor()
295
class LinePreprocessor :
    """Deals with HR lines (needs to be done before processing lists)"""

    def run (self, lines) :
        """Replace every horizontal-rule line with "<hr />".

           @param lines: a list of source lines; modified in place
           @returns: the same list """
        for index, line in enumerate(lines) :
            if self._isLine(line) :
                lines[index] = "<hr />"
        return lines

    def _isLine(self, block) :
        """Determines if a block should be replaced with an <HR>"""
        if block.startswith("    ") :
            return 0  # a code block
        squeezed = "".join([ch for ch in block if not ch.isspace()])
        # one or two marker characters are not enough for a rule
        if len(squeezed) <= 2 :
            return 0
        for key in ('isline1', 'isline2', 'isline3') :
            found = RE.regExp[key].match(squeezed)
            if found and found.group(1) :
                return 1
        return 0

LINE_PREPROCESSOR = LinePreprocessor()
319
320
class LineBreaksPreprocessor :
    """Replaces double spaces at the end of the lines with <br />."""

    def run (self, lines) :
        """Append "<br />" to every line ending in two spaces, except
           indented (tabbed) lines.

           @param lines: a list of source lines; modified in place
           @returns: the same list """
        for index, line in enumerate(lines) :
            if not line.endswith("  ") :
                continue
            if RE.regExp['tabbed'].match(line) :
                continue
            lines[index] = line + "<br />"
        return lines

LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
332
333
class HtmlBlockPreprocessor :
    """Removes html blocks from self.lines"""

    def run (self, lines) :
        """Swap raw html blocks for placeholders.

           The text is split into blocks at blank lines.  Each block
           that looks like raw html is stored in self.stash and the
           placeholder returned by the stash takes its place.

           @param lines: a list of source lines
           @returns: a new list of lines """
        new_blocks = []
        text = "\n".join(lines)
        for block in text.split("\n\n") :
            if block.startswith("\n") :
                block = block[1:]
            if self._isHtmlBlock(block) :
                new_blocks.append(
                    self.stash.store(block.strip()))
            else :
                new_blocks.append(block)
        return "\n\n".join(new_blocks).split("\n")

    def _isHtmlBlock(self, block) :
        """Tell whether block is enclosed in <...> and opens with a
           declaration-like character or a block-level tag name."""
        if not (block.startswith("<") and block.rstrip().endswith(">")) :
            return False
        # At least "<" and ">" are present here, so block[1] exists.
        if block[1] in ["!", "?", "@", "%"] :
            return True
        words = block[1:].replace(">", " ").split()
        # "<>" or "< >" contain no tag name at all; treat them as
        # ordinary text instead of failing on words[0].
        if not words :
            return False
        return is_block_level(words[0].lower())

HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
354
355
class ReferencePreprocessor :
    """Collects reference definitions ("[id]: url title") and removes
       them from the text."""

    def run (self, lines) :
        """Record every reference definition in self.references.

           The key is the lower-cased id and the value is a
           (url, title) tuple.

           @param lines: a list of source lines
           @returns: a new list without the definition lines """
        kept = []
        for line in lines:
            found = RE.regExp['reference-def'].match(line)
            if not found:
                kept.append(line)
                continue
            key = found.group(2).strip().lower()
            title = dequote(found.group(4).strip()) #.replace('"', "&quot;")
            self.references[key] = (found.group(3), title)
        return kept #+ "\n"

REFERENCE_PREPROCESSOR = ReferencePreprocessor()
371
372"""
373======================================================================
374========================== INLINE PATTERNS ===========================
375======================================================================
376
377Inline patterns such as *emphasis* are handled by means of auxiliary
378objects, one per pattern.  Each pattern object uses a single regular
379expression and needs to support the following methods:
380
381  pattern.getCompiledRegExp() - returns a regular expression
382
383  pattern.handleMatch(m, doc) - takes a match object and returns
384                                a NanoDom node (as a part of the provided
385                                doc) or None
386
387All of python markdown's built-in patterns subclass from BasePattern,
388but you can add additional patterns that don't.
389
390Also note that all the regular expressions used by inline must
391capture the whole block.  For this reason, they all start with
392'^(.*)' and end with '(.*)$'.  In the case of built-in expressions,
393BasePattern takes care of adding the "^(.*)" and "(.*)$".
394
395Finally, the order in which regular expressions are applied is very
396important - e.g. if we first replace http://.../ links with <a> tags
397and _then_ try to replace inline html, we would end up with a mess.
398So, we apply the expressions in the following order:
399
400       * escape and backticks have to go before everything else, so
401         that we can preempt any markdown patterns by escaping them.
402
403       * then we handle auto-links (must be done before inline html)
404
405       * then we handle inline HTML.  At this point we will simply
406         replace all inline HTML strings with a placeholder and add
407         the actual HTML to a hash.
408
409       * then inline images (must be done before links)
410
411       * then bracketed links, first regular then reference-style
412
413       * finally we apply strong and emphasis
414"""
415
# Any run of characters that contains no square bracket.
NOBRACKET = r'[^\]\[]*'
# Bracketed text with up to six levels of nested brackets.  Group 1 is
# the text between the outer brackets; the six nesting groups bring the
# total contributed by BRK to seven, so the first group that follows
# BRK in a pattern below is group 8.
BRK = ( r'\[('
        + (NOBRACKET + r'(\['+NOBRACKET)*6
        + (NOBRACKET+ r'\])*'+NOBRACKET)*6
        + NOBRACKET + r')\]' )

BACKTICK_RE = r'\`([^\`]*)\`'                    # `e= m*c^2`
DOUBLE_BACKTICK_RE =  r'\`\`(.*)\`\`'            # ``e=f("`")``
ESCAPE_RE = r'\\(.)'                             # \<
EMPHASIS_RE = r'\*([^\*]*)\*'                    # *emphasis*
STRONG_RE = r'\*\*(.*)\*\*'                      # **strong**
# NOTE(review): the body excludes "_" rather than "*", which looks
# copied from STRONG_EM_2_RE; left unchanged -- confirm intent
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'            # ***strong***

if SMART_EMPHASIS:
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'        # _emphasis_
else :
    EMPHASIS_2_RE = r'_([^_]*)_'                 # _emphasis_

STRONG_2_RE = r'__([^_]*)__'                     # __strong__
STRONG_EM_2_RE = r'___([^_]*)___'                # ___strong___

LINK_RE = BRK + r'\s*\(([^\)]*)\)'               # [text](url)
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
# Raw string like its neighbours: the value is unchanged, but "\s",
# "\[" and "\]" are no longer invalid escapes in a normal literal.
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>'               # <me@example.com>
HTML_RE = r'(\<[^\>]*\>)'                        # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
447
class BasePattern:
    """Base class of the built-in inline patterns.

       The supplied expression is wrapped in "^(.*)" and "(.*)$", so
       group 1 is the text before the match, the last group is the text
       after it, and the pattern's own groups start at 2."""

    def __init__ (self, pattern) :
        """@param pattern: the regular expression source, unanchored"""
        self.pattern = pattern
        anchored = "^(.*)%s(.*)$" % pattern
        self.compiled_re = re.compile(anchored, re.DOTALL)

    def getCompiledRegExp (self) :
        """Return the compiled, whole-block form of the pattern."""
        return self.compiled_re
456
class SimpleTextPattern (BasePattern) :
    """Pattern whose match is replaced by its captured text."""

    def handleMatch(self, m, doc) :
        """Return a text node holding group 2 of the match."""
        captured = m.group(2)
        return doc.createTextNode(captured)
461
class SimpleTagPattern (BasePattern):
    """Pattern that wraps its captured text in a single element."""

    def __init__ (self, pattern, tag) :
        """@param pattern: the regular expression source
           @param tag: name of the element to create for each match"""
        BasePattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc) :
        """Return a self.tag element containing group 2 as text."""
        wrapper = doc.createElement(self.tag)
        content = doc.createTextNode(m.group(2))
        wrapper.appendChild(content)
        return wrapper
472
class BacktickPattern (BasePattern):
    """Pattern that turns backtick-quoted text into a <code> element."""

    def __init__ (self, pattern):
        BasePattern.__init__(self, pattern)
        self.tag = "code"

    def handleMatch(self, m, doc) :
        """Return a code element holding the stripped capture, with
           every "&" written as "&amp;"."""
        code = doc.createElement(self.tag)
        content = m.group(2).strip().replace("&", "&amp;")
        code.appendChild(doc.createTextNode(content))
        return code
485
486
class DoubleTagPattern (SimpleTagPattern) :
    """Pattern that wraps its captured text in two nested elements.
       self.tag holds both names as "outer,inner"."""

    def handleMatch(self, m, doc) :
        """Return the outer element; the inner one holds group 2."""
        outer_name, inner_name = self.tag.split(",")
        outer = doc.createElement(outer_name)
        inner = doc.createElement(inner_name)
        outer.appendChild(inner)
        inner.appendChild(doc.createTextNode(m.group(2)))
        return outer
496
497
class HtmlPattern (BasePattern):
    """Pattern that moves its match into self.stash."""

    def handleMatch (self, m, doc) :
        """Store group 2 in the stash and return a text node that
           holds the placeholder the stash handed back."""
        token = self.stash.store(m.group(2))
        return doc.createTextNode(token)
503
504
class LinkPattern (BasePattern):
    """Pattern for inline links: [text](href "title")."""

    def handleMatch(self, m, doc) :
        """Return an <a> element built from the match.

           Group 2 is the link text and group 9 the content of the
           parentheses: nothing, an href, or an href plus a title."""
        anchor = doc.createElement('a')
        anchor.appendChild(doc.createTextNode(m.group(2)))
        parts = m.group(9).split()
        # We should now have [], [href], or [href, title]
        href = ""
        if parts :
            href = parts[0]
        anchor.setAttribute('href', href)
        if len(parts) > 1 :
            # everything after the href is the title
            joined = " ".join(parts[1:]).strip()
            anchor.setAttribute('title', dequote(joined))
        return anchor
522
523
class ImagePattern (BasePattern):
    """Pattern for inline images: ![alt](src "title")."""

    def handleMatch(self, m, doc):
        """Return an <img> element built from the match.

           Group 2 is the alt text and group 9 the content of the
           parentheses: a src, optionally followed by a title."""
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        # "![alt]()" leaves nothing between the parentheses.  Use an
        # empty src, the way LinkPattern uses an empty href, rather
        # than failing with IndexError on src_parts[0].
        if src_parts :
            el.setAttribute('src', src_parts[0])
        else :
            el.setAttribute('src', "")
        if len(src_parts) > 1 :
            el.setAttribute('title', dequote(" ".join(src_parts[1:])))
        if ENABLE_ATTRIBUTES :
            # Attach the alt text as a temporary child so that any
            # {@name=value} markers in it land on the <img> element,
            # then take the node out again.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el
542
class ReferencePattern (BasePattern):
    """Pattern for reference-style links: [text][id]."""

    def handleMatch(self, m, doc):
        """Return the element for a reference, or None if the id is
           not defined in self.references.

           Group 2 is the link text and group 9 the id."""
        if m.group(9) :
            ref_id = m.group(9).lower()
        else :
            # if we got something like "[Google][]"
            # we'll use "google" as the id
            ref_id = m.group(2).lower()
        # "in" replaces dict.has_key(), which is deprecated and no
        # longer exists in Python 3; the lookup is the same.
        if ref_id not in self.references : # ignore undefined refs
            return None
        href, title = self.references[ref_id]
        text = m.group(2)
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        """Build the <a> element for a resolved reference.

           @param href: the target url
           @param title: the title; skipped when empty
           @param text: the link text
           @param doc: the document that creates the nodes
           @returns: the new element """
        el = doc.createElement('a')
        el.setAttribute('href', href)
        if title :
            el.setAttribute('title', title)
        el.appendChild(doc.createTextNode(text))
        return el
565
566
class ImageReferencePattern (ReferencePattern):
    """Pattern for reference-style images: ![alt][id]."""

    def makeTag(self, href, title, text, doc):
        """Build the <img> element for a resolved reference: href goes
           to src, text to alt, and title is set only when non-empty."""
        image = doc.createElement('img')
        image.setAttribute('src', href)
        if title :
            image.setAttribute('title', title)
        image.setAttribute('alt', text)
        return image
576
577
class AutolinkPattern (BasePattern):
    """Pattern for automatic links: <http://example.com>."""

    def handleMatch(self, m, doc):
        """Return an <a> element whose href and text are both the
           captured url."""
        url = m.group(2)
        anchor = doc.createElement('a')
        anchor.setAttribute('href', url)
        anchor.appendChild(doc.createTextNode(url))
        return anchor
585
class AutomailPattern (BasePattern):
    """Pattern for automatic e-mail links: <me@example.com>."""

    def handleMatch(self, m, doc) :
        """Return an <a> element in which every character of the
           address, in the text and in the href, is written as a
           numeric character reference."""
        anchor = doc.createElement('a')
        address = m.group(2)
        prefix = "mailto:"
        if address.startswith(prefix):
            address = address[len(prefix):]
        # visible text: one entity reference node per character
        for char in address:
            anchor.appendChild(
                doc.createEntityReference("#%d" % ord(char)))
        # href: the same encoding applied to "mailto:" + address
        encoded = "".join(['&#%d;' % ord(char)
                           for char in prefix + address])
        anchor.setAttribute('href', encoded)
        return anchor
600
# Shared pattern instances.  Markdown.__init__ lists them in the order
# in which they are applied.

# patterns replaced by their captured text
ESCAPE_PATTERN          = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN      = SimpleTextPattern(NOT_STRONG_RE)

# patterns that wrap the captured text in one element
BACKTICK_PATTERN        = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN          = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2        = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN        = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2      = SimpleTagPattern(EMPHASIS_2_RE, 'em')

# patterns that wrap the captured text in <strong><em>
STRONG_EM_PATTERN       = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2     = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# links and images, inline and reference style
LINK_PATTERN            = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN     = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN      = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN       = ReferencePattern(REFERENCE_RE)

# inline html and entities, swapped for stash placeholders
HTML_PATTERN            = HtmlPattern(HTML_RE)
ENTITY_PATTERN          = HtmlPattern(ENTITY_RE)

# <url> and <address> automatic links
AUTOLINK_PATTERN        = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN        = AutomailPattern(AUTOMAIL_RE)
625
626
627"""
628======================================================================
629========================== POST-PROCESSORS ===========================
630======================================================================
631
632Markdown also allows post-processors, which are similar to
633preprocessors in that they need to implement a "run" method.  Unlike
634pre-processors, they take a NanoDom document as a parameter and work
635with that.
636#
637There are currently no standard post-processors, but the footnote
638extension below uses one.
639"""
640"""
641======================================================================
642========================== MISC AUXILIARY CLASSES ====================
643======================================================================
644"""
645
class HtmlStash :
    """Keeps the HTML segments that are pulled out of the text early
        on and stand in the document as place-holders."""

    def __init__ (self) :
        self.html_counter = 0   # number of segments stored so far
        self.rawHtmlBlocks = []

    def store(self, html) :
        """Remember an HTML segment and hand back its placeholder.

           @param html: an html segment
           @returns : the placeholder string to put into the document
                      in place of the segment """
        self.rawHtmlBlocks.append(html)
        index = self.html_counter
        placeholder = HTML_PLACEHOLDER % index
        self.html_counter = index + 1
        return placeholder
665
666
class BlockGuru :
    """Helper that finds where an indented block ends."""

    def _findHead(self, lines, fn, allowBlank=0) :

        """Collect the leading run of lines that fn accepts.

           @param lines: an array of strings
           @param fn: a function that returns a substring of a string
                      if the string matches the necessary criteria
           @param allowBlank: specifies whether it's ok to have blank
                      lines between matching lines
           @returns: a list of processed items and the unused
                      remainder of the original list"""

        collected = []
        pos = 0 # index of the line being examined
        total = len(lines)

        while pos < total :
            current = lines[pos]

            if not current.strip() :
                if not allowBlank :
                    return collected, lines[pos:]

                # A blank line _might_ be the end.  It is consumed
                # either way; what follows decides whether the block
                # goes on.
                pos += 1

                follower = None
                for candidate in lines[pos:] :
                    if candidate.strip() :
                        follower = candidate
                        break

                # No more text, or the next text is not part of the
                # block => this is the end
                if follower is None or not fn(follower) :
                    return collected, lines[pos:]

                collected.append("")
                continue

            piece = fn(current)
            if not piece :
                return collected, lines[pos:]

            collected.append(piece)
            pos += 1

        # every line was consumed
        return collected, lines[pos + 1:]


    def detabbed_fn(self, line) :
        """ An auxiliary method to be passed to _findHead """
        found = RE.regExp['tabbed'].match(line)
        if not found :
            return None
        return found.group(4)


    def detectTabbed(self, lines) :
        """Split lines into the leading indented block (blank lines
           allowed inside it) and the remainder."""
        return self._findHead(lines, self.detabbed_fn,
                              allowBlank = 1)
742
743
def print_error(string):
    """Write string, followed by a newline, to stderr"""
    line = string + '\n'
    sys.stderr.write(line)
747
748
def dequote(string) :
    """ Strips one pair of matching quotes from around a string.

        @param string: the text to unwrap
        @returns: string without its first and last character when it
                  both starts and ends with the same quote character
                  (single or double); otherwise string unchanged """
    opener = string[:1]
    if opener in ('"', "'") and string.endswith(opener) :
        return string[1:-1]
    return string
756
757"""
758======================================================================
759========================== CORE MARKDOWN =============================
760======================================================================
761
762This stuff is ugly, so if you are thinking of extending the syntax,
763see first if you can do it via pre-processors, post-processors,
764inline patterns or a combination of the three.
765"""
766
class CorePatterns :
    """Holds the compiled block-level expressions.  This class is
        scheduled for removal as part of a refactoring effort."""

    patterns = {
        'header':          r'(#*)([^#]*)(#*)', # # A title
        'reference-def' :  r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
                           # [Google]: http://www.google.com/
        'containsline':    r'([-]*)$|^([=]*)', # -----, =====, etc.
        'ol':              r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text
        'ul':              r'[ ]{0,3}[*+-]\s+(.*)', # "* text"
        'isline1':         r'(\**)', # ***
        'isline2':         r'(\-*)', # ---
        'isline3':         r'(\_*)', # ___
        'tabbed':          r'((\t)|(    ))(.*)', # an indented line
        'quoted' :         r'> ?(.*)', # a quoted block ("> ...")
    }

    def __init__ (self) :
        """Compile every entry of patterns, anchored with "^" and "$",
           into self.regExp under the same key."""

        self.regExp = dict(
            (name, re.compile("^%s$" % source, re.DOTALL))
            for name, source in self.patterns.items())

        # 'containsline' is the exception: it is compiled in multi-line
        # mode instead and replaces the entry built above.
        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)

RE = CorePatterns()
795
796
797class Markdown:
798    """ Markdown formatter class for creating an html document from
799        Markdown text """
800
801
802    def __init__(self, source=None):
803        """Creates a new Markdown instance.
804
805           @param source: The text in Markdown format. """
806       
807        if isinstance(source, unicode):
808            source = source.encode('utf8')
809        self.source = source
810        self.blockGuru = BlockGuru()
811        self.registeredExtensions = []
812        self.stripTopLevelTags = 1
813
814        self.preprocessors = [ HEADER_PREPROCESSOR,
815                               LINE_PREPROCESSOR,
816                               HTML_BLOCK_PREPROCESSOR,
817                               LINE_BREAKS_PREPROCESSOR,
818                               # A footnote preprocessor will
819                               # get inserted here
820                               REFERENCE_PREPROCESSOR ]
821
822
823        self.postprocessors = [] # a footnote postprocessor will get
824                                 # inserted later
825
826        self.prePatterns = []
827       
828
829        self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,
830                                BACKTICK_PATTERN,
831                                ESCAPE_PATTERN,
832                                IMAGE_LINK_PATTERN,
833                                IMAGE_REFERENCE_PATTERN,
834                                REFERENCE_PATTERN,
835                                LINK_ANGLED_PATTERN,
836                                LINK_PATTERN,
837                                AUTOLINK_PATTERN,
838                                AUTOMAIL_PATTERN,
839                                HTML_PATTERN,
840                                ENTITY_PATTERN,
841                                NOT_STRONG_PATTERN,
842                                STRONG_EM_PATTERN,
843                                STRONG_EM_PATTERN_2,
844                                STRONG_PATTERN,
845                                STRONG_PATTERN_2,
846                                EMPHASIS_PATTERN,
847                                EMPHASIS_PATTERN_2
848                                # The order of the handlers matters!!!
849                                ]
850
851        self.reset()
852
853    def registerExtension(self, extension) :
854        self.registeredExtensions.append(extension)
855
856    def reset(self) :
857        """Resets all state variables so that we can start
858            with a new text."""
859        self.references={}
860        self.htmlStash = HtmlStash()
861
862        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
863        REFERENCE_PREPROCESSOR.references = self.references
864        HTML_PATTERN.stash = self.htmlStash
865        ENTITY_PATTERN.stash = self.htmlStash
866        REFERENCE_PATTERN.references = self.references
867        IMAGE_REFERENCE_PATTERN.references = self.references
868
869        for extension in self.registeredExtensions :
870            extension.reset()
871
872
873    def _transform(self):
874        """Transforms the Markdown text into a XHTML body document
875
876           @returns: A NanoDom Document """
877
878        # Setup the document
879
880        self.doc = Document()
881        self.top_element = self.doc.createElement("span")
882        self.top_element.appendChild(self.doc.createTextNode('\n'))
883        self.top_element.setAttribute('class', 'markdown')
884        self.doc.appendChild(self.top_element)
885
886        # Fixup the source text
887        text = self.source.strip()
888        text = text.replace("\r\n", "\n").replace("\r", "\n")
889        text += "\n\n"
890        text = text.expandtabs(TAB_LENGTH)
891
892        # Split into lines and run the preprocessors that will work with
893        # self.lines
894
895        self.lines = text.split("\n")
896
897        # Run the pre-processors on the lines
898        for prep in self.preprocessors :
899            self.lines = prep.run(self.lines)
900
901        # Create a NanoDom tree from the lines and attach it to Document
902
903
904        buffer = []
905        for line in self.lines :
906            if line.startswith("#") :
907                self._processSection(self.top_element, buffer)
908                buffer = [line]
909            else :
910                buffer.append(line)
911        self._processSection(self.top_element, buffer)
912       
913        #self._processSection(self.top_element, self.lines)
914
915        # Not sure why I put this in but let's leave it for now.
916        self.top_element.appendChild(self.doc.createTextNode('\n'))
917
918        # Run the post-processors
919        for postprocessor in self.postprocessors :
920            postprocessor.run(self.doc)
921
922        return self.doc
923
924
925    def _processSection(self, parent_elem, lines,
926                        inList = 0, looseList = 0) :
927
928        """Process a section of a source document, looking for high
929           level structural elements like lists, block quotes, code
930           segments, html blocks, etc.  Some those then get stripped
931           of their high level markup (e.g. get unindented) and the
932           lower-level markup is processed recursively.
933
934           @param parent_elem: A NanoDom element to which the content
935                               will be added
936           @param lines: a list of lines
937           @param inList: a level
938           @returns: None"""
939
940        if not lines :
941            return
942
943        # Check if this section starts with a list, a blockquote or
944        # a code block
945
946        processFn = { 'ul' :     self._processUList,
947                      'ol' :     self._processOList,
948                      'quoted' : self._processQuote,
949                      'tabbed' : self._processCodeBlock }
950
951        for regexp in ['ul', 'ol', 'quoted', 'tabbed'] :
952            m = RE.regExp[regexp].match(lines[0])
953            if m :
954                processFn[regexp](parent_elem, lines, inList)
955                return
956
957        # We are NOT looking at one of the high-level structures like
958        # lists or blockquotes.  So, it's just a regular paragraph
959        # (though perhaps nested inside a list or something else).  If
960        # we are NOT inside a list, we just need to look for a blank
961        # line to find the end of the block.  If we ARE inside a
962        # list, however, we need to consider that a sublist does not
963        # need to be separated by a blank line.  Rather, the following
964        # markup is legal:
965        #
966        # * The top level list item
967        #
968        #     Another paragraph of the list.  This is where we are now.
969        #     * Underneath we might have a sublist.
970        #
971
972        if inList :
973
974            start, theRest = self._linesUntil(lines, (lambda line:
975                             RE.regExp['ul'].match(line)
976                             or RE.regExp['ol'].match(line)
977                                              or not line.strip()))
978
979            self._processSection(parent_elem, start,
980                                 inList - 1, looseList = looseList)
981            self._processSection(parent_elem, theRest,
982                                 inList - 1, looseList = looseList)
983
984
985        else : # Ok, so it's just a simple block
986
987            paragraph, theRest = self._linesUntil(lines, lambda line:
988                                                 not line.strip())
989
990            if len(paragraph) and paragraph[0].startswith('#') :
991                m = RE.regExp['header'].match(paragraph[0])
992                if m :
993                    level = len(m.group(1))
994                    h = self.doc.createElement("h%d" % level)
995                    parent_elem.appendChild(h)
996                    for item in self._handleInlineWrapper2(m.group(2).strip()) :
997                        h.appendChild(item)
998                else :
999                    message(CRITICAL, "We've got a problem header!")
1000
1001            elif paragraph :
1002
1003                list = self._handleInlineWrapper2("\n".join(paragraph))
1004
1005                if ( parent_elem.nodeName == 'li'
1006                     and not (looseList or parent_elem.childNodes)):
1007
1008                    #and not parent_elem.childNodes) :
1009                    # If this is the first paragraph inside "li", don't
1010                    # put <p> around it - append the paragraph bits directly
1011                    # onto parent_elem
1012                    el = parent_elem
1013                else :
1014                    # Otherwise make a "p" element
1015                    el = self.doc.createElement("p")
1016                    parent_elem.appendChild(el)
1017
1018                for item in list :
1019                    el.appendChild(item)
1020
1021            if theRest :
1022                theRest = theRest[1:]  # skip the first (blank) line
1023
1024            self._processSection(parent_elem, theRest, inList)
1025
1026
1027
1028    def _processUList(self, parent_elem, lines, inList) :
1029        self._processList(parent_elem, lines, inList,
1030                         listexpr='ul', tag = 'ul')
1031
1032    def _processOList(self, parent_elem, lines, inList) :
1033        self._processList(parent_elem, lines, inList,
1034                         listexpr='ol', tag = 'ol')
1035
1036
    def _processList(self, parent_elem, lines, inList, listexpr, tag) :
        """Given a list of document lines starting with a list item,
           finds the end of the list, breaks it up, and recursively
           processes each list item and the remainder of the text file.

           The lines are first grouped into one sub-list of lines per
           list item; each group is then passed to _processSection()
           with a new "li" element as parent and the list level raised
           by one.  Whatever follows the list is processed afterwards
           with the original parent and level.

           @param parent_elem: A dom element to which the content will be added
           @param lines: a list of lines
           @param inList: a level
           @param listexpr: not referenced in the body of this method
           @param tag: name of the list element to create ('ul' or 'ol')
           @returns: None"""

        ul = self.doc.createElement(tag)  # ul might actually be '<ol>'
        parent_elem.appendChild(ul)

        # Becomes true once a blank line is found inside the list; it is
        # passed on to _processSection() for every item.
        looseList = 0

        # Make a list of list items
        items = []
        item = -1   # index of the item currently being collected

        i = 0  # a counter to keep track of where we are

        for line in lines :

            loose = 0
            if not line.strip() :
                # If we see a blank line, this _might_ be the end of the list
                i += 1
                loose = 1

                # Find the next non-blank line
                # (i already points past the blank line at this point)
                for j in range(i, len(lines)) :
                    if lines[j].strip() :
                        next = lines[j]
                        break
                else :
                    # There is no more text => end of the list
                    break

                # Check if the next non-blank line is still a part of the list
                if ( RE.regExp['ul'].match(next) or
                     RE.regExp['ol'].match(next) or
                     RE.regExp['tabbed'].match(next) ):
                    # get rid of any white space in the line
                    items[item].append(line.strip())
                    looseList = loose or looseList
                    continue
                else :
                    break # found end of the list

            # Now we need to detect list items (at the current level)
            # while also detabing child elements if necessary

            for expr in ['ul', 'ol', 'tabbed']:

                m = RE.regExp[expr].match(line)
                if m :
                    if expr in ['ul', 'ol'] :  # We are looking at a new item
                        # A new item is only started when group 1 is
                        # non-empty; presumably that group holds the
                        # item text -- confirm against RE.regExp.
                        if m.group(1) :
                            items.append([m.group(1)])
                            item += 1
                    elif expr == 'tabbed' :  # This line needs to be detabbed
                        items[item].append(m.group(4)) #after the 'tab'

                    i += 1
                    break
            else :
                # None of the three expressions matched this line
                items[item].append(line)  # Just regular continuation
                i += 1 # added on 2006.02.25
        else :
            # The loop ran to the end without a break: every line was
            # consumed.  The extra step is harmless because lines[i:]
            # below is empty either way.
            i += 1

        # Add the dom elements
        # (the loop variable reuses the name "item"; from here on it is
        # the list of lines of one item, no longer an index)
        for item in items :
            li = self.doc.createElement("li")
            ul.appendChild(li)

            self._processSection(li, item, inList + 1, looseList = looseList)

        # Process the remaining part of the section

        self._processSection(parent_elem, lines[i:], inList)
1118
1119
1120    def _linesUntil(self, lines, condition) :
1121        """ A utility function to break a list of lines upon the
1122            first line that satisfied a condition.  The condition
1123            argument should be a predicate function.
1124            """
1125
1126        i = -1
1127        for line in lines :
1128            i += 1
1129            if condition(line) : break
1130        else :
1131            i += 1
1132        return lines[:i], lines[i:]
1133
1134    def _processQuote(self, parent_elem, lines, inList) :
1135        """Given a list of document lines starting with a quote finds
1136           the end of the quote, unindents it and recursively
1137           processes the body of the quote and the remainder of the
1138           text file.
1139
1140           @param parent_elem: DOM element to which the content will be added
1141           @param lines: a list of lines
1142           @param inList: a level
1143           @returns: None """
1144
1145        dequoted = []
1146        i = 0
1147        for line in lines :
1148            m = RE.regExp['quoted'].match(line)
1149            if m :
1150                dequoted.append(m.group(1))
1151                i += 1
1152            else :
1153                break
1154        else :
1155            i += 1
1156
1157        blockquote = self.doc.createElement('blockquote')
1158        parent_elem.appendChild(blockquote)
1159
1160        self._processSection(blockquote, dequoted, inList)
1161        self._processSection(parent_elem, lines[i:], inList)
1162
1163
1164
1165
1166    def _processCodeBlock(self, parent_elem, lines, inList) :
1167        """Given a list of document lines starting with a code block
1168           finds the end of the block, puts it into the dom verbatim
1169           wrapped in ("<pre><code>") and recursively processes the
1170           the remainder of the text file.
1171
1172           @param parent_elem: DOM element to which the content will be added
1173           @param lines: a list of lines
1174           @param inList: a level
1175           @returns: None"""
1176
1177        detabbed, theRest = self.blockGuru.detectTabbed(lines)
1178
1179        pre = self.doc.createElement('pre')
1180        code = self.doc.createElement('code')
1181        parent_elem.appendChild(pre)
1182        pre.appendChild(code)
1183        text = "\n".join(detabbed).rstrip()+"\n"
1184        text = text.replace("&", "&amp;")
1185        code.appendChild(self.doc.createTextNode(text))
1186        self._processSection(parent_elem, theRest, inList)
1187
1188
1189    def _handleInlineWrapper2 (self, line) :
1190
1191
1192        parts = [line]
1193
1194        #if not(line):
1195        #    return [self.doc.createTextNode(' ')]
1196
1197        for pattern in self.inlinePatterns :
1198
1199            #print
1200            #print self.inlinePatterns.index(pattern)
1201
1202            i = 0
1203
1204            #print parts
1205            while i < len(parts) :
1206               
1207                x = parts[i]
1208                #print i
1209                if isinstance(x, (str, unicode)) :
1210                    result = self._applyPattern(x, pattern)
1211                    #print result
1212                    #print result
1213                    #print parts, i
1214                    if result :
1215                        i -= 1
1216                        parts.remove(x)
1217                        for y in result :
1218                            parts.insert(i+1,y)
1219               
1220                i += 1
1221
1222        for i in range(len(parts)) :
1223            x = parts[i]
1224            if isinstance(x, (str, unicode)) :
1225                parts[i] = self.doc.createTextNode(x)
1226
1227        return parts
1228       
1229
1230
1231    def _handleInlineWrapper (self, line) :
1232
1233        # A wrapper around _handleInline to avoid recursion
1234
1235        parts = [line]
1236
1237        i = 0
1238       
1239        while i < len(parts) :
1240            x = parts[i]
1241            if isinstance(x, (str, unicode)) :
1242                parts.remove(x)
1243                result = self._handleInline(x)
1244                for y in result :
1245                    parts.insert(i,y)
1246            else :
1247                i += 1
1248
1249        return parts
1250
1251    def _handleInline(self,  line):
1252        """Transform a Markdown line with inline elements to an XHTML
1253        fragment.
1254
1255        This function uses auxiliary objects called inline patterns.
1256        See notes on inline patterns above.
1257
1258        @param item: A block of Markdown text
1259        @return: A list of NanoDom nodes """
1260
1261        if not(line):
1262            return [self.doc.createTextNode(' ')]
1263
1264        for pattern in self.inlinePatterns :
1265            list = self._applyPattern( line, pattern)
1266            if list: return list
1267
1268        return [self.doc.createTextNode(line)]
1269
1270    def _applyPattern(self, line, pattern) :
1271        """ Given a pattern name, this function checks if the line
1272        fits the pattern, creates the necessary elements, and returns
1273        back a list consisting of NanoDom elements and/or strings.
1274       
1275        @param line: the text to be processed
1276        @param pattern: the pattern to be checked
1277
1278        @returns: the appropriate newly created NanoDom element if the
1279                  pattern matches, None otherwise.
1280        """
1281
1282        # match the line to pattern's pre-compiled reg exp.
1283        # if no match, move on.
1284
1285        m = pattern.getCompiledRegExp().match(line)
1286        if not m :
1287            return None
1288
1289        # if we got a match let the pattern make us a NanoDom node
1290        # if it doesn't, move on
1291        node = pattern.handleMatch(m, self.doc)
1292
1293        if node :
1294            # Those are in the reverse order!
1295            return ( m.groups()[-1], # the string to the left
1296                     node,           # the new node
1297                     m.group(1))     # the string to the right of the match
1298
1299        else :
1300            return None
1301
1302    def __str__(self):
1303        """Return the document in XHTML format.
1304
1305        @returns: A serialized XHTML body."""
1306        #try :
1307        doc = self._transform()
1308        xml = doc.toxml()
1309        #finally:
1310        #    doc.unlink()
1311
1312        # Let's stick in all the raw html pieces
1313
1314        for i in range(self.htmlStash.html_counter) :
1315            xml = xml.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
1316                              self.htmlStash.rawHtmlBlocks[i] + "\n")
1317            xml = xml.replace(HTML_PLACEHOLDER % i,
1318                              self.htmlStash.rawHtmlBlocks[i])
1319
1320        xml = xml.replace(FN_BACKLINK_TEXT, "&#8617;")
1321
1322        # And return everything but the top level tag
1323
1324        if self.stripTopLevelTags :
1325            xml = xml.strip()[23:-7]
1326
1327        if isinstance(xml, unicode) :
1328            xml = xml.encode("utf8")
1329
1330        return xml
1331
1332
1333    toString = __str__
1334
1335
"""
========================= FOOTNOTES =================================

This section adds footnote handling to markdown.  It can be used as
an example for extending python-markdown with relatively complex
functionality.  While in this case the extension is included inside
the module itself, it could just as easily be added from outside the
module.  Note that all markdown classes above are ignorant about
footnotes.  All footnote functionality is provided separately and
then added to the markdown instance at run time.

Footnote functionality is attached by calling the extendMarkdown()
method of FootnoteExtension.  The method also registers the
extension to allow its state to be reset by a call to the reset()
method.
"""
1352
class FootnoteExtension :
    """Footnote support that can be attached to a Markdown instance.

    The extension keeps the footnote texts found in the source
    (self.footnotes) and the number assigned to each footnote id
    (self.used_footnotes).  extendMarkdown() plugs a preprocessor, an
    inline pattern and a postprocessor into a Markdown instance."""

    # A footnote definition line: up to three spaces, "[^id]:", text
    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]

    # A text node containing this marker is replaced by the footnotes
    FN_PLACE_MARKER = "///Footnotes Go Here///"

    def __init__ (self) :
        self.reset()

    def extendMarkdown(self, md) :
        """Attach footnote handling to a Markdown instance.

        @param md: the Markdown instance to extend
        @returns: None"""

        self.md = md

        # Stateless extensions do not need to be registered
        md.registerExtension(self)

        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(REFERENCE_PREPROCESSOR)
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
        index = md.inlinePatterns.index(IMAGE_REFERENCE_PATTERN)
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))

        # Insert a post-processor that would actually add the footnote div
        postprocessor = FootnotePostprocessor(self)
        postprocessor.extension = self

        md.postprocessors.append(postprocessor)


    def reset(self) :
        """Forget all footnotes and pick a new random id suffix.

        May be called by Markdown if a state reset is desired."""

        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
        self.used_footnotes={}
        self.footnotes = {}

    def findFootnotesPlaceholder(self, doc) :
        """Find the text node holding FN_PLACE_MARKER, if there is one.

        @param doc: the document to search
        @returns: the first matching node, or None"""

        def findFootnotePlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.FN_PLACE_MARKER) > -1 :
                    return True

        fn_div_list = doc.find(findFootnotePlaceholderFn)
        if fn_div_list :
            return fn_div_list[0]


    def setFootnote(self, id, text) :
        """Store the text of the footnote with the given id."""
        self.footnotes[id] = text

    def makeFootnoteId(self, num) :
        """Return the element id of footnote number num."""
        return 'fn%d%s' % (num, self.footnote_suffix)

    def makeFootnoteRefId(self, num) :
        """Return the element id of the reference to footnote number num."""
        return 'fnr%d%s' % (num, self.footnote_suffix)

    def makeFootnotesDiv (self, doc) :
        """Creates the div with class='footnote' and populates it with
           the text of the footnotes.

           @param doc: the document used to create the new elements
           @returns: the footnote div as a dom element, or None when
                     there are no footnotes """

        if not self.footnotes :
            return None

        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)

        # Order the footnotes by the number assigned to each id.
        # NOTE(review): a footnote that is defined but has no number in
        # used_footnotes raises KeyError here.
        footnotes = [(self.used_footnotes[key], key)
                     for key in self.footnotes.keys()]
        footnotes.sort()

        for i, key in footnotes :
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))

            self.md._processSection(li, self.footnotes[key].split("\n"))

            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # The title names the footnote the link jumps back to; it
            # used to say "footnote 1" for every footnote.
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))

            if li.childNodes :
                # Put the backlink inside the last child, unless that
                # child is a text node; then it goes on the "li" itself.
                node = li.childNodes[-1]
                if node.type == "text" :
                    node = li
                node.appendChild(backlink)

            ol.appendChild(li)

        return div
1459
1460
class FootnotePreprocessor :
    """Preprocessor that collects footnote definitions and numbers
    the footnote marks in the order they first appear in the text."""

    def __init__ (self, footnotes) :
        """@param footnotes: the FootnoteExtension that stores the result"""
        self.footnotes = footnotes

    def run(self, lines) :
        """Strip the footnote definitions and record the footnote marks.

        @param lines: a list of lines of text
        @returns: the lines without the footnote definitions"""

        self.blockGuru = BlockGuru()
        lines = self._handleFootnoteDefinitions (lines)

        # Walk over all footnote marks in the text so that we know in
        # what order they are supposed to appear.  Nothing is
        # substituted here.
        text = "\n".join(lines)
        for match in self.footnotes.SHORT_USE_RE.finditer(text) :
            self.recordFootnoteUse(match)

        return text.split("\n")


    def recordFootnoteUse(self, match) :
        """Assign the next free number to a footnote id.

        An id that already has a number keeps it.  Renumbering on every
        further reference, as was done before, produced duplicate and
        skipped footnote numbers.

        @param match: a match whose group 1 is the footnote id
        @returns: None"""

        used = self.footnotes.used_footnotes
        key = match.group(1).strip()
        if key not in used :
            used[key] = len(used) + 1


    def _handleFootnoteDefinitions(self, lines) :
        """Recursively finds all footnote definitions in the lines.

            Each definition, together with the tabbed lines following
            it, is stored in the extension and replaced by one empty
            line.

            @param lines: a list of lines of text
            @returns: a list of lines with the footnote definitions
                      removed """

        i, id, footnote = self._findFootnoteDefinition(lines)

        if not id :
            return lines

        plain = lines[:i]

        # Tabbed lines right after the definition belong to the footnote
        detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])

        self.footnotes.setFootnote(id,
                                   footnote + "\n"
                                   + "\n".join(detabbed))

        more_plain = self._handleFootnoteDefinitions(theRest)
        return plain + [""] + more_plain

    def _findFootnoteDefinition(self, lines) :
        """Finds the first line of a footnote definition.

            @param lines: a list of lines of text
            @returns: a tuple (index, id, text) for the first line
                      holding a footnote definition, or
                      (len(lines), None, None) if there is none """

        counter = 0
        for line in lines :
            m = self.footnotes.DEF_RE.match(line)
            if m :
                return counter, m.group(2), m.group(3)
            counter += 1
        return counter, None, None
1528
1529
class FootnotePattern(BasePattern):
    """Inline pattern that turns a footnote mark into a numbered,
    superscripted link to the footnote."""

    def __init__(self, pattern, footnotes):
        """@param pattern: the regular expression handed to BasePattern
        @param footnotes: the FootnoteExtension holding the numbering"""
        BasePattern.__init__(self, pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, doc):
        """Build the "sup" element for one footnote mark.

        @param m: the match; its group 2 is used as the footnote id
        @param doc: the document used to create the new elements
        @returns: the new "sup" element, which contains the link"""
        extension = self.footnotes

        marker = doc.createElement('sup')
        link = doc.createElement('a')
        marker.appendChild(link)

        # The number was assigned while preprocessing the text
        num = extension.used_footnotes[m.group(2)]

        marker.setAttribute('id', extension.makeFootnoteRefId(num))
        link.setAttribute('href', '#' + extension.makeFootnoteId(num))
        link.appendChild(doc.createTextNode(str(num)))
        return marker
1547
class FootnotePostprocessor :
    """Postprocessor that adds the footnotes div to the document."""

    def __init__ (self, footnotes) :
        """@param footnotes: the FootnoteExtension holding the footnotes"""
        self.footnotes = footnotes
        # run() looks up the placeholder through self.extension.  It is
        # set here so that the postprocessor also works when nobody
        # assigns the attribute from outside; assigning it afterwards
        # still overrides this default.
        self.extension = footnotes

    def run(self, doc) :
        """Insert the footnotes div, if there are any footnotes.

        The div replaces the placeholder node when the document has
        one; otherwise it is appended to the document element.

        @param doc: the document to modify
        @returns: None"""
        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
        if footnotesDiv :
            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
            if fnPlaceholder :
                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
            else :
                doc.documentElement.appendChild(footnotesDiv)
1561
1562# ====================================================================
1563
def markdown(text):
    """Convert Markdown text to XHTML.

    @param text: the Markdown source
    @returns: the result of Markdown(text).toString()"""
    message(VERBOSE, "in markdown.py, received text:\n%s" % text)
    converter = Markdown(text)
    return converter.toString()
1567
def markdownWithFootnotes(text):
    """Convert Markdown text to XHTML with footnote support.

    @param text: the Markdown source
    @returns: str() of a Markdown instance extended with a
              FootnoteExtension"""
    message(VERBOSE, "Running markdown with footnotes, "
            + "received text:\n%s" % text)

    # The extension has to be attached before the source is set
    md = Markdown()
    FootnoteExtension().extendMarkdown(md)
    md.source = text

    return str(md)
1577
def test_markdown(args):
    """Test markdown at the command line.

    Runs cmd_line() with a fixed series of argument lists; in each of
    them element 0 is the module name.  A sample input file is written
    to the current directory for the tests that need one.

    @param args: not used
    @returns: None"""
    script = "markdown.py"
    fid = "markdown-test.txt"
    fidout = "markdown-test.html"

    # Tests that do not need the sample input file
    for banner, argv in (
            ("\nTEST 1: no arguments on command line",
             [script]),
            ("\nTEST 2a: 1 argument on command line: a good option",
             [script, "-footnotes"]),
            ("\nTEST 2b: 1 argument on command line: a bad option",
             [script, "-foodnotes"]),
            ("\nTEST 3: 1 argument on command line: non-existent input file",
             [script, "junk.txt"])):
        print(banner)
        cmd_line(argv)

    print("\nTEST 4: 1 argument on command line: existing input file")
    # The sample text, one entry per line (it starts and ends with a
    # newline, hence the empty first and last entries).
    lines = "\n".join([
        '',
        'Markdown text with[^1]:',
        '',
        '2. **bold text**,',
        '3. *italic text*.',
        '',
        'Then more:',
        '',
        '    beginning of code block;',
        '    another line of code block.',
        '   ',
        '    a second paragraph of code block.',
        '',
        'more text to end our file.',
        '',
        '[^1]: "italic" means emphasis.',
        ''])
    sample = open(fid, 'w+')
    sample.write(lines)
    sample.close()
    cmd_line([script, fid])

    # Tests that read the sample input file
    for banner, argv in (
            ("\nTEST 5: 2 arguments on command line: nofootnotes and input file",
             [script, "-nofootnotes", fid]),
            ("\nTEST 6: 2 arguments on command line: footnotes and input file",
             [script, "-footnotes", fid]),
            ("\nTEST 7: 3 arguments on command line: nofootnotes,inputfile, outputfile",
             [script, "-nofootnotes", fid, fidout])):
        print(banner)
        cmd_line(argv)
1619
1620
def get_vars(args):
    """Process the command-line args received; return usable variables.

    With one argument after the program name, it is taken as the option
    if it starts with '-', otherwise as the input file.  With two, the
    first is either the option or the input file, and the second the
    input or the output file accordingly.  With three or more, the
    first three are taken as option, input file and output file, in
    that order.

    @param args: the argument list, args[0] being the program name
    @returns: a tuple (option, inFile, outFile); parts that were not
              given are None"""

    # args is wrapped in a tuple so that a tuple of arguments is
    # formatted as a whole instead of being unpacked by "%".
    message(VERBOSE, "in get_vars(), args: %s" % (args,))

    option = inFile = outFile = None
    count = len(args)

    if count >= 4:
        option, inFile, outFile = args[1:4]
    elif count == 3:
        first, second = args[1:3]
        # The [:1] slice also copes with an empty argument, which
        # indexing with [0] did not.
        if first[:1] == '-':
            # then we have an option and inFile
            option, inFile = first, second
        else:
            # we have no option, so we must have inFile and outFile
            inFile, outFile = first, second
    elif count == 2:
        # we have only one usable arg: might be an option or a file
        only = args[1]

        message(VERBOSE, "our single arg is: %s" % str(only))

        if only[:1] == '-':
            option = only
        else:
            inFile = only

    message(VERBOSE,
            "prior to validation, option: %s, inFile: %s, outFile: %s" %
            (str(option), str(inFile), str(outFile),))

    return option, inFile, outFile
1658
1659
# Help text shown through message() when the command line cannot be
# understood or when help is requested.
USAGE = """
\nUsing markdown.py:

    python markdown.py [option] input_file_with_markdown.txt [output_file.html]

Options:

    -footnotes or -fn   : generate markdown with footnotes
    -test or -t         : run a self-test
    -help or -h         : print this message

"""

# Option names accepted by validate_option(), written without the
# leading character that validate_option() cuts off.
# NOTE(review): 'nofootnotes' and 'f' are accepted here but not listed
# in USAGE, and 'f' has no entry in EXPANDED_OPTIONS -- confirm whether
# 'f' was meant to be a short form of 'footnotes'.
VALID_OPTIONS = ['footnotes','nofootnotes', 'fn', 'test', 't', 'f',
                 'help', 'h']

# Maps the short option names to the long names that cmd_line()
# compares against.
EXPANDED_OPTIONS =  { "fn" : "footnotes",
                      "t"  : "test",
                      "h"  : "help" }
1679
1680
def validate_option(option):
    """Check if the option makes sense and print an appropriate
    message if it doesn't.

    The first character of the option is dropped and what is left is
    looked up in VALID_OPTIONS; short names are expanded to their long
    form.

    @param option: the option as given on the command line, or None
    @return: valid option string or None
    """
    if option is None:
        return None

    if len(option) > 1 and option[1:] in VALID_OPTIONS:
        name = option[1:]
        return EXPANDED_OPTIONS.get(name, name)

    message(CRITICAL,
            "\nSorry, I don't understand option %s" % option)
    message(CRITICAL, USAGE)
    return None
1702
1703
def validate_input_file(inFile):
    """Check if the input file is specified and can be read.

    A message is printed when the name is missing or the file cannot
    be accessed for reading.

    @param inFile: the path given on the command line, or None
    @return: valid input file path or None
    """
    if not inFile:
        message(CRITICAL,
                "\nI need an input filename.\n")
        message(CRITICAL, USAGE)
        return None

    if not os.access(inFile, os.R_OK):
        message(CRITICAL, "Sorry, I can't find input file %s" % str(inFile))
        return None

    return inFile
1722
1723   
1724           
1725
def cmd_line(args):
    """Run markdown as directed by a list of command-line arguments.

    Depending on the option this prints the usage text, runs the
    self-test, or converts the input file.  The result of a conversion
    is printed, or written to the output file when one was given.

    @param args: the argument list, args[0] being the program name
    @returns: None"""

    # args is wrapped in a tuple so that a tuple of arguments is
    # formatted as a whole instead of being unpacked by "%".
    message(VERBOSE, "in cmd_line with args: %s" % (args,))

    option, inFile, outFile = get_vars(args)

    if option :
        option = validate_option(option)
        if not option : return

    if option == "help" :
        message(CRITICAL, USAGE)
        return
    elif option == "test" :
        test_markdown(None)
        return

    inFile = validate_input_file(inFile)
    if not inFile :
        return

    # Read the whole input and close the file right away instead of
    # leaving that to the garbage collector.
    source = open(inFile)
    try :
        text = source.read()
    finally :
        source.close()

    message(VERBOSE, "Validated command line parameters:" +
             "\n\toption: %s, \n\tinFile: %s, \n\toutFile: %s" % (
             str(option), str(inFile), str(outFile),))

    if option == "footnotes" :
        md_function = markdownWithFootnotes
    else :
        md_function = markdown

    if outFile is None:
        print(md_function(text))
        return

    output = md_function(text)
    target = open(outFile, "w+")
    try :
        target.write(output)
    finally :
        # Close the output file even if writing fails
        target.close()

    if os.access(outFile, os.F_OK):
        message(INFO, "Successfully wrote %s" % outFile)
    else:
        message(INFO, "Failed to write %s" % outFile)
1770
1771
if __name__ == '__main__':
    # Run Markdown from the command line.
    # The note that used to be here said: "Set debug = 3 at top of
    # file to get diagnostic output".
    args = sys.argv

    # set testing=1 to test the command-line response of markdown.py
    testing = 0
    if not testing:
        import time
        # The two time stamps are taken but not reported; they are kept
        # for timing or profiling cmd_line(args) by hand.
        t0 = time.time()
        cmd_line(args)
        t1 = time.time()
    else:
        test_markdown(args)
1790
1791"""
1792CHANGELOG
1793=========
1794
1795May 15, 2006: A bug with lists, recursion on block-level elements,
1796run-in headers, spaces before headers, unicode input (thanks to Aaron
1797Swartz). Sourceforge tracker #s: 1489313, 1489312, 1489311, 1488370,
17981485178, 1485176. (v. 1.5)
1799
1800Mar. 24, 2006: Switched to a not-so-recursive algorithm with
1801_handleInline.  (Version 1.4)
1802
Mar. 15, 2006: Replaced some instance variables with class variables
(a patch from Stelios Xanthakis).  Incorporated Chris Clark's new regexps
that do not trigger midword underlining.
1806
1807Feb. 28, 2006: Clean-up and command-line handling by Stewart
1808Midwinter. (Version 1.3)
1809
1810Feb. 24, 2006: Fixed a bug with the last line of the list appearing
1811again as a separate paragraph.  Incorporated Chris Clark's "mailto"
1812patch.  Added support for <br /> at the end of lines ending in two or
1813more spaces.  Fixed a crashing bug when using ImageReferencePattern.
1814Added several utility methods to Nanodom.  (Version 1.2)
1815
1816Jan. 31, 2006: Added "hr" and "hr/" to BLOCK_LEVEL_ELEMENTS and
1817changed <hr/> to <hr />.  (Thanks to Sergej Chodarev.)
1818
1819Nov. 26, 2005: Fixed a bug with certain tabbed lines inside lists
1820getting wrapped in <pre><code>.  (v. 1.1)
1821
1822Nov. 19, 2005: Made "<!...", "<?...", etc. behave like block-level
1823HTML tags.
1824
1825Nov. 14, 2005: Added entity code and email autolink fix by Tiago
1826Cogumbreiro.  Fixed some small issues with backticks to get 100%
1827compliance with John's test suite.  (v. 1.0)
1828
1829Nov. 7, 2005: Added an unlink method for documents to aid with memory
1830collection (per Doug Sauder's suggestion).
1831
1832Oct. 29, 2005: Restricted a set of html tags that get treated as
1833block-level elements.
1834
1835Sept. 18, 2005: Refactored the whole script to make it easier to
1836customize it and made footnote functionality into an extension.
1837(v. 0.9)
1838
1839Sept. 5, 2005: Fixed a bug with multi-paragraph footnotes.  Added
1840attribute support.
1841
1842Sept. 1, 2005: Changed the way headers are handled to allow inline
1843syntax in headers (e.g. links) and got the lists to use p-tags
1844correctly (v. 0.8)
1845
1846Aug. 29, 2005: Added flexible tabs, fixed a few small issues, added
1847basic support for footnotes.  Got rid of xml.dom.minidom and added
1848pretty-printing. (v. 0.7)
1849
1850Aug. 13, 2005: Fixed a number of small bugs in order to conform to the
1851test suite.  (v. 0.6)
1852
1853Aug. 11, 2005: Added support for inline html and entities, inline
1854images, autolinks, underscore emphasis. Cleaned up and refactored the
1855code, added some more comments.
1856
1857Feb. 19, 2005: Rewrote the handling of high-level elements to allow
1858multi-line list items and all sorts of nesting.
1859
1860Feb. 3, 2005: Reference-style links, single-line lists, backticks,
1861escape, emphasis in the beginning of the paragraph.
1862
1863Nov. 2004: Added links, blockquotes, html blocks to Manfred
1864Stienstra's code
1865
1866Apr. 2004: Manfred's version at http://www.dwerg.net/projects/markdown/
1867
1868"""
1869
1870
1871
1872
1873
1874
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。