tags.
+ """
+ yield 0, ""
+ for tup in inner:
+ yield tup
+ yield 0, "
"
+
+ def wrap(self, source, outfile):
+ """Return the source with a code, pre, and div."""
+ return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
+
+ formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
+ return pygments.highlight(codeblock, lexer, formatter)
+
+ def _code_block_sub(self, match):
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ lexer = self._get_pygments_lexer(lexer_name)
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ if lexer:
+ formatter_opts = self.extras['code-color'] or {}
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ pre_class_str = self._html_class_str_from_tag("pre")
+ code_class_str = self._html_class_str_from_tag("code")
+ return "\n\n%s\n
\n\n" % (
+ pre_class_str, code_class_str, codeblock)
+
+ def _html_class_str_from_tag(self, tag):
+ """Get the appropriate ' class="..."' string (note the leading
+ space), if any, for the given tag.
+ """
+ if "html-classes" not in self.extras:
+ return ""
+ try:
+ html_classes_from_tag = self.extras["html-classes"]
+ except TypeError:
+ return ""
+ else:
+ if tag in html_classes_from_tag:
+ return ' class="%s"' % html_classes_from_tag[tag]
+ return ""
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+
+ return code_block_re.sub(self._code_block_sub, text)
+
+
+ # Rules for a code span:
+ # - backslash escapes are not interpreted in a code span
+ # - to include one or or a run of more backticks the delimiters must
+ # be a longer run of backticks
+ # - cannot start or end a code span with a backtick; pad with a
+ # space and that space will be removed in the emitted HTML
+ # See `test/tm-cases/escapes.text` for a number of edge-case
+ # examples.
+ _code_span_re = re.compile(r'''
+ (?%s
" % c
+
+ def _do_code_spans(self, text):
+ # * Backtick quotes are used for
spans.
+ #
+ # * You can use multiple backticks as the delimiters if you want to
+ # include literal backticks in the code span. So, this input:
+ #
+ # Just type ``foo `bar` baz`` at the prompt.
+ #
+ # Will translate to:
+ #
+ # Just type foo `bar` baz
at the prompt.
+ #
+ # There's no arbitrary limit to the number of backticks you
+ # can use as delimters. If you need three consecutive backticks
+ # in your code, use four for delimiters, etc.
+ #
+ # * You can use spaces to get literal backticks at the edges:
+ #
+ # ... type `` `bar` `` ...
+ #
+ # Turns to:
+ #
+ # ... type `bar`
...
+ return self._code_span_re.sub(self._code_span_sub, text)
+
+ def _encode_code(self, text):
+ """Encode/escape certain characters inside Markdown code runs.
+ The point is that in code, these characters are literals,
+ and lose their special Markdown meanings.
+ """
+ replacements = [
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ ('&', '&'),
+ # Do the angle bracket song and dance:
+ ('<', '<'),
+ ('>', '>'),
+ # Now, escape characters that are magic in Markdown:
+ ('*', g_escape_table['*']),
+ ('_', g_escape_table['_']),
+ ('{', g_escape_table['{']),
+ ('}', g_escape_table['}']),
+ ('[', g_escape_table['[']),
+ (']', g_escape_table[']']),
+ ('\\', g_escape_table['\\']),
+ ]
+ for before, after in replacements:
+ text = text.replace(before, after)
+ return text
+
+ _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
+ _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
+ #_spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S)
+
+ _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
+ _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
+ def _do_italics_and_bold(self, text):
+ # must go first:
+ if "code-friendly" in self.extras:
+ text = self._code_friendly_strong_re.sub(r"\1", text)
+ text = self._code_friendly_em_re.sub(r"\1", text)
+ else:
+ text = self._strong_re.sub(r"\2", text)
+ text = self._em_re.sub(r"\2", text)
+
+ #text = self._spoiler_re.sub("\\1", text)
+ return text
+
+
+ _block_quote_re = re.compile(r'''
+ ( # Wrap whole match in \1
+ (
+ ^[ \t]*>[^>] # '>' at the start of a line
+ .+\n # rest of the first line
+ \n* # blanks
+ )+
+ )
+ ''', re.M | re.X)
+ _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M);
+
+ _html_pre_block_re = re.compile(r'(\s*.+?
)', re.S)
+ def _dedent_two_spaces_sub(self, match):
+ return re.sub(r'(?m)^ ', '', match.group(1))
+
+ def _block_quote_sub(self, match):
+ bq = match.group(1)
+ #bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
+ bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
+ bq = bq.strip('\n')
+ bq = self._run_span_gamut(bq)
+ #bq = self._run_block_gamut(bq) # recurse
+
+ bq = re.sub('(?m)^', ' ', bq)
+ # These leading spaces screw with content, so we need to fix that:
+ bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
+
+ return "\n%s\n
\n\n" % bq
+
+ def _do_block_quotes(self, text):
+ if '>' not in text:
+ return text
+ return self._block_quote_re.sub(self._block_quote_sub, text)
+
+ def _form_paragraphs(self, text):
+ # Strip leading and trailing lines:
+ text = text.strip('\n')
+
+ # Wrap tags.
+ grafs = []
+ for i, graf in enumerate(re.split(r"\n{2,}", text)):
+ if graf in self.html_blocks:
+ # Unhashify HTML blocks
+ grafs.append(self.html_blocks[graf])
+ else:
+ cuddled_list = None
+ if "cuddled-lists" in self.extras:
+ # Need to put back trailing '\n' for `_list_item_re`
+ # match at the end of the paragraph.
+ li = self._list_item_re.search(graf + '\n')
+ # Two of the same list marker in this paragraph: a likely
+ # candidate for a list cuddled to preceding paragraph
+ # text (issue 33). Note the `[-1]` is a quick way to
+ # consider numeric bullets (e.g. "1." and "2.") to be
+ # equal.
+ if (li and len(li.group(2)) <= 3 and li.group("next_marker")
+ and li.group("marker")[-1] == li.group("next_marker")[-1]):
+ start = li.start()
+ cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
+ assert cuddled_list.startswith("
" % indent())
+ return '\n'.join(lines) + '\n'
+
+
+_slugify_strip_re = re.compile(r'[^\w\s-]')
+_slugify_hyphenate_re = re.compile(r'[-\s]+')
+def _slugify(value):
+ """
+ Normalizes string, converts to lowercase, removes non-alpha characters,
+ and converts spaces to hyphens.
+
+ From Django's "django/template/defaultfilters.py".
+ """
+ import unicodedata
+ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+ value = unicode(_slugify_strip_re.sub('', value).strip().lower())
+ return _slugify_hyphenate_re.sub('-', value)
+
+# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
+def _curry(*args, **kwargs):
+ function, args = args[0], args[1:]
+ def result(*rest, **kwrest):
+ combined = kwargs.copy()
+ combined.update(kwrest)
+ return function(*args + rest, **combined)
+ return result
+
+# Recipe: regex_from_encoded_pattern (1.0)
+def _regex_from_encoded_pattern(s):
+ """'foo' -> re.compile(re.escape('foo'))
+ '/foo/' -> re.compile('foo')
+ '/foo/i' -> re.compile('foo', re.I)
+ """
+ if s.startswith('/') and s.rfind('/') != 0:
+ # Parse it: /PATTERN/FLAGS
+ idx = s.rfind('/')
+ pattern, flags_str = s[1:idx], s[idx+1:]
+ flag_from_char = {
+ "i": re.IGNORECASE,
+ "l": re.LOCALE,
+ "s": re.DOTALL,
+ "m": re.MULTILINE,
+ "u": re.UNICODE,
+ }
+ flags = 0
+ for char in flags_str:
+ try:
+ flags |= flag_from_char[char]
+ except KeyError:
+ raise ValueError("unsupported regex flag: '%s' in '%s' "
+ "(must be one of '%s')"
+ % (char, s, ''.join(flag_from_char.keys())))
+ return re.compile(s[1:idx], flags)
+ else: # not an encoded regex
+ return re.compile(re.escape(s))
+
+# Recipe: dedent (0.1.2)
+def _dedentlines(lines, tabsize=8, skip_first_line=False):
+ """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
+
+ "lines" is a list of lines to dedent.
+ "tabsize" is the tab width to use for indent width calculations.
+ "skip_first_line" is a boolean indicating if the first line should
+ be skipped for calculating the indent width and for dedenting.
+ This is sometimes useful for docstrings and similar.
+
+ Same as dedent() except operates on a sequence of lines. Note: the
+ lines list is modified **in-place**.
+ """
+ DEBUG = False
+ if DEBUG:
+ print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
+ % (tabsize, skip_first_line)
+ indents = []
+ margin = None
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ indent = 0
+ for ch in line:
+ if ch == ' ':
+ indent += 1
+ elif ch == '\t':
+ indent += tabsize - (indent % tabsize)
+ elif ch in '\r\n':
+ continue # skip all-whitespace lines
+ else:
+ break
+ else:
+ continue # skip all-whitespace lines
+ if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
+ if margin is None:
+ margin = indent
+ else:
+ margin = min(margin, indent)
+ if DEBUG: print "dedent: margin=%r" % margin
+
+ if margin is not None and margin > 0:
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ removed = 0
+ for j, ch in enumerate(line):
+ if ch == ' ':
+ removed += 1
+ elif ch == '\t':
+ removed += tabsize - (removed % tabsize)
+ elif ch in '\r\n':
+ if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
+ lines[i] = lines[i][j:]
+ break
+ else:
+ raise ValueError("unexpected non-whitespace char %r in "
+ "line %r while removing %d-space margin"
+ % (ch, line, margin))
+ if DEBUG:
+ print "dedent: %r: %r -> removed %d/%d"\
+ % (line, ch, removed, margin)
+ if removed == margin:
+ lines[i] = lines[i][j+1:]
+ break
+ elif removed > margin:
+ lines[i] = ' '*(removed-margin) + lines[i][j+1:]
+ break
+ else:
+ if removed:
+ lines[i] = lines[i][removed:]
+ return lines
+
+def _dedent(text, tabsize=8, skip_first_line=False):
+ """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
+
+ "text" is the text to dedent.
+ "tabsize" is the tab width to use for indent width calculations.
+ "skip_first_line" is a boolean indicating if the first line should
+ be skipped for calculating the indent width and for dedenting.
+ This is sometimes useful for docstrings and similar.
+
+ textwrap.dedent(s), but don't expand tabs to spaces
+ """
+ lines = text.splitlines(1)
+ _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
+ return ''.join(lines)
+
+
+class _memoized(object):
+ """Decorator that caches a function's return value each time it is called.
+ If called later with the same arguments, the cached value is returned, and
+ not re-evaluated.
+
+ http://wiki.python.org/moin/PythonDecoratorLibrary
+ """
+ def __init__(self, func):
+ self.func = func
+ self.cache = {}
+ def __call__(self, *args):
+ try:
+ return self.cache[args]
+ except KeyError:
+ self.cache[args] = value = self.func(*args)
+ return value
+ except TypeError:
+ # uncachable -- for instance, passing a list as an argument.
+ # Better to not cache than to blow up entirely.
+ return self.func(*args)
+ def __repr__(self):
+ """Return the function's docstring."""
+ return self.func.__doc__
+
+
+def _xml_oneliner_re_from_tab_width(tab_width):
+ """Standalone XML processing instruction regex."""
+ return re.compile(r"""
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{0,%d}
+ (?:
+ <\?\w+\b\s+.*?\?> # XML processing instruction
+ |
+ <\w+:\w+\b\s+.*?/> # namespaced single tag
+ )
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ """ % (tab_width - 1), re.X)
+_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
+
+def _hr_tag_re_from_tab_width(tab_width):
+ return re.compile(r"""
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in \1
+ [ ]{0,%d}
+ <(hr) # start tag = \2
+ \b # word break
+ ([^<>])*? #
+ /?> # the matching end tag
+ [ \t]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+ )
+ """ % (tab_width - 1), re.X)
+_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
+
+
+def _xml_encode_email_char_at_random(ch):
+ r = random()
+ # Roughly 10% raw, 45% hex, 45% dec.
+ # '@' *must* be encoded. I [John Gruber] insist.
+ # Issue 26: '_' must be encoded.
+ if r > 0.9 and ch not in "@_":
+ return ch
+ elif r < 0.45:
+ # The [1:] is to drop leading '0': 0x63 -> x63
+ return '%s;' % hex(ord(ch))[1:]
+ else:
+ return '%s;' % ord(ch)
+
+
+
+#---- mainline
+
+class _NoReflowFormatter(optparse.IndentedHelpFormatter):
+ """An optparse formatter that does NOT reflow the description."""
+ def format_description(self, description):
+ return description or ""
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv
+ if not logging.root.handlers:
+ logging.basicConfig()
+
+ usage = "usage: %prog [PATHS...]"
+ version = "%prog "+__version__
+ parser = optparse.OptionParser(prog="markdown2", usage=usage,
+ version=version, description=cmdln_desc,
+ formatter=_NoReflowFormatter())
+ parser.add_option("-v", "--verbose", dest="log_level",
+ action="store_const", const=logging.DEBUG,
+ help="more verbose output")
+ parser.add_option("--encoding",
+ help="specify encoding of text content")
+ parser.add_option("--html4tags", action="store_true", default=False,
+ help="use HTML 4 style for empty element tags")
+ parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
+ help="sanitize literal HTML: 'escape' escapes "
+ "HTML meta chars, 'replace' replaces with an "
+ "[HTML_REMOVED] note")
+ parser.add_option("-x", "--extras", action="append",
+ help="Turn on specific extra features (not part of "
+ "the core Markdown spec). See above.")
+ parser.add_option("--use-file-vars",
+ help="Look for and use Emacs-style 'markdown-extras' "
+ "file var to turn on extras. See "
+ ".")
+ parser.add_option("--link-patterns-file",
+ help="path to a link pattern file")
+ parser.add_option("--self-test", action="store_true",
+ help="run internal self-tests (some doctests)")
+ parser.add_option("--compare", action="store_true",
+ help="run against Markdown.pl as well (for testing)")
+ parser.set_defaults(log_level=logging.INFO, compare=False,
+ encoding="utf-8", safe_mode=None, use_file_vars=False)
+ opts, paths = parser.parse_args()
+ log.setLevel(opts.log_level)
+
+ if opts.self_test:
+ return _test()
+
+ if opts.extras:
+ extras = {}
+ for s in opts.extras:
+ splitter = re.compile("[,;: ]+")
+ for e in splitter.split(s):
+ if '=' in e:
+ ename, earg = e.split('=', 1)
+ try:
+ earg = int(earg)
+ except ValueError:
+ pass
+ else:
+ ename, earg = e, None
+ extras[ename] = earg
+ else:
+ extras = None
+
+ if opts.link_patterns_file:
+ link_patterns = []
+ f = open(opts.link_patterns_file)
+ try:
+ for i, line in enumerate(f.readlines()):
+ if not line.strip(): continue
+ if line.lstrip().startswith("#"): continue
+ try:
+ pat, href = line.rstrip().rsplit(None, 1)
+ except ValueError:
+ raise MarkdownError("%s:%d: invalid link pattern line: %r"
+ % (opts.link_patterns_file, i+1, line))
+ link_patterns.append(
+ (_regex_from_encoded_pattern(pat), href))
+ finally:
+ f.close()
+ else:
+ link_patterns = None
+
+ from os.path import join, dirname, abspath, exists
+ markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
+ "Markdown.pl")
+ for path in paths:
+ if opts.compare:
+ print "==== Markdown.pl ===="
+ perl_cmd = 'perl %s "%s"' % (markdown_pl, path)
+ o = os.popen(perl_cmd)
+ perl_html = o.read()
+ o.close()
+ sys.stdout.write(perl_html)
+ print "==== markdown2.py ===="
+ html = markdown_path(path, encoding=opts.encoding,
+ html4tags=opts.html4tags,
+ safe_mode=opts.safe_mode,
+ extras=extras, link_patterns=link_patterns,
+ use_file_vars=opts.use_file_vars)
+ sys.stdout.write(
+ html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ if extras and "toc" in extras:
+ log.debug("toc_html: " +
+ html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ if opts.compare:
+ test_dir = join(dirname(dirname(abspath(__file__))), "test")
+ if exists(join(test_dir, "test_markdown2.py")):
+ sys.path.insert(0, test_dir)
+ from test_markdown2 import norm_html_from_html
+ norm_html = norm_html_from_html(html)
+ norm_perl_html = norm_html_from_html(perl_html)
+ else:
+ norm_html = html
+ norm_perl_html = perl_html
+ print "==== match? %r ====" % (norm_perl_html == norm_html)
+
+
+if __name__ == "__main__":
+ sys.exit( main(sys.argv) )
+
--
cgit v1.2.1-18-gbd029