aboutsummaryrefslogtreecommitdiff
path: root/cgi/markdown.py
diff options
context:
space:
mode:
authorLibravatar Renard 2020-03-29 18:43:36 -0300
committerLibravatar Renard 2020-03-29 18:43:36 -0300
commit56c690b9efdb009ab44f3112b6c301d7d393f07e (patch)
treeb2a28666888df9b60b46b6d1c59dd3818437b405 /cgi/markdown.py
parent775ef3e6291c5ad6bff68a12f6ca81c8663da3dc (diff)
downloadweabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.tar.gz
weabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.tar.xz
weabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.zip
Formateo de python con pep8
Diffstat (limited to 'cgi/markdown.py')
-rw-r--r--cgi/markdown.py337
1 files changed, 193 insertions, 144 deletions
diff --git a/cgi/markdown.py b/cgi/markdown.py
index 3ebfaab..846c192 100644
--- a/cgi/markdown.py
+++ b/cgi/markdown.py
@@ -33,6 +33,15 @@ number of extras (e.g., code syntax coloring, footnotes) as described on
<http://code.google.com/p/python-markdown2/wiki/Extras>.
"""
+from urllib import quote
+import codecs
+from random import random, randint
+import optparse
+import logging
+import re
+from pprint import pprint
+import sys
+import os
cmdln_desc = """A fast and complete Python implementation of Markdown, a
text-to-HTML conversion tool for web writers.
@@ -59,33 +68,25 @@ Supported extras (see -x|--extras option below):
# not yet sure if there implications with this. Compare 'pydoc sre'
# and 'perldoc perlre'.
-__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl
+__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl
__version__ = '1.0.1.17'
__author__ = "Trent Mick"
-import os
-import sys
-from pprint import pprint
-import re
-import logging
try:
from hashlib import md5
except ImportError:
from md5 import md5
-import optparse
-from random import random, randint
-import codecs
-from urllib import quote
-
-#---- Python version compat
+# ---- Python version compat
-if sys.version_info[:2] < (2,4):
+if sys.version_info[:2] < (2, 4):
from sets import Set as set
+
def reversed(sequence):
for i in sequence[::-1]:
yield i
+
def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
return unicode(s, encoding, errors)
else:
@@ -107,26 +108,29 @@ except ImportError:
SECRET_SALT = str(randint(0, 1000000))
else:
SECRET_SALT = str(uuid.uuid4())
+
+
def _hash_ascii(s):
- #return md5(s).hexdigest() # Markdown.pl effectively does this.
+ # return md5(s).hexdigest() # Markdown.pl effectively does this.
return 'md5-' + md5(SECRET_SALT + s).hexdigest()
+
+
def _hash_text(s):
return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()
+
# Table of hash values for escaped characters:
g_escape_table = dict([(ch, _hash_ascii(ch))
for ch in '\\`*_{}[]()>#+-.!'])
-
#---- exceptions
class MarkdownError(Exception):
pass
-
-#---- public api
+# ---- public api
def markdown_path(path, encoding="utf-8",
html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
@@ -140,6 +144,7 @@ def markdown_path(path, encoding="utf-8",
link_patterns=link_patterns,
use_file_vars=use_file_vars).convert(text)
+
def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
safe_mode=None, extras=None, link_patterns=None,
use_file_vars=False):
@@ -148,6 +153,7 @@ def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
link_patterns=link_patterns,
use_file_vars=use_file_vars).convert(text)
+
class Markdown(object):
# The dict of "extras" to enable in processing -- a mapping of
# extra name to argument for the extra. Most extras do not have an
@@ -178,7 +184,7 @@ class Markdown(object):
self.tab_width = tab_width
# For compatibility with earlier markdown2.py and with
- # markdown.py's safe_mode being a boolean,
+ # markdown.py's safe_mode being a boolean,
# safe_mode == True -> "replace"
if safe_mode is True:
self.safe_mode = "replace"
@@ -212,7 +218,7 @@ class Markdown(object):
self.footnotes = {}
self.footnote_ids = []
if "header-ids" in self.extras:
- self._count_from_header_id = {} # no `defaultdict` in Python 2.4
+ self._count_from_header_id = {} # no `defaultdict` in Python 2.4
def convert(self, text):
"""Convert the given text."""
@@ -228,7 +234,7 @@ class Markdown(object):
self.reset()
if not isinstance(text, unicode):
- #TODO: perhaps shouldn't presume UTF-8 for string input?
+ # TODO: perhaps shouldn't presume UTF-8 for string input?
text = unicode(text, 'utf-8')
if self.use_file_vars:
@@ -287,13 +293,14 @@ class Markdown(object):
text = self._unhash_html_spans(text)
#text += "\n"
-
+
rv = UnicodeWithAttrs(text)
if "toc" in self.extras:
rv._toc = self._toc
return rv
- _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
+ _emacs_oneliner_vars_pat = re.compile(
+ r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
# This regular expression is intended to match blocks like this:
# PREFIX Local Variables: SUFFIX
# PREFIX mode: Tcl SUFFIX
@@ -317,7 +324,7 @@ class Markdown(object):
http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables
"""
emacs_vars = {}
- SIZE = pow(2, 13) # 8kB
+ SIZE = pow(2, 13) # 8kB
# Search near the start for a '-*-'-style one-liner of variables.
head = text[:SIZE]
@@ -374,9 +381,12 @@ class Markdown(object):
# Parse out one emacs var per line.
continued_for = None
- for line in lines[:-1]: # no var on the last line ("PREFIX End:")
- if prefix: line = line[len(prefix):] # strip prefix
- if suffix: line = line[:-len(suffix)] # strip suffix
+ # no var on the last line ("PREFIX End:")
+ for line in lines[:-1]:
+ if prefix:
+ line = line[len(prefix):] # strip prefix
+ if suffix:
+ line = line[:-len(suffix)] # strip suffix
line = line.strip()
if continued_for:
variable = continued_for
@@ -405,7 +415,7 @@ class Markdown(object):
# Unquote values.
for var, val in emacs_vars.items():
if len(val) > 1 and (val.startswith('"') and val.endswith('"')
- or val.startswith('"') and val.endswith('"')):
+ or val.startswith('"') and val.endswith('"')):
emacs_vars[var] = val[1:-1]
return emacs_vars
@@ -413,9 +423,11 @@ class Markdown(object):
# Cribbed from a post by Bart Lateur:
# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
_detab_re = re.compile(r'(.*?)\t', re.M)
+
def _detab_sub(self, match):
g1 = match.group(1)
return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
+
def _detab(self, text):
r"""Remove (leading?) tabs from a file.
@@ -447,7 +459,7 @@ class Markdown(object):
(?=\n+|\Z) # followed by a newline or end of document
)
""" % _block_tags_a,
- re.X | re.M)
+ re.X | re.M)
_block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
_liberal_tag_block_re = re.compile(r"""
@@ -461,7 +473,7 @@ class Markdown(object):
(?=\n+|\Z) # followed by a newline or end of document
)
""" % _block_tags_b,
- re.X | re.M)
+ re.X | re.M)
def _hash_html_block_sub(self, match, raw=False):
html = match.group(1)
@@ -506,7 +518,7 @@ class Markdown(object):
text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
# Special case just for <hr />. It was easier to make a special
- # case than to make the other regex more complicated.
+ # case than to make the other regex more complicated.
if "<hr" in text:
_hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
text = _hr_tag_re.sub(hash_html_block_sub, text)
@@ -564,12 +576,13 @@ class Markdown(object):
html = self._sanitize_html(html)
key = _hash_text(html)
self.html_blocks[key] = html
- text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:]
+ text = text[:start_idx] + "\n\n" + \
+ key + "\n\n" + text[end_idx:]
if "xml" in self.extras:
# Treat XML processing instructions and namespaced one-liner
# tags as if they were block HTML tags. E.g., if standalone
- # (i.e. are their own paragraph), the following do not get
+ # (i.e. are their own paragraph), the following do not get
# wrapped in a <p> tag:
# <?foo bar?>
#
@@ -583,7 +596,7 @@ class Markdown(object):
# Strips link definitions from text, stores the URLs and titles in
# hash references.
less_than_tab = self.tab_width - 1
-
+
# Link defs are in the form:
# [id]: url "optional title"
_link_def_re = re.compile(r"""
@@ -634,7 +647,7 @@ class Markdown(object):
- The 'note-id' can be pretty much anything, though typically it
is the number of the footnote.
- The first paragraph may start on the next line, like so:
-
+
[^note-id]:
Text of the note.
"""
@@ -656,7 +669,6 @@ class Markdown(object):
re.X | re.M)
return footnote_def_re.sub(self._extract_footnote_def_sub, text)
-
_hr_res = [
re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M),
re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M),
@@ -671,7 +683,7 @@ class Markdown(object):
# Do Horizontal Rules:
#hr = "\n<hr"+self.empty_element_suffix+"\n"
- #for hr_re in self._hr_res:
+ # for hr_re in self._hr_res:
# text = hr_re.sub(hr, text)
text = self._do_lists(text)
@@ -697,11 +709,11 @@ class Markdown(object):
lines = match.group(0).splitlines(0)
_dedentlines(lines)
indent = ' ' * self.tab_width
- s = ('\n' # separate from possible cuddled paragraph
+ s = ('\n' # separate from possible cuddled paragraph
+ indent + ('\n'+indent).join(lines)
+ '\n\n')
return s
-
+
def _prepare_pyshell_blocks(self, text):
"""Ensure that Python interactive shell sessions are put in
code blocks -- even if not properly indented.
@@ -721,14 +733,14 @@ class Markdown(object):
def _run_span_gamut(self, text):
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
-
- #text = self._do_code_spans(text) - El AA !
-
+
+ # text = self._do_code_spans(text) - El AA !
+
text = self._escape_special_chars(text)
-
+
# Process anchor and image tags.
text = self._do_links(text)
-
+
# Make links out of things like `<http://example.com/>`
# Must come after _do_links(), because you can use < and >
# delimiters in inline links like [this](<url>).
@@ -736,14 +748,14 @@ class Markdown(object):
if "link-patterns" in self.extras:
text = self._do_link_patterns(text)
-
+
text = self._encode_amps_and_angles(text)
-
+
text = self._do_italics_and_bold(text)
-
+
# Do hard breaks:
text = re.sub(r"\n", "<br%s" % self.empty_element_suffix, text)
-
+
return text
# "Sorta" because auto-links are identified as "tag" tokens.
@@ -763,7 +775,7 @@ class Markdown(object):
<\?.*?\?> # processing instruction
)
""", re.X)
-
+
def _escape_special_chars(self, text):
# Python markdown note: the HTML tokenization here differs from
# that in Markdown.pl, hence the behaviour for subtle cases can
@@ -875,7 +887,7 @@ class Markdown(object):
anchor_allowed_pos = 0
curr_pos = 0
- while True: # Handle the next link.
+ while True: # Handle the next link.
# The next '[' is the start of:
# - an inline anchor: [text](url "title")
# - a reference anchor: [text][id]
@@ -903,7 +915,7 @@ class Markdown(object):
# matching brackets in img alt text -- we'll differ in that
# regard.
bracket_depth = 0
- for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
text_length)):
ch = text[p]
if ch == ']':
@@ -939,12 +951,12 @@ class Markdown(object):
return text
# Inline anchor or img?
- if text[p] == '(': # attempt at perf improvement
+ if text[p] == '(': # attempt at perf improvement
match = self._tail_of_inline_link_re.match(text, p)
if match:
# Handle an inline anchor or img.
#is_img = start_idx > 0 and text[start_idx-1] == "!"
- #if is_img:
+ # if is_img:
# start_idx -= 1
is_img = False
@@ -988,7 +1000,7 @@ class Markdown(object):
if match:
# Handle a reference-style anchor or img.
#is_img = start_idx > 0 and text[start_idx-1] == "!"
- #if is_img:
+ # if is_img:
# start_idx -= 1
is_img = False
@@ -1014,7 +1026,8 @@ class Markdown(object):
link_text.replace('"', '&quot;'),
title_str, self.empty_element_suffix)
curr_pos = start_idx + len(result)
- text = text[:start_idx] + result + text[match.end():]
+ text = text[:start_idx] + \
+ result + text[match.end():]
elif start_idx >= anchor_allowed_pos:
result = '<a href="%s"%s>%s</a>' \
% (url, title_str, link_text)
@@ -1024,7 +1037,8 @@ class Markdown(object):
# anchor_allowed_pos on.
curr_pos = start_idx + len(result_head)
anchor_allowed_pos = start_idx + len(result)
- text = text[:start_idx] + result + text[match.end():]
+ text = text[:start_idx] + \
+ result + text[match.end():]
else:
# Anchor not allowed here.
curr_pos = start_idx + 1
@@ -1036,12 +1050,12 @@ class Markdown(object):
# Otherwise, it isn't markup.
curr_pos = start_idx + 1
- return text
+ return text
def header_id_from_text(self, text, prefix):
"""Generate a header id attribute value from the given header
HTML content.
-
+
This is only called if the "header-ids" extra is enabled.
Subclasses may override this for different header ids.
"""
@@ -1056,12 +1070,14 @@ class Markdown(object):
return header_id
_toc = None
+
def _toc_add_entry(self, level, id, name):
if self._toc is None:
self._toc = []
self._toc.append((level, id, name))
_setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+
def _setext_h_sub(self, match):
n = {"=": 1, "-": 2}[match.group(2)[0]]
demote_headers = self.extras.get("demote-headers")
@@ -1070,7 +1086,7 @@ class Markdown(object):
header_id_attr = ""
if "header-ids" in self.extras:
header_id = self.header_id_from_text(match.group(1),
- prefix=self.extras["header-ids"])
+ prefix=self.extras["header-ids"])
header_id_attr = ' id="%s"' % header_id
html = self._run_span_gamut(match.group(1))
if "toc" in self.extras:
@@ -1086,6 +1102,7 @@ class Markdown(object):
\#* # optional closing #'s (not counted)
\n+
''', re.X | re.M)
+
def _atx_h_sub(self, match):
n = len(match.group(1))
demote_headers = self.extras.get("demote-headers")
@@ -1094,7 +1111,7 @@ class Markdown(object):
header_id_attr = ""
if "header-ids" in self.extras:
header_id = self.header_id_from_text(match.group(2),
- prefix=self.extras["header-ids"])
+ prefix=self.extras["header-ids"])
header_id_attr = ' id="%s"' % header_id
html = self._run_span_gamut(match.group(2))
if "toc" in self.extras:
@@ -1105,7 +1122,7 @@ class Markdown(object):
# Setext-style headers:
# Header 1
# ========
- #
+ #
# Header 2
# --------
text = self._setext_h_re.sub(self._setext_h_sub, text)
@@ -1120,8 +1137,7 @@ class Markdown(object):
return text
-
- _marker_ul_chars = '*+-'
+ _marker_ul_chars = '*+-'
_marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
_marker_ul = '(?:[%s])' % _marker_ul_chars
_marker_ol = r'(?:\d+\.)'
@@ -1161,7 +1177,7 @@ class Markdown(object):
)
)
''' % (less_than_tab, marker_pat, marker_pat)
-
+
# We use a different prefix before nested lists than top-level lists.
# See extended comment in _process_list_items().
#
@@ -1189,7 +1205,7 @@ class Markdown(object):
text = list_re.sub(self._list_sub, text)
return text
-
+
_list_item_re = re.compile(r'''
(\n)? # leading line = \1
(^[ \t]*) # leading whitespace = \2
@@ -1201,6 +1217,7 @@ class Markdown(object):
re.M | re.X | re.S)
_last_li_endswith_two_eols = False
+
def _list_item_sub(self, match):
item = match.group(4)
leading_line = match.group(1)
@@ -1219,7 +1236,7 @@ class Markdown(object):
def _process_list_items(self, list_str):
# Process the contents of a single ordered or unordered list,
# splitting it into individual list items.
-
+
# The $g_list_level global keeps track of when we're inside a list.
# Each time we enter a list, we increment it; when we leave a list,
# we decrement. If it's zero, we're not in a list anymore.
@@ -1268,7 +1285,7 @@ class Markdown(object):
"""
yield 0, "<code>"
for tup in inner:
- yield tup
+ yield tup
yield 0, "</code>"
def wrap(self, source, outfile):
@@ -1333,7 +1350,6 @@ class Markdown(object):
return code_block_re.sub(self._code_block_sub, text)
-
# Rules for a code span:
# - backslash escapes are not interpreted in a code span
# - to include one or or a run of more backticks the delimiters must
@@ -1359,26 +1375,26 @@ class Markdown(object):
def _do_code_spans(self, text):
# * Backtick quotes are used for <code></code> spans.
- #
+ #
# * You can use multiple backticks as the delimiters if you want to
# include literal backticks in the code span. So, this input:
- #
+ #
# Just type ``foo `bar` baz`` at the prompt.
- #
+ #
# Will translate to:
- #
+ #
# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
- #
+ #
# There's no arbitrary limit to the number of backticks you
# can use as delimters. If you need three consecutive backticks
# in your code, use four for delimiters, etc.
#
# * You can use spaces to get literal backticks at the edges:
- #
+ #
# ... type `` `bar` `` ...
- #
+ #
# Turns to:
- #
+ #
# ... type <code>`bar`</code> ...
return self._code_span_re.sub(self._code_span_sub, text)
@@ -1409,22 +1425,24 @@ class Markdown(object):
_strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
_em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
- #_spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S)
-
- _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
+ # _spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S)
+
+ _code_friendly_strong_re = re.compile(
+ r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
_code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
+
def _do_italics_and_bold(self, text):
# <strong> must go first:
if "code-friendly" in self.extras:
- text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text)
+ text = self._code_friendly_strong_re.sub(
+ r"<strong>\1</strong>", text)
text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
else:
text = self._strong_re.sub(r"<strong>\2</strong>", text)
text = self._em_re.sub(r"<em>\2</em>", text)
-
+
#text = self._spoiler_re.sub("<del>\\1</del>", text)
return text
-
_block_quote_re = re.compile(r'''
( # Wrap whole match in \1
@@ -1435,19 +1453,20 @@ class Markdown(object):
)+
)
''', re.M | re.X)
- _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M);
+ _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
_html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
+
def _dedent_two_spaces_sub(self, match):
return re.sub(r'(?m)^ ', '', match.group(1))
def _block_quote_sub(self, match):
bq = match.group(1)
- #bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
+ # bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
bq = bq.strip('\n')
bq = self._run_span_gamut(bq)
- #bq = self._run_block_gamut(bq) # recurse
+ # bq = self._run_block_gamut(bq) # recurse
bq = re.sub('(?m)^', ' ', bq)
# These leading spaces screw with <pre> content, so we need to fix that:
@@ -1482,16 +1501,18 @@ class Markdown(object):
# consider numeric bullets (e.g. "1." and "2.") to be
# equal.
if (li and len(li.group(2)) <= 3 and li.group("next_marker")
- and li.group("marker")[-1] == li.group("next_marker")[-1]):
+ and li.group("marker")[-1] == li.group("next_marker")[-1]):
start = li.start()
- cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
- assert cuddled_list.startswith("<ul>") or cuddled_list.startswith("<ol>")
+ cuddled_list = self._do_lists(
+ graf[start:]).rstrip("\n")
+ assert cuddled_list.startswith(
+ "<ul>") or cuddled_list.startswith("<ol>")
graf = graf[:start]
-
+
# Wrap <p> tags.
graf = self._run_span_gamut(graf)
grafs.append("<p>" + graf.lstrip(" \t") + "</p>")
-
+
if cuddled_list:
grafs.append(cuddled_list)
@@ -1510,9 +1531,9 @@ class Markdown(object):
footer.append('<li id="fn-%s">' % id)
footer.append(self._run_block_gamut(self.footnotes[id]))
backlink = ('<a href="#fnref-%s" '
- 'class="footnoteBackLink" '
- 'title="Jump back to footnote %d in the text.">'
- '&#8617;</a>' % (id, i+1))
+ 'class="footnoteBackLink" '
+ 'title="Jump back to footnote %d in the text.">'
+ '&#8617;</a>' % (id, i+1))
if footer[-1].endswith("</p>"):
footer[-1] = footer[-1][:-len("</p>")] \
+ '&nbsp;' + backlink + "</p>"
@@ -1535,7 +1556,7 @@ class Markdown(object):
# Smart processing for ampersands and angle brackets that need
# to be encoded.
text = self._ampersand_re.sub('&amp;', text)
-
+
# Encode naked <'s
text = self._naked_lt_re.sub('&lt;', text)
@@ -1551,6 +1572,7 @@ class Markdown(object):
return text
_auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
+
def _auto_link_sub(self, match):
g1 = match.group(1)
return '<a href="%s">%s</a>' % (g1, g1)
@@ -1565,6 +1587,7 @@ class Markdown(object):
)
>
""", re.I | re.X | re.U)
+
def _auto_email_link_sub(self, match):
return self._encode_email_address(
self._unescape_special_chars(match.group(1)))
@@ -1593,7 +1616,7 @@ class Markdown(object):
addr = '<a href="%s">%s</a>' \
% (''.join(chars), ''.join(chars[7:]))
return addr
-
+
def _do_link_patterns(self, text):
"""Caveat emptor: there isn't much guarding against link
patterns being formed inside other standard Markdown links, e.g.
@@ -1614,7 +1637,7 @@ class Markdown(object):
for (start, end), href in reversed(replacements):
escaped_href = (
href.replace('"', '&quot;') # b/c of attr quote
- # To avoid markdown <em> and <strong>:
+ # To avoid markdown <em> and <strong>:
.replace('*', g_escape_table['*'])
.replace('_', g_escape_table['_']))
link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
@@ -1624,7 +1647,7 @@ class Markdown(object):
for hash, link in link_from_hash.items():
text = text.replace(hash, link)
return text
-
+
def _unescape_special_chars(self, text):
# Swap back in all the special characters we've hidden.
for ch, hash in g_escape_table.items():
@@ -1651,7 +1674,7 @@ class MarkdownWithExtras(Markdown):
extras = ["footnotes", "code-color"]
-#---- internal support functions
+# ---- internal support functions
class UnicodeWithAttrs(unicode):
"""A subclass of unicode used for the return value of conversion to
@@ -1662,12 +1685,12 @@ class UnicodeWithAttrs(unicode):
@property
def toc_html(self):
"""Return the HTML for the current TOC.
-
+
This expects the `_toc` attribute to have been set on this instance.
"""
if self._toc is None:
return None
-
+
def indent():
return ' ' * (len(h_stack) - 1)
lines = []
@@ -1696,11 +1719,13 @@ class UnicodeWithAttrs(unicode):
_slugify_strip_re = re.compile(r'[^\w\s-]')
_slugify_hyphenate_re = re.compile(r'[-\s]+')
+
+
def _slugify(value):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to hyphens.
-
+
From Django's "django/template/defaultfilters.py".
"""
import unicodedata
@@ -1709,8 +1734,11 @@ def _slugify(value):
return _slugify_hyphenate_re.sub('-', value)
# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
+
+
def _curry(*args, **kwargs):
function, args = args[0], args[1:]
+
def result(*rest, **kwrest):
combined = kwargs.copy()
combined.update(kwrest)
@@ -1718,6 +1746,8 @@ def _curry(*args, **kwargs):
return result
# Recipe: regex_from_encoded_pattern (1.0)
+
+
def _regex_from_encoded_pattern(s):
"""'foo' -> re.compile(re.escape('foo'))
'/foo/' -> re.compile('foo')
@@ -1743,30 +1773,33 @@ def _regex_from_encoded_pattern(s):
"(must be one of '%s')"
% (char, s, ''.join(flag_from_char.keys())))
return re.compile(s[1:idx], flags)
- else: # not an encoded regex
+ else: # not an encoded regex
return re.compile(re.escape(s))
# Recipe: dedent (0.1.2)
+
+
def _dedentlines(lines, tabsize=8, skip_first_line=False):
"""_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
-
+
"lines" is a list of lines to dedent.
"tabsize" is the tab width to use for indent width calculations.
"skip_first_line" is a boolean indicating if the first line should
be skipped for calculating the indent width and for dedenting.
This is sometimes useful for docstrings and similar.
-
+
Same as dedent() except operates on a sequence of lines. Note: the
lines list is modified **in-place**.
"""
DEBUG = False
- if DEBUG:
+ if DEBUG:
print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
% (tabsize, skip_first_line)
indents = []
margin = None
for i, line in enumerate(lines):
- if i == 0 and skip_first_line: continue
+ if i == 0 and skip_first_line:
+ continue
indent = 0
for ch in line:
if ch == ' ':
@@ -1774,21 +1807,24 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
elif ch == '\t':
indent += tabsize - (indent % tabsize)
elif ch in '\r\n':
- continue # skip all-whitespace lines
+ continue # skip all-whitespace lines
else:
break
else:
- continue # skip all-whitespace lines
- if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
+ continue # skip all-whitespace lines
+ if DEBUG:
+ print "dedent: indent=%d: %r" % (indent, line)
if margin is None:
margin = indent
else:
margin = min(margin, indent)
- if DEBUG: print "dedent: margin=%r" % margin
+ if DEBUG:
+ print "dedent: margin=%r" % margin
if margin is not None and margin > 0:
for i, line in enumerate(lines):
- if i == 0 and skip_first_line: continue
+ if i == 0 and skip_first_line:
+ continue
removed = 0
for j, ch in enumerate(line):
if ch == ' ':
@@ -1796,7 +1832,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
elif ch == '\t':
removed += tabsize - (removed % tabsize)
elif ch in '\r\n':
- if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
+ if DEBUG:
+ print "dedent: %r: EOL -> strip up to EOL" % line
lines[i] = lines[i][j:]
break
else:
@@ -1817,6 +1854,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
lines[i] = lines[i][removed:]
return lines
+
def _dedent(text, tabsize=8, skip_first_line=False):
"""_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
@@ -1825,7 +1863,7 @@ def _dedent(text, tabsize=8, skip_first_line=False):
"skip_first_line" is a boolean indicating if the first line should
be skipped for calculating the indent width and for dedenting.
This is sometimes useful for docstrings and similar.
-
+
textwrap.dedent(s), but don't expand tabs to spaces
"""
lines = text.splitlines(1)
@@ -1834,28 +1872,31 @@ def _dedent(text, tabsize=8, skip_first_line=False):
class _memoized(object):
- """Decorator that caches a function's return value each time it is called.
- If called later with the same arguments, the cached value is returned, and
- not re-evaluated.
-
- http://wiki.python.org/moin/PythonDecoratorLibrary
- """
- def __init__(self, func):
- self.func = func
- self.cache = {}
- def __call__(self, *args):
- try:
- return self.cache[args]
- except KeyError:
- self.cache[args] = value = self.func(*args)
- return value
- except TypeError:
- # uncachable -- for instance, passing a list as an argument.
- # Better to not cache than to blow up entirely.
- return self.func(*args)
- def __repr__(self):
- """Return the function's docstring."""
- return self.func.__doc__
+ """Decorator that caches a function's return value each time it is called.
+ If called later with the same arguments, the cached value is returned, and
+ not re-evaluated.
+
+ http://wiki.python.org/moin/PythonDecoratorLibrary
+ """
+
+ def __init__(self, func):
+ self.func = func
+ self.cache = {}
+
+ def __call__(self, *args):
+ try:
+ return self.cache[args]
+ except KeyError:
+ self.cache[args] = value = self.func(*args)
+ return value
+ except TypeError:
+ # uncachable -- for instance, passing a list as an argument.
+ # Better to not cache than to blow up entirely.
+ return self.func(*args)
+
+ def __repr__(self):
+ """Return the function's docstring."""
+ return self.func.__doc__
def _xml_oneliner_re_from_tab_width(tab_width):
@@ -1877,10 +1918,13 @@ def _xml_oneliner_re_from_tab_width(tab_width):
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
""" % (tab_width - 1), re.X)
+
+
_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
+
def _hr_tag_re_from_tab_width(tab_width):
- return re.compile(r"""
+ return re.compile(r"""
(?:
(?<=\n\n) # Starting after a blank line
| # or
@@ -1896,6 +1940,8 @@ def _hr_tag_re_from_tab_width(tab_width):
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
""" % (tab_width - 1), re.X)
+
+
_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
@@ -1913,18 +1959,20 @@ def _xml_encode_email_char_at_random(ch):
return '&#%s;' % ord(ch)
-
#---- mainline
class _NoReflowFormatter(optparse.IndentedHelpFormatter):
"""An optparse formatter that does NOT reflow the description."""
+
def format_description(self, description):
return description or ""
+
def _test():
import doctest
doctest.testmod()
+
def main(argv=None):
if argv is None:
argv = sys.argv
@@ -1934,14 +1982,14 @@ def main(argv=None):
usage = "usage: %prog [PATHS...]"
version = "%prog "+__version__
parser = optparse.OptionParser(prog="markdown2", usage=usage,
- version=version, description=cmdln_desc,
- formatter=_NoReflowFormatter())
+ version=version, description=cmdln_desc,
+ formatter=_NoReflowFormatter())
parser.add_option("-v", "--verbose", dest="log_level",
action="store_const", const=logging.DEBUG,
help="more verbose output")
parser.add_option("--encoding",
help="specify encoding of text content")
- parser.add_option("--html4tags", action="store_true", default=False,
+ parser.add_option("--html4tags", action="store_true", default=False,
help="use HTML 4 style for empty element tags")
parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
help="sanitize literal HTML: 'escape' escapes "
@@ -1990,8 +2038,10 @@ def main(argv=None):
f = open(opts.link_patterns_file)
try:
for i, line in enumerate(f.readlines()):
- if not line.strip(): continue
- if line.lstrip().startswith("#"): continue
+ if not line.strip():
+ continue
+ if line.lstrip().startswith("#"):
+ continue
try:
pat, href = line.rstrip().rsplit(None, 1)
except ValueError:
@@ -2025,7 +2075,7 @@ def main(argv=None):
html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
if extras and "toc" in extras:
log.debug("toc_html: " +
- html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
if opts.compare:
test_dir = join(dirname(dirname(abspath(__file__))), "test")
if exists(join(test_dir, "test_markdown2.py")):
@@ -2040,5 +2090,4 @@ def main(argv=None):
if __name__ == "__main__":
- sys.exit( main(sys.argv) )
-
+ sys.exit(main(sys.argv))