diff options
author | Renard | 2020-03-29 18:43:36 -0300 |
---|---|---|
committer | Renard | 2020-03-29 18:43:36 -0300 |
commit | 56c690b9efdb009ab44f3112b6c301d7d393f07e (patch) | |
tree | b2a28666888df9b60b46b6d1c59dd3818437b405 /cgi/markdown.py | |
parent | 775ef3e6291c5ad6bff68a12f6ca81c8663da3dc (diff) | |
download | weabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.tar.gz weabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.tar.xz weabot-56c690b9efdb009ab44f3112b6c301d7d393f07e.zip |
Formateo de python con pep8
Diffstat (limited to 'cgi/markdown.py')
-rw-r--r-- | cgi/markdown.py | 337 |
1 files changed, 193 insertions, 144 deletions
diff --git a/cgi/markdown.py b/cgi/markdown.py index 3ebfaab..846c192 100644 --- a/cgi/markdown.py +++ b/cgi/markdown.py @@ -33,6 +33,15 @@ number of extras (e.g., code syntax coloring, footnotes) as described on <http://code.google.com/p/python-markdown2/wiki/Extras>. """ +from urllib import quote +import codecs +from random import random, randint +import optparse +import logging +import re +from pprint import pprint +import sys +import os cmdln_desc = """A fast and complete Python implementation of Markdown, a text-to-HTML conversion tool for web writers. @@ -59,33 +68,25 @@ Supported extras (see -x|--extras option below): # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. -__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl +__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl __version__ = '1.0.1.17' __author__ = "Trent Mick" -import os -import sys -from pprint import pprint -import re -import logging try: from hashlib import md5 except ImportError: from md5 import md5 -import optparse -from random import random, randint -import codecs -from urllib import quote - -#---- Python version compat +# ---- Python version compat -if sys.version_info[:2] < (2,4): +if sys.version_info[:2] < (2, 4): from sets import Set as set + def reversed(sequence): for i in sequence[::-1]: yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): return unicode(s, encoding, errors) else: @@ -107,26 +108,29 @@ except ImportError: SECRET_SALT = str(randint(0, 1000000)) else: SECRET_SALT = str(uuid.uuid4()) + + def _hash_ascii(s): - #return md5(s).hexdigest() # Markdown.pl effectively does this. + # return md5(s).hexdigest() # Markdown.pl effectively does this. return 'md5-' + md5(SECRET_SALT + s).hexdigest() + + def _hash_text(s): return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest() + # Table of hash values for escaped characters: g_escape_table = dict([(ch, _hash_ascii(ch)) for ch in '\\`*_{}[]()>#+-.!']) - #---- exceptions class MarkdownError(Exception): pass - -#---- public api +# ---- public api def markdown_path(path, encoding="utf-8", html4tags=False, tab_width=DEFAULT_TAB_WIDTH, @@ -140,6 +144,7 @@ def markdown_path(path, encoding="utf-8", link_patterns=link_patterns, use_file_vars=use_file_vars).convert(text) + def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, safe_mode=None, extras=None, link_patterns=None, use_file_vars=False): @@ -148,6 +153,7 @@ def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, link_patterns=link_patterns, use_file_vars=use_file_vars).convert(text) + class Markdown(object): # The dict of "extras" to enable in processing -- a mapping of # extra name to argument for the extra. Most extras do not have an @@ -178,7 +184,7 @@ class Markdown(object): self.tab_width = tab_width # For compatibility with earlier markdown2.py and with - # markdown.py's safe_mode being a boolean, + # markdown.py's safe_mode being a boolean, # safe_mode == True -> "replace" if safe_mode is True: self.safe_mode = "replace" @@ -212,7 +218,7 @@ class Markdown(object): self.footnotes = {} self.footnote_ids = [] if "header-ids" in self.extras: - self._count_from_header_id = {} # no `defaultdict` in Python 2.4 + self._count_from_header_id = {} # no `defaultdict` in Python 2.4 def convert(self, text): """Convert the given text.""" @@ -228,7 +234,7 @@ class Markdown(object): self.reset() if not isinstance(text, unicode): - #TODO: perhaps shouldn't presume UTF-8 for string input? + # TODO: perhaps shouldn't presume UTF-8 for string input? text = unicode(text, 'utf-8') if self.use_file_vars: @@ -287,13 +293,14 @@ class Markdown(object): text = self._unhash_html_spans(text) #text += "\n" - + rv = UnicodeWithAttrs(text) if "toc" in self.extras: rv._toc = self._toc return rv - _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) + _emacs_oneliner_vars_pat = re.compile( + r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) # This regular expression is intended to match blocks like this: # PREFIX Local Variables: SUFFIX # PREFIX mode: Tcl SUFFIX @@ -317,7 +324,7 @@ class Markdown(object): http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables """ emacs_vars = {} - SIZE = pow(2, 13) # 8kB + SIZE = pow(2, 13) # 8kB # Search near the start for a '-*-'-style one-liner of variables. head = text[:SIZE] @@ -374,9 +381,12 @@ class Markdown(object): # Parse out one emacs var per line. continued_for = None - for line in lines[:-1]: # no var on the last line ("PREFIX End:") - if prefix: line = line[len(prefix):] # strip prefix - if suffix: line = line[:-len(suffix)] # strip suffix + # no var on the last line ("PREFIX End:") + for line in lines[:-1]: + if prefix: + line = line[len(prefix):] # strip prefix + if suffix: + line = line[:-len(suffix)] # strip suffix line = line.strip() if continued_for: variable = continued_for @@ -405,7 +415,7 @@ class Markdown(object): # Unquote values. for var, val in emacs_vars.items(): if len(val) > 1 and (val.startswith('"') and val.endswith('"') - or val.startswith('"') and val.endswith('"')): + or val.startswith('"') and val.endswith('"')): emacs_vars[var] = val[1:-1] return emacs_vars @@ -413,9 +423,11 @@ class Markdown(object): # Cribbed from a post by Bart Lateur: # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): g1 = match.group(1) return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): r"""Remove (leading?) tabs from a file. @@ -447,7 +459,7 @@ class Markdown(object): (?=\n+|\Z) # followed by a newline or end of document ) """ % _block_tags_a, - re.X | re.M) + re.X | re.M) _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' _liberal_tag_block_re = re.compile(r""" @@ -461,7 +473,7 @@ class Markdown(object): (?=\n+|\Z) # followed by a newline or end of document ) """ % _block_tags_b, - re.X | re.M) + re.X | re.M) def _hash_html_block_sub(self, match, raw=False): html = match.group(1) @@ -506,7 +518,7 @@ class Markdown(object): text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) # Special case just for <hr />. It was easier to make a special - # case than to make the other regex more complicated. + # case than to make the other regex more complicated. if "<hr" in text: _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width) text = _hr_tag_re.sub(hash_html_block_sub, text) @@ -564,12 +576,13 @@ class Markdown(object): html = self._sanitize_html(html) key = _hash_text(html) self.html_blocks[key] = html - text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + text = text[:start_idx] + "\n\n" + \ + key + "\n\n" + text[end_idx:] if "xml" in self.extras: # Treat XML processing instructions and namespaced one-liner # tags as if they were block HTML tags. E.g., if standalone - # (i.e. are their own paragraph), the following do not get + # (i.e. are their own paragraph), the following do not get # wrapped in a <p> tag: # <?foo bar?> # @@ -583,7 +596,7 @@ class Markdown(object): # Strips link definitions from text, stores the URLs and titles in # hash references. less_than_tab = self.tab_width - 1 - + # Link defs are in the form: # [id]: url "optional title" _link_def_re = re.compile(r""" @@ -634,7 +647,7 @@ class Markdown(object): - The 'note-id' can be pretty much anything, though typically it is the number of the footnote. - The first paragraph may start on the next line, like so: - + [^note-id]: Text of the note. """ @@ -656,7 +669,6 @@ class Markdown(object): re.X | re.M) return footnote_def_re.sub(self._extract_footnote_def_sub, text) - _hr_res = [ re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), @@ -671,7 +683,7 @@ class Markdown(object): # Do Horizontal Rules: #hr = "\n<hr"+self.empty_element_suffix+"\n" - #for hr_re in self._hr_res: + # for hr_re in self._hr_res: # text = hr_re.sub(hr, text) text = self._do_lists(text) @@ -697,11 +709,11 @@ class Markdown(object): lines = match.group(0).splitlines(0) _dedentlines(lines) indent = ' ' * self.tab_width - s = ('\n' # separate from possible cuddled paragraph + s = ('\n' # separate from possible cuddled paragraph + indent + ('\n'+indent).join(lines) + '\n\n') return s - + def _prepare_pyshell_blocks(self, text): """Ensure that Python interactive shell sessions are put in code blocks -- even if not properly indented. @@ -721,14 +733,14 @@ class Markdown(object): def _run_span_gamut(self, text): # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. - - #text = self._do_code_spans(text) - El AA ! - + + # text = self._do_code_spans(text) - El AA ! + text = self._escape_special_chars(text) - + # Process anchor and image tags. text = self._do_links(text) - + # Make links out of things like `<http://example.com/>` # Must come after _do_links(), because you can use < and > # delimiters in inline links like [this](<url>). @@ -736,14 +748,14 @@ class Markdown(object): if "link-patterns" in self.extras: text = self._do_link_patterns(text) - + text = self._encode_amps_and_angles(text) - + text = self._do_italics_and_bold(text) - + # Do hard breaks: text = re.sub(r"\n", "<br%s" % self.empty_element_suffix, text) - + return text # "Sorta" because auto-links are identified as "tag" tokens. @@ -763,7 +775,7 @@ class Markdown(object): <\?.*?\?> # processing instruction ) """, re.X) - + def _escape_special_chars(self, text): # Python markdown note: the HTML tokenization here differs from # that in Markdown.pl, hence the behaviour for subtle cases can @@ -875,7 +887,7 @@ class Markdown(object): anchor_allowed_pos = 0 curr_pos = 0 - while True: # Handle the next link. + while True: # Handle the next link. # The next '[' is the start of: # - an inline anchor: [text](url "title") # - a reference anchor: [text][id] @@ -903,7 +915,7 @@ class Markdown(object): # matching brackets in img alt text -- we'll differ in that # regard. bracket_depth = 0 - for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, text_length)): ch = text[p] if ch == ']': @@ -939,12 +951,12 @@ class Markdown(object): return text # Inline anchor or img? - if text[p] == '(': # attempt at perf improvement + if text[p] == '(': # attempt at perf improvement match = self._tail_of_inline_link_re.match(text, p) if match: # Handle an inline anchor or img. #is_img = start_idx > 0 and text[start_idx-1] == "!" - #if is_img: + # if is_img: # start_idx -= 1 is_img = False @@ -988,7 +1000,7 @@ class Markdown(object): if match: # Handle a reference-style anchor or img. #is_img = start_idx > 0 and text[start_idx-1] == "!" - #if is_img: + # if is_img: # start_idx -= 1 is_img = False @@ -1014,7 +1026,8 @@ class Markdown(object): link_text.replace('"', '"'), title_str, self.empty_element_suffix) curr_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] + text = text[:start_idx] + \ + result + text[match.end():] elif start_idx >= anchor_allowed_pos: result = '<a href="%s"%s>%s</a>' \ % (url, title_str, link_text) @@ -1024,7 +1037,8 @@ class Markdown(object): # anchor_allowed_pos on. curr_pos = start_idx + len(result_head) anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] + text = text[:start_idx] + \ + result + text[match.end():] else: # Anchor not allowed here. curr_pos = start_idx + 1 @@ -1036,12 +1050,12 @@ class Markdown(object): # Otherwise, it isn't markup. curr_pos = start_idx + 1 - return text + return text def header_id_from_text(self, text, prefix): """Generate a header id attribute value from the given header HTML content. - + This is only called if the "header-ids" extra is enabled. Subclasses may override this for different header ids. """ @@ -1056,12 +1070,14 @@ class Markdown(object): return header_id _toc = None + def _toc_add_entry(self, level, id, name): if self._toc is None: self._toc = [] self._toc.append((level, id, name)) _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): n = {"=": 1, "-": 2}[match.group(2)[0]] demote_headers = self.extras.get("demote-headers") @@ -1070,7 +1086,7 @@ class Markdown(object): header_id_attr = "" if "header-ids" in self.extras: header_id = self.header_id_from_text(match.group(1), - prefix=self.extras["header-ids"]) + prefix=self.extras["header-ids"]) header_id_attr = ' id="%s"' % header_id html = self._run_span_gamut(match.group(1)) if "toc" in self.extras: @@ -1086,6 +1102,7 @@ class Markdown(object): \#* # optional closing #'s (not counted) \n+ ''', re.X | re.M) + def _atx_h_sub(self, match): n = len(match.group(1)) demote_headers = self.extras.get("demote-headers") @@ -1094,7 +1111,7 @@ class Markdown(object): header_id_attr = "" if "header-ids" in self.extras: header_id = self.header_id_from_text(match.group(2), - prefix=self.extras["header-ids"]) + prefix=self.extras["header-ids"]) header_id_attr = ' id="%s"' % header_id html = self._run_span_gamut(match.group(2)) if "toc" in self.extras: @@ -1105,7 +1122,7 @@ class Markdown(object): # Setext-style headers: # Header 1 # ======== - # + # # Header 2 # -------- text = self._setext_h_re.sub(self._setext_h_sub, text) @@ -1120,8 +1137,7 @@ class Markdown(object): return text - - _marker_ul_chars = '*+-' + _marker_ul_chars = '*+-' _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars _marker_ul = '(?:[%s])' % _marker_ul_chars _marker_ol = r'(?:\d+\.)' @@ -1161,7 +1177,7 @@ class Markdown(object): ) ) ''' % (less_than_tab, marker_pat, marker_pat) - + # We use a different prefix before nested lists than top-level lists. # See extended comment in _process_list_items(). # @@ -1189,7 +1205,7 @@ class Markdown(object): text = list_re.sub(self._list_sub, text) return text - + _list_item_re = re.compile(r''' (\n)? # leading line = \1 (^[ \t]*) # leading whitespace = \2 @@ -1201,6 +1217,7 @@ class Markdown(object): re.M | re.X | re.S) _last_li_endswith_two_eols = False + def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) @@ -1219,7 +1236,7 @@ class Markdown(object): def _process_list_items(self, list_str): # Process the contents of a single ordered or unordered list, # splitting it into individual list items. - + # The $g_list_level global keeps track of when we're inside a list. # Each time we enter a list, we increment it; when we leave a list, # we decrement. If it's zero, we're not in a list anymore. @@ -1268,7 +1285,7 @@ class Markdown(object): """ yield 0, "<code>" for tup in inner: - yield tup + yield tup yield 0, "</code>" def wrap(self, source, outfile): @@ -1333,7 +1350,6 @@ class Markdown(object): return code_block_re.sub(self._code_block_sub, text) - # Rules for a code span: # - backslash escapes are not interpreted in a code span # - to include one or or a run of more backticks the delimiters must @@ -1359,26 +1375,26 @@ class Markdown(object): def _do_code_spans(self, text): # * Backtick quotes are used for <code></code> spans. - # + # # * You can use multiple backticks as the delimiters if you want to # include literal backticks in the code span. So, this input: - # + # # Just type ``foo `bar` baz`` at the prompt. - # + # # Will translate to: - # + # # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> - # + # # There's no arbitrary limit to the number of backticks you # can use as delimters. If you need three consecutive backticks # in your code, use four for delimiters, etc. # # * You can use spaces to get literal backticks at the edges: - # + # # ... type `` `bar` `` ... - # + # # Turns to: - # + # # ... type <code>`bar`</code> ... return self._code_span_re.sub(self._code_span_sub, text) @@ -1409,22 +1425,24 @@ class Markdown(object): _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) - #_spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S) - - _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + # _spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S) + + _code_friendly_strong_re = re.compile( + r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): # <strong> must go first: if "code-friendly" in self.extras: - text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_strong_re.sub( + r"<strong>\1</strong>", text) text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) else: text = self._strong_re.sub(r"<strong>\2</strong>", text) text = self._em_re.sub(r"<em>\2</em>", text) - + #text = self._spoiler_re.sub("<del>\\1</del>", text) return text - _block_quote_re = re.compile(r''' ( # Wrap whole match in \1 @@ -1435,19 +1453,20 @@ class Markdown(object): )+ ) ''', re.M | re.X) - _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M) _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) - #bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + # bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines bq = bq.strip('\n') bq = self._run_span_gamut(bq) - #bq = self._run_block_gamut(bq) # recurse + # bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with <pre> content, so we need to fix that: @@ -1482,16 +1501,18 @@ class Markdown(object): # consider numeric bullets (e.g. "1." and "2.") to be # equal. if (li and len(li.group(2)) <= 3 and li.group("next_marker") - and li.group("marker")[-1] == li.group("next_marker")[-1]): + and li.group("marker")[-1] == li.group("next_marker")[-1]): start = li.start() - cuddled_list = self._do_lists(graf[start:]).rstrip("\n") - assert cuddled_list.startswith("<ul>") or cuddled_list.startswith("<ol>") + cuddled_list = self._do_lists( + graf[start:]).rstrip("\n") + assert cuddled_list.startswith( + "<ul>") or cuddled_list.startswith("<ol>") graf = graf[:start] - + # Wrap <p> tags. graf = self._run_span_gamut(graf) grafs.append("<p>" + graf.lstrip(" \t") + "</p>") - + if cuddled_list: grafs.append(cuddled_list) @@ -1510,9 +1531,9 @@ class Markdown(object): footer.append('<li id="fn-%s">' % id) footer.append(self._run_block_gamut(self.footnotes[id])) backlink = ('<a href="#fnref-%s" ' - 'class="footnoteBackLink" ' - 'title="Jump back to footnote %d in the text.">' - '↩</a>' % (id, i+1)) + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) if footer[-1].endswith("</p>"): footer[-1] = footer[-1][:-len("</p>")] \ + ' ' + backlink + "</p>" @@ -1535,7 +1556,7 @@ class Markdown(object): # Smart processing for ampersands and angle brackets that need # to be encoded. text = self._ampersand_re.sub('&', text) - + # Encode naked <'s text = self._naked_lt_re.sub('<', text) @@ -1551,6 +1572,7 @@ class Markdown(object): return text _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): g1 = match.group(1) return '<a href="%s">%s</a>' % (g1, g1) @@ -1565,6 +1587,7 @@ class Markdown(object): ) > """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): return self._encode_email_address( self._unescape_special_chars(match.group(1))) @@ -1593,7 +1616,7 @@ class Markdown(object): addr = '<a href="%s">%s</a>' \ % (''.join(chars), ''.join(chars[7:])) return addr - + def _do_link_patterns(self, text): """Caveat emptor: there isn't much guarding against link patterns being formed inside other standard Markdown links, e.g. @@ -1614,7 +1637,7 @@ class Markdown(object): for (start, end), href in reversed(replacements): escaped_href = ( href.replace('"', '"') # b/c of attr quote - # To avoid markdown <em> and <strong>: + # To avoid markdown <em> and <strong>: .replace('*', g_escape_table['*']) .replace('_', g_escape_table['_'])) link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) @@ -1624,7 +1647,7 @@ class Markdown(object): for hash, link in link_from_hash.items(): text = text.replace(hash, link) return text - + def _unescape_special_chars(self, text): # Swap back in all the special characters we've hidden. for ch, hash in g_escape_table.items(): @@ -1651,7 +1674,7 @@ class MarkdownWithExtras(Markdown): extras = ["footnotes", "code-color"] -#---- internal support functions +# ---- internal support functions class UnicodeWithAttrs(unicode): """A subclass of unicode used for the return value of conversion to @@ -1662,12 +1685,12 @@ class UnicodeWithAttrs(unicode): @property def toc_html(self): """Return the HTML for the current TOC. - + This expects the `_toc` attribute to have been set on this instance. """ if self._toc is None: return None - + def indent(): return ' ' * (len(h_stack) - 1) lines = [] @@ -1696,11 +1719,13 @@ class UnicodeWithAttrs(unicode): _slugify_strip_re = re.compile(r'[^\w\s-]') _slugify_hyphenate_re = re.compile(r'[-\s]+') + + def _slugify(value): """ Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. - + From Django's "django/template/defaultfilters.py". """ import unicodedata @@ -1709,8 +1734,11 @@ def _slugify(value): return _slugify_hyphenate_re.sub('-', value) # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 + + def _curry(*args, **kwargs): function, args = args[0], args[1:] + def result(*rest, **kwrest): combined = kwargs.copy() combined.update(kwrest) @@ -1718,6 +1746,8 @@ def _curry(*args, **kwargs): return result # Recipe: regex_from_encoded_pattern (1.0) + + def _regex_from_encoded_pattern(s): """'foo' -> re.compile(re.escape('foo')) '/foo/' -> re.compile('foo') @@ -1743,30 +1773,33 @@ def _regex_from_encoded_pattern(s): "(must be one of '%s')" % (char, s, ''.join(flag_from_char.keys()))) return re.compile(s[1:idx], flags) - else: # not an encoded regex + else: # not an encoded regex return re.compile(re.escape(s)) # Recipe: dedent (0.1.2) + + def _dedentlines(lines, tabsize=8, skip_first_line=False): """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines - + "lines" is a list of lines to dedent. "tabsize" is the tab width to use for indent width calculations. "skip_first_line" is a boolean indicating if the first line should be skipped for calculating the indent width and for dedenting. This is sometimes useful for docstrings and similar. - + Same as dedent() except operates on a sequence of lines. Note: the lines list is modified **in-place**. """ DEBUG = False - if DEBUG: + if DEBUG: print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ % (tabsize, skip_first_line) indents = [] margin = None for i, line in enumerate(lines): - if i == 0 and skip_first_line: continue + if i == 0 and skip_first_line: + continue indent = 0 for ch in line: if ch == ' ': @@ -1774,21 +1807,24 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): elif ch == '\t': indent += tabsize - (indent % tabsize) elif ch in '\r\n': - continue # skip all-whitespace lines + continue # skip all-whitespace lines else: break else: - continue # skip all-whitespace lines - if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + continue # skip all-whitespace lines + if DEBUG: + print "dedent: indent=%d: %r" % (indent, line) if margin is None: margin = indent else: margin = min(margin, indent) - if DEBUG: print "dedent: margin=%r" % margin + if DEBUG: + print "dedent: margin=%r" % margin if margin is not None and margin > 0: for i, line in enumerate(lines): - if i == 0 and skip_first_line: continue + if i == 0 and skip_first_line: + continue removed = 0 for j, ch in enumerate(line): if ch == ' ': @@ -1796,7 +1832,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): elif ch == '\t': removed += tabsize - (removed % tabsize) elif ch in '\r\n': - if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + if DEBUG: + print "dedent: %r: EOL -> strip up to EOL" % line lines[i] = lines[i][j:] break else: @@ -1817,6 +1854,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False): lines[i] = lines[i][removed:] return lines + def _dedent(text, tabsize=8, skip_first_line=False): """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text @@ -1825,7 +1863,7 @@ def _dedent(text, tabsize=8, skip_first_line=False): "skip_first_line" is a boolean indicating if the first line should be skipped for calculating the indent width and for dedenting. This is sometimes useful for docstrings and similar. - + textwrap.dedent(s), but don't expand tabs to spaces """ lines = text.splitlines(1) @@ -1834,28 +1872,31 @@ def _dedent(text, tabsize=8, skip_first_line=False): class _memoized(object): - """Decorator that caches a function's return value each time it is called. - If called later with the same arguments, the cached value is returned, and - not re-evaluated. - - http://wiki.python.org/moin/PythonDecoratorLibrary - """ - def __init__(self, func): - self.func = func - self.cache = {} - def __call__(self, *args): - try: - return self.cache[args] - except KeyError: - self.cache[args] = value = self.func(*args) - return value - except TypeError: - # uncachable -- for instance, passing a list as an argument. - # Better to not cache than to blow up entirely. - return self.func(*args) - def __repr__(self): - """Return the function's docstring.""" - return self.func.__doc__ + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + + def __init__(self, func): + self.func = func + self.cache = {} + + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ def _xml_oneliner_re_from_tab_width(tab_width): @@ -1877,10 +1918,13 @@ def _xml_oneliner_re_from_tab_width(tab_width): (?=\n{2,}|\Z) # followed by a blank line or end of document ) """ % (tab_width - 1), re.X) + + _xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + def _hr_tag_re_from_tab_width(tab_width): - return re.compile(r""" + return re.compile(r""" (?: (?<=\n\n) # Starting after a blank line | # or @@ -1896,6 +1940,8 @@ def _hr_tag_re_from_tab_width(tab_width): (?=\n{2,}|\Z) # followed by a blank line or end of document ) """ % (tab_width - 1), re.X) + + _hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) @@ -1913,18 +1959,20 @@ def _xml_encode_email_char_at_random(ch): return '&#%s;' % ord(ch) - #---- mainline class _NoReflowFormatter(optparse.IndentedHelpFormatter): """An optparse formatter that does NOT reflow the description.""" + def format_description(self, description): return description or "" + def _test(): import doctest doctest.testmod() + def main(argv=None): if argv is None: argv = sys.argv @@ -1934,14 +1982,14 @@ def main(argv=None): usage = "usage: %prog [PATHS...]" version = "%prog "+__version__ parser = optparse.OptionParser(prog="markdown2", usage=usage, - version=version, description=cmdln_desc, - formatter=_NoReflowFormatter()) + version=version, description=cmdln_desc, + formatter=_NoReflowFormatter()) parser.add_option("-v", "--verbose", dest="log_level", action="store_const", const=logging.DEBUG, help="more verbose output") parser.add_option("--encoding", help="specify encoding of text content") - parser.add_option("--html4tags", action="store_true", default=False, + parser.add_option("--html4tags", action="store_true", default=False, help="use HTML 4 style for empty element tags") parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", help="sanitize literal HTML: 'escape' escapes " @@ -1990,8 +2038,10 @@ def main(argv=None): f = open(opts.link_patterns_file) try: for i, line in enumerate(f.readlines()): - if not line.strip(): continue - if line.lstrip().startswith("#"): continue + if not line.strip(): + continue + if line.lstrip().startswith("#"): + continue try: pat, href = line.rstrip().rsplit(None, 1) except ValueError: @@ -2025,7 +2075,7 @@ def main(argv=None): html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) if extras and "toc" in extras: log.debug("toc_html: " + - html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) + html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) if opts.compare: test_dir = join(dirname(dirname(abspath(__file__))), "test") if exists(join(test_dir, "test_markdown2.py")): @@ -2040,5 +2090,4 @@ def main(argv=None): if __name__ == "__main__": - sys.exit( main(sys.argv) ) - + sys.exit(main(sys.argv)) |