aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cgi/BeautifulSoup.py2047
-rw-r--r--cgi/api.py147
-rw-r--r--cgi/fcgi.py1363
-rw-r--r--cgi/formatting.py29
-rw-r--r--cgi/framework.py40
-rw-r--r--cgi/manage.py150
-rw-r--r--cgi/markdown.py2093
-rw-r--r--cgi/post.py31
-rw-r--r--cgi/templates/bans_geo2
-rw-r--r--cgi/templates/bans_locations2
-rw-r--r--cgi/templates/manage/boardoptions.html2
-rw-r--r--cgi/templates/mobile/txt_thread.html10
-rw-r--r--cgi/templates/revision.html2
-rw-r--r--cgi/templates/txt_archive.html4
-rw-r--r--cgi/templates/txt_thread.en.html10
-rw-r--r--cgi/tenjin.py2293
-rwxr-xr-xcgi/weabot.py35
17 files changed, 235 insertions, 8025 deletions
diff --git a/cgi/BeautifulSoup.py b/cgi/BeautifulSoup.py
deleted file mode 100644
index 3e97785..0000000
--- a/cgi/BeautifulSoup.py
+++ /dev/null
@@ -1,2047 +0,0 @@
-"""Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup parses a (possibly invalid) XML or HTML document into a
-tree representation. It provides methods and Pythonic idioms that make
-it easy to navigate, search, and modify the tree.
-
-A well-formed XML/HTML document yields a well-formed data
-structure. An ill-formed XML/HTML document yields a correspondingly
-ill-formed data structure. If your document is only locally
-well-formed, you can use this library to find and process the
-well-formed part of it.
-
-Beautiful Soup works with Python 2.2 and up. It has no external
-dependencies, but you'll have more success at converting data to UTF-8
-if you also install these three packages:
-
-* chardet, for auto-detecting character encodings
- http://chardet.feedparser.org/
-* cjkcodecs and iconv_codec, which add more encodings to the ones supported
- by stock Python.
- http://cjkpython.i18n.org/
-
-Beautiful Soup defines classes for two main parsing strategies:
-
- * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
- language that kind of looks like XML.
-
- * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
- or invalid. This class has web browser-like heuristics for
- obtaining a sensible parse tree in the face of common HTML errors.
-
-Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
-the encoding of an HTML or XML document, and converting it to
-Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
-
-For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
-http://www.crummy.com/software/BeautifulSoup/documentation.html
-
-Here, have some legalese:
-
-Copyright (c) 2004-2010, Leonard Richardson
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials provided
- with the distribution.
-
- * Neither the name of the the Beautiful Soup Consortium and All
- Night Kosher Bakery nor the names of its contributors may be
- used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
-
-"""
-from __future__ import generators
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "3.2.1"
-__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
-__license__ = "New-style BSD"
-
-from sgmllib import SGMLParser, SGMLParseError
-import codecs
-import markupbase
-import types
-import re
-import sgmllib
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
-try:
- set
-except NameError:
- from sets import Set as set
-
-# These hacks make Beautiful Soup able to parse XML with namespaces
-sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
-markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
-
-DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-
-def _match_css_class(str):
- """Build a RE to match the given CSS class."""
- return re.compile(r"(^|.*\s)%s($|\s)" % str)
-
-# First, the classes that represent markup elements.
-
-
-class PageElement(object):
- """Contains the navigational information for some part of the page
- (either a tag or a piece of text)"""
-
- def _invert(h):
- "Cheap function to invert a hash."
- i = {}
- for k, v in h.items():
- i[v] = k
- return i
-
- XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'",
- "quot": '"',
- "amp": "&",
- "lt": "<",
- "gt": ">"}
-
- XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
- def setup(self, parent=None, previous=None):
- """Sets up the initial relations between this element and
- other elements."""
- self.parent = parent
- self.previous = previous
- self.next = None
- self.previousSibling = None
- self.nextSibling = None
- if self.parent and self.parent.contents:
- self.previousSibling = self.parent.contents[-1]
- self.previousSibling.nextSibling = self
-
- def replaceWith(self, replaceWith):
- oldParent = self.parent
- myIndex = self.parent.index(self)
- if hasattr(replaceWith, "parent")\
- and replaceWith.parent is self.parent:
- # We're replacing this element with one of its siblings.
- index = replaceWith.parent.index(replaceWith)
- if index and index < myIndex:
- # Furthermore, it comes before this element. That
- # means that when we extract it, the index of this
- # element will change.
- myIndex = myIndex - 1
- self.extract()
- oldParent.insert(myIndex, replaceWith)
-
- def replaceWithChildren(self):
- myParent = self.parent
- myIndex = self.parent.index(self)
- self.extract()
- reversedChildren = list(self.contents)
- reversedChildren.reverse()
- for child in reversedChildren:
- myParent.insert(myIndex, child)
-
- def extract(self):
- """Destructively rips this element out of the tree."""
- if self.parent:
- try:
- del self.parent.contents[self.parent.index(self)]
- except ValueError:
- pass
-
- # Find the two elements that would be next to each other if
- # this element (and any children) hadn't been parsed. Connect
- # the two.
- lastChild = self._lastRecursiveChild()
- nextElement = lastChild.next
-
- if self.previous:
- self.previous.next = nextElement
- if nextElement:
- nextElement.previous = self.previous
- self.previous = None
- lastChild.next = None
-
- self.parent = None
- if self.previousSibling:
- self.previousSibling.nextSibling = self.nextSibling
- if self.nextSibling:
- self.nextSibling.previousSibling = self.previousSibling
- self.previousSibling = self.nextSibling = None
- return self
-
- def _lastRecursiveChild(self):
- "Finds the last element beneath this object to be parsed."
- lastChild = self
- while hasattr(lastChild, 'contents') and lastChild.contents:
- lastChild = lastChild.contents[-1]
- return lastChild
-
- def insert(self, position, newChild):
- if isinstance(newChild, basestring) \
- and not isinstance(newChild, NavigableString):
- newChild = NavigableString(newChild)
-
- position = min(position, len(self.contents))
- if hasattr(newChild, 'parent') and newChild.parent is not None:
- # We're 'inserting' an element that's already one
- # of this object's children.
- if newChild.parent is self:
- index = self.index(newChild)
- if index > position:
- # Furthermore we're moving it further down the
- # list of this object's children. That means that
- # when we extract this element, our target index
- # will jump down one.
- position = position - 1
- newChild.extract()
-
- newChild.parent = self
- previousChild = None
- if position == 0:
- newChild.previousSibling = None
- newChild.previous = self
- else:
- previousChild = self.contents[position-1]
- newChild.previousSibling = previousChild
- newChild.previousSibling.nextSibling = newChild
- newChild.previous = previousChild._lastRecursiveChild()
- if newChild.previous:
- newChild.previous.next = newChild
-
- newChildsLastElement = newChild._lastRecursiveChild()
-
- if position >= len(self.contents):
- newChild.nextSibling = None
-
- parent = self
- parentsNextSibling = None
- while not parentsNextSibling:
- parentsNextSibling = parent.nextSibling
- parent = parent.parent
- if not parent: # This is the last element in the document.
- break
- if parentsNextSibling:
- newChildsLastElement.next = parentsNextSibling
- else:
- newChildsLastElement.next = None
- else:
- nextChild = self.contents[position]
- newChild.nextSibling = nextChild
- if newChild.nextSibling:
- newChild.nextSibling.previousSibling = newChild
- newChildsLastElement.next = nextChild
-
- if newChildsLastElement.next:
- newChildsLastElement.next.previous = newChildsLastElement
- self.contents.insert(position, newChild)
-
- def append(self, tag):
- """Appends the given tag to the contents of this tag."""
- self.insert(len(self.contents), tag)
-
- def findNext(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the first item that matches the given criteria and
- appears after this Tag in the document."""
- return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
-
- def findAllNext(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns all items that match the given criteria and appear
- after this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.nextGenerator,
- **kwargs)
-
- def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears after this Tag in the document."""
- return self._findOne(self.findNextSiblings, name, attrs, text,
- **kwargs)
-
- def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns the siblings of this Tag that match the given
- criteria and appear after this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.nextSiblingGenerator, **kwargs)
- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-
- def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the first item that matches the given criteria and
- appears before this Tag in the document."""
- return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
-
- def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns all items that match the given criteria and appear
- before this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.previousGenerator,
- **kwargs)
- fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-
- def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears before this Tag in the document."""
- return self._findOne(self.findPreviousSiblings, name, attrs, text,
- **kwargs)
-
- def findPreviousSiblings(self, name=None, attrs={}, text=None,
- limit=None, **kwargs):
- """Returns the siblings of this Tag that match the given
- criteria and appear before this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.previousSiblingGenerator, **kwargs)
- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-
- def findParent(self, name=None, attrs={}, **kwargs):
- """Returns the closest parent of this Tag that matches the given
- criteria."""
- # NOTE: We can't use _findOne because findParents takes a different
- # set of arguments.
- r = None
- l = self.findParents(name, attrs, 1)
- if l:
- r = l[0]
- return r
-
- def findParents(self, name=None, attrs={}, limit=None, **kwargs):
- """Returns the parents of this Tag that match the given
- criteria."""
-
- return self._findAll(name, attrs, None, limit, self.parentGenerator,
- **kwargs)
- fetchParents = findParents # Compatibility with pre-3.x
-
- # These methods do the real heavy lifting.
-
- def _findOne(self, method, name, attrs, text, **kwargs):
- r = None
- l = method(name, attrs, text, 1, **kwargs)
- if l:
- r = l[0]
- return r
-
- def _findAll(self, name, attrs, text, limit, generator, **kwargs):
- "Iterates over a generator looking for things that match."
-
- if isinstance(name, SoupStrainer):
- strainer = name
- # (Possibly) special case some findAll*(...) searches
- elif text is None and not limit and not attrs and not kwargs:
- # findAll*(True)
- if name is True:
- return [element for element in generator()
- if isinstance(element, Tag)]
- # findAll*('tag-name')
- elif isinstance(name, basestring):
- return [element for element in generator()
- if isinstance(element, Tag) and
- element.name == name]
- else:
- strainer = SoupStrainer(name, attrs, text, **kwargs)
- # Build a SoupStrainer
- else:
- strainer = SoupStrainer(name, attrs, text, **kwargs)
- results = ResultSet(strainer)
- g = generator()
- while True:
- try:
- i = g.next()
- except StopIteration:
- break
- if i:
- found = strainer.search(i)
- if found:
- results.append(found)
- if limit and len(results) >= limit:
- break
- return results
-
- # These Generators can be used to navigate starting from both
- # NavigableStrings and Tags.
- def nextGenerator(self):
- i = self
- while i is not None:
- i = i.next
- yield i
-
- def nextSiblingGenerator(self):
- i = self
- while i is not None:
- i = i.nextSibling
- yield i
-
- def previousGenerator(self):
- i = self
- while i is not None:
- i = i.previous
- yield i
-
- def previousSiblingGenerator(self):
- i = self
- while i is not None:
- i = i.previousSibling
- yield i
-
- def parentGenerator(self):
- i = self
- while i is not None:
- i = i.parent
- yield i
-
- # Utility methods
- def substituteEncoding(self, str, encoding=None):
- encoding = encoding or "utf-8"
- return str.replace("%SOUP-ENCODING%", encoding)
-
- def toEncoding(self, s, encoding=None):
- """Encodes an object to a string in some encoding, or to Unicode.
- ."""
- if isinstance(s, unicode):
- if encoding:
- s = s.encode(encoding)
- elif isinstance(s, str):
- if encoding:
- s = s.encode(encoding)
- else:
- s = unicode(s)
- else:
- if encoding:
- s = self.toEncoding(str(s), encoding)
- else:
- s = unicode(s)
- return s
-
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- + ")")
-
- def _sub_entity(self, x):
- """Used with a regular expression to substitute the
- appropriate XML entity for an XML special character."""
- return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
-
-class NavigableString(unicode, PageElement):
-
- def __new__(cls, value):
- """Create a new NavigableString.
-
- When unpickling a NavigableString, this method is called with
- the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
- passed in to the superclass's __new__ or the superclass won't know
- how to handle non-ASCII characters.
- """
- if isinstance(value, unicode):
- return unicode.__new__(cls, value)
- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
-
- def __getnewargs__(self):
- return (NavigableString.__str__(self),)
-
- def __getattr__(self, attr):
- """text.string gives you text. This is for backwards
- compatibility for Navigable*String, but for CData* it lets you
- get the string without the CData wrapper."""
- if attr == 'string':
- return self
- else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
-
- def __unicode__(self):
- return str(self).decode(DEFAULT_OUTPUT_ENCODING)
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- # Substitute outgoing XML entities.
- data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self)
- if encoding:
- return data.encode(encoding)
- else:
- return data
-
-
-class CData(NavigableString):
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
-
-
-class ProcessingInstruction(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- output = self
- if "%SOUP-ENCODING%" in output:
- output = self.substituteEncoding(output, encoding)
- return "<?%s?>" % self.toEncoding(output, encoding)
-
-
-class Comment(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<!--%s-->" % NavigableString.__str__(self, encoding)
-
-
-class Declaration(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<!%s>" % NavigableString.__str__(self, encoding)
-
-
-class Tag(PageElement):
-
- """Represents a found HTML tag with its attributes and contents."""
-
- def _convertEntities(self, match):
- """Used in a call to re.sub to replace HTML, XML, and numeric
- entities with the appropriate Unicode characters. If HTML
- entities are being converted, any unrecognized entities are
- escaped."""
- x = match.group(1)
- if self.convertHTMLEntities and x in name2codepoint:
- return unichr(name2codepoint[x])
- elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
- if self.convertXMLEntities:
- return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
- else:
- return u'&%s;' % x
- elif len(x) > 0 and x[0] == '#':
- # Handle numeric entities
- if len(x) > 1 and x[1] == 'x':
- return unichr(int(x[2:], 16))
- else:
- return unichr(int(x[1:]))
-
- elif self.escapeUnrecognizedEntities:
- return u'&amp;%s;' % x
- else:
- return u'&%s;' % x
-
- def __init__(self, parser, name, attrs=None, parent=None,
- previous=None):
- "Basic constructor."
-
- # We don't actually store the parser object: that lets extracted
- # chunks be garbage-collected
- self.parserClass = parser.__class__
- self.isSelfClosing = parser.isSelfClosingTag(name)
- self.name = name
- if attrs is None:
- attrs = []
- elif isinstance(attrs, dict):
- attrs = attrs.items()
- self.attrs = attrs
- self.contents = []
- self.setup(parent, previous)
- self.hidden = False
- self.containsSubstitutions = False
- self.convertHTMLEntities = parser.convertHTMLEntities
- self.convertXMLEntities = parser.convertXMLEntities
- self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
-
- # Convert any HTML, XML, or numeric entities in the attribute values.
- def convert((k, val)): return (k,
- re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
- self._convertEntities,
- val))
- self.attrs = map(convert, self.attrs)
-
- def getString(self):
- if (len(self.contents) == 1
- and isinstance(self.contents[0], NavigableString)):
- return self.contents[0]
-
- def setString(self, string):
- """Replace the contents of the tag with a string"""
- self.clear()
- self.append(string)
-
- string = property(getString, setString)
-
- def getText(self, separator=u""):
- if not len(self.contents):
- return u""
- stopNode = self._lastRecursiveChild().next
- strings = []
- current = self.contents[0]
- while current is not stopNode:
- if isinstance(current, NavigableString):
- strings.append(current.strip())
- current = current.next
- return separator.join(strings)
-
- text = property(getText)
-
- def get(self, key, default=None):
- """Returns the value of the 'key' attribute for the tag, or
- the value given for 'default' if it doesn't have that
- attribute."""
- return self._getAttrMap().get(key, default)
-
- def clear(self):
- """Extract all children."""
- for child in self.contents[:]:
- child.extract()
-
- def index(self, element):
- for i, child in enumerate(self.contents):
- if child is element:
- return i
- raise ValueError("Tag.index: element not in tag")
-
- def has_key(self, key):
- return self._getAttrMap().has_key(key)
-
- def __getitem__(self, key):
- """tag[key] returns the value of the 'key' attribute for the tag,
- and throws an exception if it's not there."""
- return self._getAttrMap()[key]
-
- def __iter__(self):
- "Iterating over a tag iterates over its contents."
- return iter(self.contents)
-
- def __len__(self):
- "The length of a tag is the length of its list of contents."
- return len(self.contents)
-
- def __contains__(self, x):
- return x in self.contents
-
- def __nonzero__(self):
- "A tag is non-None even if it has no contents."
- return True
-
- def __setitem__(self, key, value):
- """Setting tag[key] sets the value of the 'key' attribute for the
- tag."""
- self._getAttrMap()
- self.attrMap[key] = value
- found = False
- for i in range(0, len(self.attrs)):
- if self.attrs[i][0] == key:
- self.attrs[i] = (key, value)
- found = True
- if not found:
- self.attrs.append((key, value))
- self._getAttrMap()[key] = value
-
- def __delitem__(self, key):
- "Deleting tag[key] deletes all 'key' attributes for the tag."
- for item in self.attrs:
- if item[0] == key:
- self.attrs.remove(item)
- # We don't break because bad HTML can define the same
- # attribute multiple times.
- self._getAttrMap()
- if self.attrMap.has_key(key):
- del self.attrMap[key]
-
- def __call__(self, *args, **kwargs):
- """Calling a tag like a function is the same as calling its
- findAll() method. Eg. tag('a') returns a list of all the A tags
- found within this tag."""
- return apply(self.findAll, args, kwargs)
-
- def __getattr__(self, tag):
- # print "Getattr %s.%s" % (self.__class__, tag)
- if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
- return self.find(tag[:-3])
- elif tag.find('__') != 0:
- return self.find(tag)
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
-
- def __eq__(self, other):
- """Returns true iff this tag has the same name, the same attributes,
- and the same contents (recursively) as the given tag.
-
- NOTE: right now this will return false if two tags have the
- same attributes in a different order. Should this be fixed?"""
- if other is self:
- return True
- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
- return False
- for i in range(0, len(self.contents)):
- if self.contents[i] != other.contents[i]:
- return False
- return True
-
- def __ne__(self, other):
- """Returns true iff this tag is not identical to the other tag,
- as defined in __eq__."""
- return not self == other
-
- def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- """Renders this tag as a string."""
- return self.__str__(encoding)
-
- def __unicode__(self):
- return self.__str__(None)
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
- prettyPrint=False, indentLevel=0):
- """Returns a string or Unicode representation of this tag and
- its contents. To get Unicode, pass None for encoding.
-
- NOTE: since Python's HTML parser consumes whitespace, this
- method is not certain to reproduce the whitespace present in
- the original string."""
-
- encodedName = self.toEncoding(self.name, encoding)
-
- attrs = []
- if self.attrs:
- for key, val in self.attrs:
- fmt = '%s="%s"'
- if isinstance(val, basestring):
- if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
- val = self.substituteEncoding(val, encoding)
-
- # The attribute value either:
- #
- # * Contains no embedded double quotes or single quotes.
- # No problem: we enclose it in double quotes.
- # * Contains embedded single quotes. No problem:
- # double quotes work here too.
- # * Contains embedded double quotes. No problem:
- # we enclose it in single quotes.
- # * Embeds both single _and_ double quotes. This
- # can't happen naturally, but it can happen if
- # you modify an attribute value after parsing
- # the document. Now we have a bit of a
- # problem. We solve it by enclosing the
- # attribute in single quotes, and escaping any
- # embedded single quotes to XML entities.
- if '"' in val:
- fmt = "%s='%s'"
- if "'" in val:
- # TODO: replace with apos when
- # appropriate.
- val = val.replace("'", "&squot;")
-
- # Now we're okay w/r/t quotes. But the attribute
- # value might also contain angle brackets, or
- # ampersands that aren't part of entities. We need
- # to escape those to XML entities too.
- val = self.BARE_AMPERSAND_OR_BRACKET.sub(
- self._sub_entity, val)
-
- attrs.append(fmt % (self.toEncoding(key, encoding),
- self.toEncoding(val, encoding)))
- close = ''
- closeTag = ''
- if self.isSelfClosing:
- close = ' /'
- else:
- closeTag = '</%s>' % encodedName
-
- indentTag, indentContents = 0, 0
- if prettyPrint:
- indentTag = indentLevel
- space = (' ' * (indentTag-1))
- indentContents = indentTag + 1
- contents = self.renderContents(encoding, prettyPrint, indentContents)
- if self.hidden:
- s = contents
- else:
- s = []
- attributeString = ''
- if attrs:
- attributeString = ' ' + ' '.join(attrs)
- if prettyPrint:
- s.append(space)
- s.append('<%s%s%s>' % (encodedName, attributeString, close))
- if prettyPrint:
- s.append("\n")
- s.append(contents)
- if prettyPrint and contents and contents[-1] != "\n":
- s.append("\n")
- if prettyPrint and closeTag:
- s.append(space)
- s.append(closeTag)
- if prettyPrint and closeTag and self.nextSibling:
- s.append("\n")
- s = ''.join(s)
- return s
-
- def decompose(self):
- """Recursively destroys the contents of this tree."""
- self.extract()
- if len(self.contents) == 0:
- return
- current = self.contents[0]
- while current is not None:
- next = current.next
- if isinstance(current, Tag):
- del current.contents[:]
- current.parent = None
- current.previous = None
- current.previousSibling = None
- current.next = None
- current.nextSibling = None
- current = next
-
- def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return self.__str__(encoding, True)
-
- def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
- prettyPrint=False, indentLevel=0):
- """Renders the contents of this tag as a string in the given
- encoding. If encoding is None, returns a Unicode string.."""
- s = []
- for c in self:
- text = None
- if isinstance(c, NavigableString):
- text = c.__str__(encoding)
- elif isinstance(c, Tag):
- s.append(c.__str__(encoding, prettyPrint, indentLevel))
- if text and prettyPrint:
- text = text.strip()
- if text:
- if prettyPrint:
- s.append(" " * (indentLevel-1))
- s.append(text)
- if prettyPrint:
- s.append("\n")
- return ''.join(s)
-
- # Soup methods
-
- def find(self, name=None, attrs={}, recursive=True, text=None,
- **kwargs):
- """Return only the first child of this Tag matching the given
- criteria."""
- r = None
- l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
- if l:
- r = l[0]
- return r
- findChild = find
-
- def findAll(self, name=None, attrs={}, recursive=True, text=None,
- limit=None, **kwargs):
- """Extracts a list of Tag objects that match the given
- criteria. You can specify the name of the Tag and any
- attributes you want the Tag to have.
-
- The value of a key-value pair in the 'attrs' map can be a
- string, a list of strings, a regular expression object, or a
- callable that takes a string and returns whether or not the
- string matches for some custom definition of 'matches'. The
- same is true of the tag name."""
- generator = self.recursiveChildGenerator
- if not recursive:
- generator = self.childGenerator
- return self._findAll(name, attrs, text, limit, generator, **kwargs)
- findChildren = findAll
-
- # Pre-3.x compatibility methods
- first = find
- fetch = findAll
-
- def fetchText(self, text=None, recursive=True, limit=None):
- return self.findAll(text=text, recursive=recursive, limit=limit)
-
- def firstText(self, text=None, recursive=True):
- return self.find(text=text, recursive=recursive)
-
- # Private methods
-
- def _getAttrMap(self):
- """Initializes a map representation of this tag's attributes,
- if not already initialized."""
- if not getattr(self, 'attrMap'):
- self.attrMap = {}
- for (key, value) in self.attrs:
- self.attrMap[key] = value
- return self.attrMap
-
- # Generator methods
- def childGenerator(self):
- # Just use the iterator from the contents
- return iter(self.contents)
-
- def recursiveChildGenerator(self):
- if not len(self.contents):
- raise StopIteration
- stopNode = self._lastRecursiveChild().next
- current = self.contents[0]
- while current is not stopNode:
- yield current
- current = current.next
-
-
-# Next, a couple classes to represent queries and their results.
-class SoupStrainer:
- """Encapsulates a number of ways of matching a markup element (tag or
- text)."""
-
- def __init__(self, name=None, attrs={}, text=None, **kwargs):
- self.name = name
- if isinstance(attrs, basestring):
- kwargs['class'] = _match_css_class(attrs)
- attrs = None
- if kwargs:
- if attrs:
- attrs = attrs.copy()
- attrs.update(kwargs)
- else:
- attrs = kwargs
- self.attrs = attrs
- self.text = text
-
- def __str__(self):
- if self.text:
- return self.text
- else:
- return "%s|%s" % (self.name, self.attrs)
-
- def searchTag(self, markupName=None, markupAttrs={}):
- found = None
- markup = None
- if isinstance(markupName, Tag):
- markup = markupName
- markupAttrs = markup
- callFunctionWithTagData = callable(self.name) \
- and not isinstance(markupName, Tag)
-
- if (not self.name) \
- or callFunctionWithTagData \
- or (markup and self._matches(markup, self.name)) \
- or (not markup and self._matches(markupName, self.name)):
- if callFunctionWithTagData:
- match = self.name(markupName, markupAttrs)
- else:
- match = True
- markupAttrMap = None
- for attr, matchAgainst in self.attrs.items():
- if not markupAttrMap:
- if hasattr(markupAttrs, 'get'):
- markupAttrMap = markupAttrs
- else:
- markupAttrMap = {}
- for k, v in markupAttrs:
- markupAttrMap[k] = v
- attrValue = markupAttrMap.get(attr)
- if not self._matches(attrValue, matchAgainst):
- match = False
- break
- if match:
- if markup:
- found = markup
- else:
- found = markupName
- return found
-
- def search(self, markup):
- # print 'looking for %s in %s' % (self, markup)
- found = None
- # If given a list of items, scan it for a text element that
- # matches.
- if hasattr(markup, "__iter__") \
- and not isinstance(markup, Tag):
- for element in markup:
- if isinstance(element, NavigableString) \
- and self.search(element):
- found = element
- break
- # If it's a Tag, make sure its name or attributes match.
- # Don't bother with Tags if we're searching for text.
- elif isinstance(markup, Tag):
- if not self.text:
- found = self.searchTag(markup)
- # If it's text, make sure the text matches.
- elif isinstance(markup, NavigableString) or \
- isinstance(markup, basestring):
- if self._matches(markup, self.text):
- found = markup
- else:
- raise Exception, "I don't know how to match against a %s" \
- % markup.__class__
- return found
-
- def _matches(self, markup, matchAgainst):
- # print "Matching %s against %s" % (markup, matchAgainst)
- result = False
- if matchAgainst is True:
- result = markup is not None
- elif callable(matchAgainst):
- result = matchAgainst(markup)
- else:
- # Custom match methods take the tag as an argument, but all
- # other ways of matching match the tag name as a string.
- if isinstance(markup, Tag):
- markup = markup.name
- if markup and not isinstance(markup, basestring):
- markup = unicode(markup)
- # Now we know that chunk is either a string, or None.
- if hasattr(matchAgainst, 'match'):
- # It's a regexp object.
- result = markup and matchAgainst.search(markup)
- elif hasattr(matchAgainst, '__iter__'): # list-like
- result = markup in matchAgainst
- elif hasattr(matchAgainst, 'items'):
- result = markup.has_key(matchAgainst)
- elif matchAgainst and isinstance(markup, basestring):
- if isinstance(markup, unicode):
- matchAgainst = unicode(matchAgainst)
- else:
- matchAgainst = str(matchAgainst)
-
- if not result:
- result = matchAgainst == markup
- return result
-
-
-class ResultSet(list):
- """A ResultSet is just a list that keeps track of the SoupStrainer
- that created it."""
-
- def __init__(self, source):
- list.__init__([])
- self.source = source
-
-# Now, some helper functions.
-
-
-def buildTagMap(default, *args):
- """Turns a list of maps, lists, or scalars into a single map.
- Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
- NESTING_RESET_TAGS maps out of lists and partial maps."""
- built = {}
- for portion in args:
- if hasattr(portion, 'items'):
- # It's a map. Merge it.
- for k, v in portion.items():
- built[k] = v
- elif hasattr(portion, '__iter__'): # is a list
- # It's a list. Map each item to the default.
- for k in portion:
- built[k] = default
- else:
- # It's a scalar. Map it to the default.
- built[portion] = default
- return built
-
-# Now, the parser classes.
-
-
-class BeautifulStoneSoup(Tag, SGMLParser):
-
- """This class contains the basic parser and search code. It defines
- a parser that knows nothing about tag behavior except for the
- following:
-
- You can't close a tag without closing all the tags it encloses.
- That is, "<foo><bar></foo>" actually means
- "<foo><bar></bar></foo>".
-
- [Another possible explanation is "<foo><bar /></foo>", but since
- this class defines no SELF_CLOSING_TAGS, it will never use that
- explanation.]
-
- This class is useful for parsing XML or made-up markup languages,
- or when BeautifulSoup makes an assumption counter to what you were
- expecting."""
-
- SELF_CLOSING_TAGS = {}
- NESTABLE_TAGS = {}
- RESET_NESTING_TAGS = {}
- QUOTE_TAGS = {}
- PRESERVE_WHITESPACE_TAGS = []
-
- MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
- lambda x: x.group(1) + ' />'),
- (re.compile('<!\s+([^<>]*)>'),
- lambda x: '<!' + x.group(1) + '>')
- ]
-
- ROOT_TAG_NAME = u'[document]'
-
- HTML_ENTITIES = "html"
- XML_ENTITIES = "xml"
- XHTML_ENTITIES = "xhtml"
- # TODO: This only exists for backwards-compatibility
- ALL_ENTITIES = XHTML_ENTITIES
-
- # Used when determining whether a text node is all whitespace and
- # can be replaced with a single space. A text node that contains
- # fancy Unicode spaces (usually non-breaking) should be left
- # alone.
- STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
-
- def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
- markupMassage=True, smartQuotesTo=XML_ENTITIES,
- convertEntities=None, selfClosingTags=None, isHTML=False):
- """The Soup object is initialized as the 'root tag', and the
- provided markup (which can be a string or a file-like object)
- is fed into the underlying parser.
-
- sgmllib will process most bad HTML, and the BeautifulSoup
- class has some tricks for dealing with some HTML that kills
- sgmllib, but Beautiful Soup can nonetheless choke or lose data
- if your data uses self-closing tags or declarations
- incorrectly.
-
- By default, Beautiful Soup uses regexes to sanitize input,
- avoiding the vast majority of these problems. If the problems
- don't apply to you, pass in False for markupMassage, and
- you'll get better performance.
-
- The default parser massage techniques fix the two most common
- instances of invalid HTML that choke sgmllib:
-
- <br/> (No space between name of closing tag and tag close)
- <! --Comment--> (Extraneous whitespace in declaration)
-
- You can pass in a custom list of (RE object, replace method)
- tuples to get Beautiful Soup to scrub your input the way you
- want."""
-
- self.parseOnlyThese = parseOnlyThese
- self.fromEncoding = fromEncoding
- self.smartQuotesTo = smartQuotesTo
- self.convertEntities = convertEntities
- # Set the rules for how we'll deal with the entities we
- # encounter
- if self.convertEntities:
- # It doesn't make sense to convert encoded characters to
- # entities even while you're converting entities to Unicode.
- # Just convert it all to Unicode.
- self.smartQuotesTo = None
- if convertEntities == self.HTML_ENTITIES:
- self.convertXMLEntities = False
- self.convertHTMLEntities = True
- self.escapeUnrecognizedEntities = True
- elif convertEntities == self.XHTML_ENTITIES:
- self.convertXMLEntities = True
- self.convertHTMLEntities = True
- self.escapeUnrecognizedEntities = False
- elif convertEntities == self.XML_ENTITIES:
- self.convertXMLEntities = True
- self.convertHTMLEntities = False
- self.escapeUnrecognizedEntities = False
- else:
- self.convertXMLEntities = False
- self.convertHTMLEntities = False
- self.escapeUnrecognizedEntities = False
-
- self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
- SGMLParser.__init__(self)
-
- if hasattr(markup, 'read'): # It's a file-type object.
- markup = markup.read()
- self.markup = markup
- self.markupMassage = markupMassage
- try:
- self._feed(isHTML=isHTML)
- except StopParsing:
- pass
- self.markup = None # The markup can now be GCed
-
- def convert_charref(self, name):
- """This method fixes a bug in Python's SGMLParser."""
- try:
- n = int(name)
- except ValueError:
- return
- if not 0 <= n <= 127: # ASCII ends at 127, not 255
- return
- return self.convert_codepoint(n)
-
- def _feed(self, inDocumentEncoding=None, isHTML=False):
- # Convert the document to Unicode.
- markup = self.markup
- if isinstance(markup, unicode):
- if not hasattr(self, 'originalEncoding'):
- self.originalEncoding = None
- else:
- dammit = UnicodeDammit(markup, [self.fromEncoding, inDocumentEncoding],
- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
- markup = dammit.unicode
- self.originalEncoding = dammit.originalEncoding
- self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
- if markup:
- if self.markupMassage:
- if not hasattr(self.markupMassage, "__iter__"):
- self.markupMassage = self.MARKUP_MASSAGE
- for fix, m in self.markupMassage:
- markup = fix.sub(m, markup)
- # TODO: We get rid of markupMassage so that the
- # soup object can be deepcopied later on. Some
- # Python installations can't copy regexes. If anyone
- # was relying on the existence of markupMassage, this
- # might cause problems.
- del(self.markupMassage)
- self.reset()
-
- SGMLParser.feed(self, markup)
- # Close out any unfinished strings and close all the open tags.
- self.endData()
- while self.currentTag.name != self.ROOT_TAG_NAME:
- self.popTag()
-
- def __getattr__(self, methodName):
- """This method routes method call requests to either the SGMLParser
- superclass or the Tag superclass, depending on the method name."""
- # print "__getattr__ called on %s.%s" % (self.__class__, methodName)
-
- if methodName.startswith('start_') or methodName.startswith('end_') \
- or methodName.startswith('do_'):
- return SGMLParser.__getattr__(self, methodName)
- elif not methodName.startswith('__'):
- return Tag.__getattr__(self, methodName)
- else:
- raise AttributeError
-
- def isSelfClosingTag(self, name):
- """Returns true iff the given string is the name of a
- self-closing tag according to this parser."""
- return self.SELF_CLOSING_TAGS.has_key(name) \
- or self.instanceSelfClosingTags.has_key(name)
-
- def reset(self):
- Tag.__init__(self, self, self.ROOT_TAG_NAME)
- self.hidden = 1
- SGMLParser.reset(self)
- self.currentData = []
- self.currentTag = None
- self.tagStack = []
- self.quoteStack = []
- self.pushTag(self)
-
- def popTag(self):
- tag = self.tagStack.pop()
-
- # print "Pop", tag.name
- if self.tagStack:
- self.currentTag = self.tagStack[-1]
- return self.currentTag
-
- def pushTag(self, tag):
- # print "Push", tag.name
- if self.currentTag:
- self.currentTag.contents.append(tag)
- self.tagStack.append(tag)
- self.currentTag = self.tagStack[-1]
-
- def endData(self, containerClass=NavigableString):
- if self.currentData:
- currentData = u''.join(self.currentData)
- if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
- not set([tag.name for tag in self.tagStack]).intersection(
- self.PRESERVE_WHITESPACE_TAGS)):
- if '\n' in currentData:
- currentData = '\n'
- else:
- currentData = ' '
- self.currentData = []
- if self.parseOnlyThese and len(self.tagStack) <= 1 and \
- (not self.parseOnlyThese.text or
- not self.parseOnlyThese.search(currentData)):
- return
- o = containerClass(currentData)
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
- self.currentTag.contents.append(o)
-
- def _popToTag(self, name, inclusivePop=True):
- """Pops the tag stack up to and including the most recent
- instance of the given tag. If inclusivePop is false, pops the tag
- stack up to but *not* including the most recent instqance of
- the given tag."""
- # print "Popping to %s" % name
- if name == self.ROOT_TAG_NAME:
- return
-
- numPops = 0
- mostRecentTag = None
- for i in range(len(self.tagStack)-1, 0, -1):
- if name == self.tagStack[i].name:
- numPops = len(self.tagStack)-i
- break
- if not inclusivePop:
- numPops = numPops - 1
-
- for i in range(0, numPops):
- mostRecentTag = self.popTag()
- return mostRecentTag
-
- def _smartPop(self, name):
- """We need to pop up to the previous tag of this type, unless
- one of this tag's nesting reset triggers comes between this
- tag and the previous tag of this type, OR unless this tag is a
- generic nesting trigger and another generic nesting trigger
- comes between this tag and the previous tag of this type.
-
- Examples:
- <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
- <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
- <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
-
- <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
- <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
- <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
- """
-
- nestingResetTriggers = self.NESTABLE_TAGS.get(name)
- isNestable = nestingResetTriggers != None
- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
- popTo = None
- inclusive = True
- for i in range(len(self.tagStack)-1, 0, -1):
- p = self.tagStack[i]
- if (not p or p.name == name) and not isNestable:
- # Non-nestable tags get popped to the top or to their
- # last occurance.
- popTo = name
- break
- if (nestingResetTriggers is not None
- and p.name in nestingResetTriggers) \
- or (nestingResetTriggers is None and isResetNesting
- and self.RESET_NESTING_TAGS.has_key(p.name)):
-
- # If we encounter one of the nesting reset triggers
- # peculiar to this tag, or we encounter another tag
- # that causes nesting to reset, pop up to but not
- # including that tag.
- popTo = p.name
- inclusive = False
- break
- p = p.parent
- if popTo:
- self._popToTag(popTo, inclusive)
-
- def unknown_starttag(self, name, attrs, selfClosing=0):
- # print "Start tag %s: %s" % (name, attrs)
- if self.quoteStack:
- # This is not a real tag.
- # print "<%s> is not real!" % name
- attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
- self.handle_data('<%s%s>' % (name, attrs))
- return
- self.endData()
-
- if not self.isSelfClosingTag(name) and not selfClosing:
- self._smartPop(name)
-
- if self.parseOnlyThese and len(self.tagStack) <= 1 \
- and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
- return
-
- tag = Tag(self, name, attrs, self.currentTag, self.previous)
- if self.previous:
- self.previous.next = tag
- self.previous = tag
- self.pushTag(tag)
- if selfClosing or self.isSelfClosingTag(name):
- self.popTag()
- if name in self.QUOTE_TAGS:
- # print "Beginning quote (%s)" % name
- self.quoteStack.append(name)
- self.literal = 1
- return tag
-
- def unknown_endtag(self, name):
- # print "End tag %s" % name
- if self.quoteStack and self.quoteStack[-1] != name:
- # This is not a real end tag.
- # print "</%s> is not real!" % name
- self.handle_data('</%s>' % name)
- return
- self.endData()
- self._popToTag(name)
- if self.quoteStack and self.quoteStack[-1] == name:
- self.quoteStack.pop()
- self.literal = (len(self.quoteStack) > 0)
-
- def handle_data(self, data):
- self.currentData.append(data)
-
- def _toStringSubclass(self, text, subclass):
- """Adds a certain piece of text to the tree as a NavigableString
- subclass."""
- self.endData()
- self.handle_data(text)
- self.endData(subclass)
-
- def handle_pi(self, text):
- """Handle a processing instruction as a ProcessingInstruction
- object, possibly one with a %SOUP-ENCODING% slot into which an
- encoding will be plugged later."""
- if text[:3] == "xml":
- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
- self._toStringSubclass(text, ProcessingInstruction)
-
- def handle_comment(self, text):
- "Handle comments as Comment objects."
- self._toStringSubclass(text, Comment)
-
- def handle_charref(self, ref):
- "Handle character references as data."
- if self.convertEntities:
- data = unichr(int(ref))
- else:
- data = '&#%s;' % ref
- self.handle_data(data)
-
- def handle_entityref(self, ref):
- """Handle entity references as data, possibly converting known
- HTML and/or XML entity references to the corresponding Unicode
- characters."""
- data = None
- if self.convertHTMLEntities:
- try:
- data = unichr(name2codepoint[ref])
- except KeyError:
- pass
-
- if not data and self.convertXMLEntities:
- data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
-
- if not data and self.convertHTMLEntities and \
- not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
- # TODO: We've got a problem here. We're told this is
- # an entity reference, but it's not an XML entity
- # reference or an HTML entity reference. Nonetheless,
- # the logical thing to do is to pass it through as an
- # unrecognized entity reference.
- #
- # Except: when the input is "&carol;" this function
- # will be called with input "carol". When the input is
- # "AT&T", this function will be called with input
- # "T". We have no way of knowing whether a semicolon
- # was present originally, so we don't know whether
- # this is an unknown entity or just a misplaced
- # ampersand.
- #
- # The more common case is a misplaced ampersand, so I
- # escape the ampersand and omit the trailing semicolon.
- data = "&amp;%s" % ref
- if not data:
- # This case is different from the one above, because we
- # haven't already gone through a supposedly comprehensive
- # mapping of entities to Unicode characters. We might not
- # have gone through any mapping at all. So the chances are
- # very high that this is a real entity, and not a
- # misplaced ampersand.
- data = "&%s;" % ref
- self.handle_data(data)
-
- def handle_decl(self, data):
- "Handle DOCTYPEs and the like as Declaration objects."
- self._toStringSubclass(data, Declaration)
-
- def parse_declaration(self, i):
- """Treat a bogus SGML declaration as raw data. Treat a CDATA
- declaration as a CData object."""
- j = None
- if self.rawdata[i:i+9] == '<![CDATA[':
- k = self.rawdata.find(']]>', i)
- if k == -1:
- k = len(self.rawdata)
- data = self.rawdata[i+9:k]
- j = k+3
- self._toStringSubclass(data, CData)
- else:
- try:
- j = SGMLParser.parse_declaration(self, i)
- except SGMLParseError:
- toHandle = self.rawdata[i:]
- self.handle_data(toHandle)
- j = i + len(toHandle)
- return j
-
-
-class BeautifulSoup(BeautifulStoneSoup):
-
- """This parser knows the following facts about HTML:
-
- * Some tags have no closing tag and should be interpreted as being
- closed as soon as they are encountered.
-
- * The text inside some tags (ie. 'script') may contain tags which
- are not really part of the document and which should be parsed
- as text, not tags. If you want to parse the text as tags, you can
- always fetch it and parse it explicitly.
-
- * Tag nesting rules:
-
- Most tags can't be nested at all. For instance, the occurance of
- a <p> tag should implicitly close the previous <p> tag.
-
- <p>Para1<p>Para2
- should be transformed into:
- <p>Para1</p><p>Para2
-
- Some tags can be nested arbitrarily. For instance, the occurance
- of a <blockquote> tag should _not_ implicitly close the previous
- <blockquote> tag.
-
- Alice said: <blockquote>Bob said: <blockquote>Blah
- should NOT be transformed into:
- Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
-
- Some tags can be nested, but the nesting is reset by the
- interposition of other tags. For instance, a <tr> tag should
- implicitly close the previous <tr> tag within the same <table>,
- but not close a <tr> tag in another table.
-
- <table><tr>Blah<tr>Blah
- should be transformed into:
- <table><tr>Blah</tr><tr>Blah
- but,
- <tr>Blah<table><tr>Blah
- should NOT be transformed into
- <tr>Blah<table></tr><tr>Blah
-
- Differing assumptions about tag nesting rules are a major source
- of problems with the BeautifulSoup class. If BeautifulSoup is not
- treating as nestable a tag your page author treats as nestable,
- try ICantBelieveItsBeautifulSoup, MinimalSoup, or
- BeautifulStoneSoup before writing your own subclass."""
-
- def __init__(self, *args, **kwargs):
- if not kwargs.has_key('smartQuotesTo'):
- kwargs['smartQuotesTo'] = self.HTML_ENTITIES
- kwargs['isHTML'] = True
- BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
- SELF_CLOSING_TAGS = buildTagMap(None,
- ('br', 'hr', 'input', 'img', 'meta',
- 'spacer', 'link', 'frame', 'base', 'col'))
-
- PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
-
- QUOTE_TAGS = {'script': None, 'textarea': None}
-
- # According to the HTML standard, each of these inline tags can
- # contain another tag of the same type. Furthermore, it's common
- # to actually use these tags this way.
- NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
- 'center')
-
- # According to the HTML standard, these block tags can contain
- # another tag of the same type. Furthermore, it's common
- # to actually use these tags this way.
- NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
-
- # Lists can contain other lists, but there are restrictions.
- NESTABLE_LIST_TAGS = {'ol': [],
- 'ul': [],
- 'li': ['ul', 'ol'],
- 'dl': [],
- 'dd': ['dl'],
- 'dt': ['dl']}
-
- # Tables can contain other tables, but there are restrictions.
- NESTABLE_TABLE_TAGS = {'table': [],
- 'tr': ['table', 'tbody', 'tfoot', 'thead'],
- 'td': ['tr'],
- 'th': ['tr'],
- 'thead': ['table'],
- 'tbody': ['table'],
- 'tfoot': ['table'],
- }
-
- NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
-
- # If one of these tags is encountered, all tags up to the next tag of
- # this type are popped.
- RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
- NON_NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS,
- NESTABLE_TABLE_TAGS)
-
- NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
-
- # Used to detect the charset in a META tag; see start_meta
- CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
-
- def start_meta(self, attrs):
- """Beautiful Soup can detect a charset included in a META tag,
- try to convert the document to that charset, and re-parse the
- document from the beginning."""
- httpEquiv = None
- contentType = None
- contentTypeIndex = None
- tagNeedsEncodingSubstitution = False
-
- for i in range(0, len(attrs)):
- key, value = attrs[i]
- key = key.lower()
- if key == 'http-equiv':
- httpEquiv = value
- elif key == 'content':
- contentType = value
- contentTypeIndex = i
-
- if httpEquiv and contentType: # It's an interesting meta tag.
- match = self.CHARSET_RE.search(contentType)
- if match:
- if (self.declaredHTMLEncoding is not None or
- self.originalEncoding == self.fromEncoding):
- # An HTML encoding was sniffed while converting
- # the document to Unicode, or an HTML encoding was
- # sniffed during a previous pass through the
- # document, or an encoding was specified
- # explicitly and it worked. Rewrite the meta tag.
- def rewrite(match):
- return match.group(1) + "%SOUP-ENCODING%"
- newAttr = self.CHARSET_RE.sub(rewrite, contentType)
- attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
- newAttr)
- tagNeedsEncodingSubstitution = True
- else:
- # This is our first pass through the document.
- # Go through it again with the encoding information.
- newCharset = match.group(3)
- if newCharset and newCharset != self.originalEncoding:
- self.declaredHTMLEncoding = newCharset
- self._feed(self.declaredHTMLEncoding)
- raise StopParsing
- pass
- tag = self.unknown_starttag("meta", attrs)
- if tag and tagNeedsEncodingSubstitution:
- tag.containsSubstitutions = True
-
-
-class StopParsing(Exception):
- pass
-
-
-class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
- """The BeautifulSoup class is oriented towards skipping over
- common HTML errors like unclosed tags. However, sometimes it makes
- errors of its own. For instance, consider this fragment:
-
- <b>Foo<b>Bar</b></b>
-
- This is perfectly valid (if bizarre) HTML. However, the
- BeautifulSoup class will implicitly close the first b tag when it
- encounters the second 'b'. It will think the author wrote
- "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
- there's no real-world reason to bold something that's already
- bold. When it encounters '</b></b>' it will close two more 'b'
- tags, for a grand total of three tags closed instead of two. This
- can throw off the rest of your document structure. The same is
- true of a number of other tags, listed below.
-
- It's much more common for someone to forget to close a 'b' tag
- than to actually use nested 'b' tags, and the BeautifulSoup class
- handles the common case. This class handles the not-co-common
- case: where you can't believe someone wrote what they did, but
- it's valid HTML and BeautifulSoup screwed up by assuming it
- wouldn't be."""
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
- ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
- 'big')
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
-
- NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-
-class MinimalSoup(BeautifulSoup):
- """The MinimalSoup class is for parsing HTML that contains
- pathologically bad markup. It makes no assumptions about tag
- nesting, but it does know which tags are self-closing, that
- <script> tags contain Javascript and should not be parsed, that
- META tags may contain encoding information, and so on.
-
- This also makes it better for subclassing than BeautifulStoneSoup
- or BeautifulSoup."""
-
- RESET_NESTING_TAGS = buildTagMap('noscript')
- NESTABLE_TAGS = {}
-
-
-class BeautifulSOAP(BeautifulStoneSoup):
- """This class will push a tag with only a single string child into
- the tag's parent as an attribute. The attribute's name is the tag
- name, and the value is the string child. An example should give
- the flavor of the change:
-
- <foo><bar>baz</bar></foo>
- =>
- <foo bar="baz"><bar>baz</bar></foo>
-
- You can then access fooTag['bar'] instead of fooTag.barTag.string.
-
- This is, of course, useful for scraping structures that tend to
- use subelements instead of attributes, such as SOAP messages. Note
- that it modifies its input, so don't print the modified version
- out.
-
- I'm not sure how many people really want to use this class; let me
- know if you do. Mainly I like the name."""
-
- def popTag(self):
- if len(self.tagStack) > 1:
- tag = self.tagStack[-1]
- parent = self.tagStack[-2]
- parent._getAttrMap()
- if (isinstance(tag, Tag) and len(tag.contents) == 1 and
- isinstance(tag.contents[0], NavigableString) and
- not parent.attrMap.has_key(tag.name)):
- parent[tag.name] = tag.contents[0]
- BeautifulStoneSoup.popTag(self)
-
-# Enterprise class names! It has come to our attention that some people
-# think the names of the Beautiful Soup parser classes are too silly
-# and "unprofessional" for use in enterprise screen-scraping. We feel
-# your pain! For such-minded folk, the Beautiful Soup Consortium And
-# All-Night Kosher Bakery recommends renaming this file to
-# "RobustParser.py" (or, in cases of extreme enterprisiness,
-# "RobustParserBeanInterface.class") and using the following
-# enterprise-friendly class aliases:
-
-
-class RobustXMLParser(BeautifulStoneSoup):
- pass
-
-
-class RobustHTMLParser(BeautifulSoup):
- pass
-
-
-class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
- pass
-
-
-class RobustInsanelyWackAssHTMLParser(MinimalSoup):
- pass
-
-
-class SimplifyingSOAPParser(BeautifulSOAP):
- pass
-
-######################################################
-#
-# Bonus library: Unicode, Dammit
-#
-# This class forces XML data into a standard format (usually to UTF-8
-# or Unicode). It is heavily based on code from Mark Pilgrim's
-# Universal Feed Parser. It does not rewrite the XML or HTML to
-# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
-# (XML) and BeautifulSoup.start_meta (HTML).
-
-
-# Autodetects character encodings.
-# Download from http://chardet.feedparser.org/
-try:
- import chardet
-# import chardet.constants
-# chardet.constants._debug = 1
-except ImportError:
- chardet = None
-
-# cjkcodecs and iconv_codec make Python know about more character encodings.
-# Both are available from http://cjkpython.i18n.org/
-# They're built in if you use Python 2.4.
-try:
- import cjkcodecs.aliases
-except ImportError:
- pass
-try:
- import iconv_codec
-except ImportError:
- pass
-
-
-class UnicodeDammit:
- """A class for detecting the encoding of a *ML document and
- converting it to a Unicode string. If the source encoding is
- windows-1252, can replace MS smart quotes with their HTML or XML
- equivalents."""
-
- # This dictionary maps commonly seen values for "charset" in HTML
- # meta tags to the corresponding Python codec names. It only covers
- # values that aren't in Python's aliases and can't be determined
- # by the heuristics in find_codec.
- CHARSET_ALIASES = {"macintosh": "mac-roman",
- "x-sjis": "shift-jis"}
-
- def __init__(self, markup, overrideEncodings=[],
- smartQuotesTo='xml', isHTML=False):
- self.declaredHTMLEncoding = None
- self.markup, documentEncoding, sniffedEncoding = \
- self._detectEncoding(markup, isHTML)
- self.smartQuotesTo = smartQuotesTo
- self.triedEncodings = []
- if markup == '' or isinstance(markup, unicode):
- self.originalEncoding = None
- self.unicode = unicode(markup)
- return
-
- u = None
- for proposedEncoding in overrideEncodings:
- u = self._convertFrom(proposedEncoding)
- if u:
- break
- if not u:
- for proposedEncoding in (documentEncoding, sniffedEncoding):
- u = self._convertFrom(proposedEncoding)
- if u:
- break
-
- # If no luck and we have auto-detection library, try that:
- if not u and chardet and not isinstance(self.markup, unicode):
- u = self._convertFrom(chardet.detect(self.markup)['encoding'])
-
- # As a last resort, try utf-8 and windows-1252:
- if not u:
- for proposed_encoding in ("utf-8", "windows-1252"):
- u = self._convertFrom(proposed_encoding)
- if u:
- break
-
- self.unicode = u
- if not u:
- self.originalEncoding = None
-
- def _subMSChar(self, orig):
- """Changes a MS smart quote character to an XML or HTML
- entity."""
- sub = self.MS_CHARS.get(orig)
- if isinstance(sub, tuple):
- if self.smartQuotesTo == 'xml':
- sub = '&#x%s;' % sub[1]
- else:
- sub = '&%s;' % sub[0]
- return sub
-
- def _convertFrom(self, proposed):
- proposed = self.find_codec(proposed)
- if not proposed or proposed in self.triedEncodings:
- return None
- self.triedEncodings.append(proposed)
- markup = self.markup
-
- # Convert smart quotes to HTML if coming from an encoding
- # that might have them.
- if self.smartQuotesTo and proposed.lower() in("windows-1252",
- "iso-8859-1",
- "iso-8859-2"):
- markup = re.compile("([\x80-\x9f])").sub(lambda(x): self._subMSChar(x.group(1)),
- markup)
-
- try:
- # print "Trying to convert document to %s" % proposed
- u = self._toUnicode(markup, proposed)
- self.markup = u
- self.originalEncoding = proposed
- except Exception, e:
- # print "That didn't work!"
- # print e
- return None
- # print "Correct encoding: %s" % proposed
- return self.markup
-
- def _toUnicode(self, data, encoding):
- '''Given a string and its encoding, decodes the string into Unicode.
- %encoding is a string recognized by encodings.aliases'''
-
- # strip Byte Order Mark (if present)
- if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
- and (data[2:4] != '\x00\x00'):
- encoding = 'utf-16be'
- data = data[2:]
- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
- and (data[2:4] != '\x00\x00'):
- encoding = 'utf-16le'
- data = data[2:]
- elif data[:3] == '\xef\xbb\xbf':
- encoding = 'utf-8'
- data = data[3:]
- elif data[:4] == '\x00\x00\xfe\xff':
- encoding = 'utf-32be'
- data = data[4:]
- elif data[:4] == '\xff\xfe\x00\x00':
- encoding = 'utf-32le'
- data = data[4:]
- newdata = unicode(data, encoding)
- return newdata
-
- def _detectEncoding(self, xml_data, isHTML=False):
- """Given a document, tries to detect its XML encoding."""
- xml_encoding = sniffed_xml_encoding = None
- try:
- if xml_data[:4] == '\x4c\x6f\xa7\x94':
- # EBCDIC
- xml_data = self._ebcdic_to_ascii(xml_data)
- elif xml_data[:4] == '\x00\x3c\x00\x3f':
- # UTF-16BE
- sniffed_xml_encoding = 'utf-16be'
- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
- and (xml_data[2:4] != '\x00\x00'):
- # UTF-16BE with BOM
- sniffed_xml_encoding = 'utf-16be'
- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x3f\x00':
- # UTF-16LE
- sniffed_xml_encoding = 'utf-16le'
- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
- (xml_data[2:4] != '\x00\x00'):
- # UTF-16LE with BOM
- sniffed_xml_encoding = 'utf-16le'
- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\x00\x3c':
- # UTF-32BE
- sniffed_xml_encoding = 'utf-32be'
- xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\x3c\x00\x00\x00':
- # UTF-32LE
- sniffed_xml_encoding = 'utf-32le'
- xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
- elif xml_data[:4] == '\x00\x00\xfe\xff':
- # UTF-32BE with BOM
- sniffed_xml_encoding = 'utf-32be'
- xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
- elif xml_data[:4] == '\xff\xfe\x00\x00':
- # UTF-32LE with BOM
- sniffed_xml_encoding = 'utf-32le'
- xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
- elif xml_data[:3] == '\xef\xbb\xbf':
- # UTF-8 with BOM
- sniffed_xml_encoding = 'utf-8'
- xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
- else:
- sniffed_xml_encoding = 'ascii'
- pass
- except:
- xml_encoding_match = None
- xml_encoding_match = re.compile(
- '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
- if not xml_encoding_match and isHTML:
- regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I)
- xml_encoding_match = regexp.search(xml_data)
- if xml_encoding_match is not None:
- xml_encoding = xml_encoding_match.groups()[0].lower()
- if isHTML:
- self.declaredHTMLEncoding = xml_encoding
- if sniffed_xml_encoding and \
- (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
- 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
- 'utf-16', 'utf-32', 'utf_16', 'utf_32',
- 'utf16', 'u16')):
- xml_encoding = sniffed_xml_encoding
- return xml_data, xml_encoding, sniffed_xml_encoding
-
- def find_codec(self, charset):
- return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
- or (charset and self._codec(charset.replace("-", ""))) \
- or (charset and self._codec(charset.replace("-", "_"))) \
- or charset
-
- def _codec(self, charset):
- if not charset:
- return charset
- codec = None
- try:
- codecs.lookup(charset)
- codec = charset
- except (LookupError, ValueError):
- pass
- return codec
-
- EBCDIC_TO_ASCII_MAP = None
-
- def _ebcdic_to_ascii(self, s):
- c = self.__class__
- if not c.EBCDIC_TO_ASCII_MAP:
- emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
- 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
- 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
- 32, 160, 161, 162, 163, 164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33,
- 38, 169, 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59, 94,
- 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44, 37, 95, 62, 63,
- 186, 187, 188, 189, 190, 191, 192, 193, 194, 96, 58, 35, 64, 39, 61, 34,
- 195, 97, 98, 99, 100, 101, 102, 103, 104, 105, 196, 197, 198, 199, 200,
- 201, 202, 106, 107, 108, 109, 110, 111, 112, 113, 114, 203, 204, 205,
- 206, 207, 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210,
- 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 123, 65, 66, 67, 68, 69, 70, 71, 72,
- 73, 232, 233, 234, 235, 236, 237, 125, 74, 75, 76, 77, 78, 79, 80, 81,
- 82, 238, 239, 240, 241, 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89,
- 90, 244, 245, 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
- 250, 251, 252, 253, 254, 255)
- import string
- c.EBCDIC_TO_ASCII_MAP = string.maketrans(
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
- return s.translate(c.EBCDIC_TO_ASCII_MAP)
-
- MS_CHARS = {'\x80': ('euro', '20AC'),
- '\x81': ' ',
- '\x82': ('sbquo', '201A'),
- '\x83': ('fnof', '192'),
- '\x84': ('bdquo', '201E'),
- '\x85': ('hellip', '2026'),
- '\x86': ('dagger', '2020'),
- '\x87': ('Dagger', '2021'),
- '\x88': ('circ', '2C6'),
- '\x89': ('permil', '2030'),
- '\x8A': ('Scaron', '160'),
- '\x8B': ('lsaquo', '2039'),
- '\x8C': ('OElig', '152'),
- '\x8D': '?',
- '\x8E': ('#x17D', '17D'),
- '\x8F': '?',
- '\x90': '?',
- '\x91': ('lsquo', '2018'),
- '\x92': ('rsquo', '2019'),
- '\x93': ('ldquo', '201C'),
- '\x94': ('rdquo', '201D'),
- '\x95': ('bull', '2022'),
- '\x96': ('ndash', '2013'),
- '\x97': ('mdash', '2014'),
- '\x98': ('tilde', '2DC'),
- '\x99': ('trade', '2122'),
- '\x9a': ('scaron', '161'),
- '\x9b': ('rsaquo', '203A'),
- '\x9c': ('oelig', '153'),
- '\x9d': '?',
- '\x9e': ('#x17E', '17E'),
- '\x9f': ('Yuml', ''), }
-
-#######################################################################
-
-
-# By default, act as an HTML pretty-printer.
-if __name__ == '__main__':
- import sys
- soup = BeautifulSoup(sys.stdin)
- print soup.prettify()
diff --git a/cgi/api.py b/cgi/api.py
index fc44d76..0c6d61a 100644
--- a/cgi/api.py
+++ b/cgi/api.py
@@ -1,6 +1,5 @@
# coding=utf-8
import json
-import _mysql
import time
from framework import *
@@ -50,10 +49,10 @@ def api_process(self, path_split):
'SELECT dir, name, board_type, allow_images, allow_image_replies, maxsize FROM `boards` WHERE `secret`=0 ORDER BY `sort` ASC')
values['boards'] = boards
for board in values['boards']:
- board['board_type'] = int(board['board_type'])
- board['allow_images'] = int(board['allow_images'])
- board['allow_image_replies'] = int(board['allow_image_replies'])
- board['maxsize'] = int(board['maxsize'])
+ board['board_type'] = board['board_type']
+ board['allow_images'] = board['allow_images']
+ board['allow_image_replies'] = board['allow_image_replies']
+ board['maxsize'] = board['maxsize']
elif method == 'last':
data_limit = formdata.get('limit')
@@ -82,15 +81,15 @@ def api_process(self, path_split):
values['posts'] = FetchAll(sql)
for post in values['posts']:
- post['id'] = int(post['id'])
- post['timestamp'] = int(post['timestamp'])
- post['parentid'] = int(post['parentid'])
- post['file_size'] = int(post['file_size'])
- post['image_width'] = int(post['image_width'])
- post['image_height'] = int(post['image_height'])
- post['thumb_width'] = int(post['thumb_width'])
- post['thumb_height'] = int(post['thumb_height'])
- post['message'] = post['message'].decode('utf-8', 'replace')
+ post['id'] = post['id']
+ post['timestamp'] = post['timestamp']
+ post['parentid'] = post['parentid']
+ post['file_size'] = post['file_size']
+ post['image_width'] = post['image_width']
+ post['image_height'] = post['image_height']
+ post['thumb_width'] = post['thumb_width']
+ post['thumb_height'] = post['thumb_height']
+ post['message'] = post['message']
elif method == 'lastage':
data_limit = formdata.get('limit')
data_time = formdata.get('time', 0)
@@ -154,28 +153,28 @@ def api_process(self, path_split):
if numreplies:
for thread in threads:
- lastreplies = FetchAll("SELECT id, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_height, image_width, thumb, thumb_width, thumb_height, IS_DELETED FROM `posts` WHERE parentid = %s AND boardid = %s ORDER BY `timestamp` DESC LIMIT %d" % (
- thread['id'], board['id'], numreplies))
+ lastreplies = FetchAll("SELECT id, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_height, image_width, thumb, thumb_width, thumb_height, IS_DELETED FROM `posts` WHERE parentid = %s AND boardid = %s ORDER BY `timestamp` DESC LIMIT %s",
+ (thread['id'], board['id'], numreplies))
lastreplies = lastreplies[::-1]
- thread['id'] = int(thread['id'])
- thread['timestamp'] = int(thread['timestamp'])
- thread['bumped'] = int(thread['bumped'])
- thread['expires'] = int(thread['expires'])
- thread['total_replies'] = int(thread['total_replies'])
- thread['total_files'] = int(thread['total_files'])
- thread['file_size'] = int(thread['file_size'])
- thread['image_width'] = int(thread['image_width'])
- thread['image_height'] = int(thread['image_height'])
- thread['thumb_width'] = int(thread['thumb_width'])
- thread['thumb_height'] = int(thread['thumb_height'])
- thread['locked'] = int(thread['locked'])
+ thread['id'] = thread['id']
+ thread['timestamp'] = thread['timestamp']
+ thread['bumped'] = thread['bumped']
+ thread['expires'] = thread['expires']
+ thread['total_replies'] = thread['total_replies']
+ thread['total_files'] = thread['total_files']
+ thread['file_size'] = thread['file_size']
+ thread['image_width'] = thread['image_width']
+ thread['image_height'] = thread['image_height']
+ thread['thumb_width'] = thread['thumb_width']
+ thread['thumb_height'] = thread['thumb_height']
+ thread['locked'] = thread['locked']
thread['replies'] = []
for post in lastreplies:
- post['IS_DELETED'] = int(post['IS_DELETED'])
- post['id'] = int(post['id'])
- post['timestamp'] = int(post['timestamp'])
+ post['IS_DELETED'] = post['IS_DELETED']
+ post['id'] = post['id']
+ post['timestamp'] = post['timestamp']
if post['IS_DELETED']:
empty_post = {'id': post['id'],
@@ -184,13 +183,12 @@ def api_process(self, path_split):
}
thread['replies'].append(empty_post)
else:
- post['file_size'] = int(post['file_size'])
- post['image_width'] = int(post['image_width'])
- post['image_height'] = int(post['image_height'])
- post['thumb_width'] = int(post['thumb_width'])
- post['thumb_height'] = int(post['thumb_height'])
- post['message'] = post['message'].decode(
- 'utf-8', 'replace')
+ post['file_size'] = post['file_size']
+ post['image_width'] = post['image_width']
+ post['image_height'] = post['image_height']
+ post['thumb_width'] = post['thumb_width']
+ post['thumb_height'] = post['thumb_height']
+ post['message'] = post['message']
thread['replies'].append(post)
@@ -249,19 +247,19 @@ def api_process(self, path_split):
if not op_post:
raise APIError("Not a thread")
- values['id'] = int(op_post['id'])
- values['timestamp'] = int(op_post['timestamp'])
+ values['id'] = op_post['id']
+ values['timestamp'] = op_post['timestamp']
values['subject'] = op_post['subject']
- values['locked'] = int(op_post['locked'])
+ values['locked'] = op_post['locked']
- total_replies = int(FetchOne("SELECT COUNT(1) FROM posts WHERE boardid = '%s' AND parentid = '%d'" % (
- board["id"], values['id']), 0)[0])
+ total_replies = FetchOne("SELECT COUNT(1) AS count FROM posts WHERE boardid = %s AND parentid = %s",
+ (board["id"], values['id']))["count"]
values['total_replies'] = total_replies
- sql = "SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE boardid = %s AND (parentid = %s OR id = %s) ORDER BY id ASC LIMIT %d OFFSET %d" % (
- _mysql.escape_string(board['id']), values['id'], values['id'], limit, offset)
- posts = FetchAll(sql)
+ sql = "SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE boardid = %s AND (parentid = %s OR id = %s) ORDER BY id ASC LIMIT %s OFFSET %s"
+ sqlv = (board['id'], values['id'], values['id'], limit, offset)
+ posts = FetchAll(sql, sqlv)
values['posts'] = []
@@ -279,12 +277,12 @@ def api_process(self, path_split):
}
values['posts'].append(empty_post)
else:
- post['file_size'] = int(post['file_size'])
- post['image_width'] = int(post['image_width'])
- post['image_height'] = int(post['image_height'])
- post['thumb_width'] = int(post['thumb_width'])
- post['thumb_height'] = int(post['thumb_height'])
- post['message'] = post['message'].decode('utf-8', 'replace')
+ post['file_size'] = post['file_size']
+ post['image_width'] = post['image_width']
+ post['image_height'] = post['image_height']
+ post['thumb_width'] = post['thumb_width']
+ post['thumb_height'] = post['thumb_height']
+ post['message'] = post['message']
if striphtml:
post['message'] = post['message'].replace("<br />", " ")
post['message'] = re.compile(
@@ -310,18 +308,18 @@ def api_process(self, path_split):
except ValueError:
raise APIError("Post ID must be numeric")
- post = FetchOne("SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE `id`='%d' AND boardid='%s'" % (
- postid, board["id"]))
+ post = FetchOne("SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE `id` = %s AND boardid = %s"
+ (postid, board["id"]))
if not post:
raise APIError("Post ID cannot be found")
values['posts'] = []
- post['IS_DELETED'] = int(post['IS_DELETED'])
- post['id'] = int(post['id'])
- post['parentid'] = int(post['parentid'])
- post['timestamp'] = int(post['timestamp'])
+ post['IS_DELETED'] = post['IS_DELETED']
+ post['id'] = post['id']
+ post['parentid'] = post['parentid']
+ post['timestamp'] = post['timestamp']
if post['IS_DELETED']:
empty_post = {'id': post['id'],
@@ -331,12 +329,12 @@ def api_process(self, path_split):
}
values['posts'].append(empty_post)
else:
- post['file_size'] = int(post['file_size'])
- post['image_width'] = int(post['image_width'])
- post['image_height'] = int(post['image_height'])
- post['thumb_width'] = int(post['thumb_width'])
- post['thumb_height'] = int(post['thumb_height'])
- post['message'] = post['message'].decode('utf-8', 'replace')
+ post['file_size'] = post['file_size']
+ post['image_width'] = post['image_width']
+ post['image_height'] = post['image_height']
+ post['thumb_width'] = post['thumb_width']
+ post['thumb_height'] = post['thumb_height']
+ post['message'] = post['message']
values['posts'].append(post)
elif method == 'delete':
data_board = formdata.get('dir')
@@ -402,22 +400,21 @@ def api_process(self, path_split):
threads = getNewThreads(limit)
values['threads'] = threads
elif method == "blotter":
- latest_news = FetchAll(
- "SELECT `timestamp`, `message`, `timestamp_formatted` FROM `news` WHERE `type` = '2' ORDER BY `timestamp` DESC LIMIT " + str(Settings.HOME_NEWS))
+ latest_news = FetchAll("SELECT `timestamp`, `message`, `timestamp_formatted` FROM `news` WHERE `type` = '2' ORDER BY `timestamp` DESC LIMIT %s", (Settings.HOME_NEWS,))
values["news"] = latest_news
elif method == 'boardsExtra':
boards = FetchAll('SELECT dir, name, longname, subname, postarea_desc, postarea_extra, anonymous, subject, message, disable_name, disable_subject, allow_spoilers, allow_oekaki, numthreads, board_type, allow_images, allow_image_replies, maxsize FROM `boards` WHERE `secret`=0 ORDER BY `sort` ASC')
values['boards'] = boards
for board in values['boards']:
- board['board_type'] = int(board['board_type'])
- board['allow_images'] = int(board['allow_images'])
- board['allow_image_replies'] = int(board['allow_image_replies'])
- board['disable_name'] = int(board['disable_name'])
- board['disable_subject'] = int(board['disable_subject'])
- board['allow_spoilers'] = int(board['allow_spoilers'])
- board['allow_oekaki'] = int(board['allow_oekaki'])
- board['numthreads'] = int(board['numthreads'])
- board['maxsize'] = int(board['maxsize'])
+ board['board_type'] = board['board_type']
+ board['allow_images'] = board['allow_images']
+ board['allow_image_replies'] = board['allow_image_replies']
+ board['disable_name'] = board['disable_name']
+ board['disable_subject'] = board['disable_subject']
+ board['allow_spoilers'] = board['allow_spoilers']
+ board['allow_oekaki'] = board['allow_oekaki']
+ board['numthreads'] = board['numthreads']
+ board['maxsize'] = board['maxsize']
else:
raise APIError("Invalid method")
diff --git a/cgi/fcgi.py b/cgi/fcgi.py
deleted file mode 100644
index 08af980..0000000
--- a/cgi/fcgi.py
+++ /dev/null
@@ -1,1363 +0,0 @@
-# Copyright (c) 2002, 2003, 2005, 2006 Allan Saddi <allan@saddi.com>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-#
-# $Id$
-
-"""
-fcgi - a FastCGI/WSGI gateway.
-
-For more information about FastCGI, see <http://www.fastcgi.com/>.
-
-For more information about the Web Server Gateway Interface, see
-<http://www.python.org/peps/pep-0333.html>.
-
-Example usage:
-
- #!/usr/bin/env python
- from myapplication import app # Assume app is your WSGI application object
- from fcgi import WSGIServer
- WSGIServer(app).run()
-
-See the documentation for WSGIServer/Server for more information.
-
-On most platforms, fcgi will fallback to regular CGI behavior if run in a
-non-FastCGI context. If you want to force CGI behavior, set the environment
-variable FCGI_FORCE_CGI to "Y" or "y".
-"""
-
-__author__ = 'Allan Saddi <allan@saddi.com>'
-__version__ = '$Revision$'
-
-import sys
-import os
-import signal
-import struct
-import cStringIO as StringIO
-import select
-import socket
-import errno
-import traceback
-
-try:
- import thread
- import threading
- thread_available = True
-except ImportError:
- import dummy_thread as thread
- import dummy_threading as threading
- thread_available = False
-
-# Apparently 2.3 doesn't define SHUT_WR? Assume it is 1 in this case.
-if not hasattr(socket, 'SHUT_WR'):
- socket.SHUT_WR = 1
-
-__all__ = ['WSGIServer']
-
-# Constants from the spec.
-FCGI_LISTENSOCK_FILENO = 0
-
-FCGI_HEADER_LEN = 8
-
-FCGI_VERSION_1 = 1
-
-FCGI_BEGIN_REQUEST = 1
-FCGI_ABORT_REQUEST = 2
-FCGI_END_REQUEST = 3
-FCGI_PARAMS = 4
-FCGI_STDIN = 5
-FCGI_STDOUT = 6
-FCGI_STDERR = 7
-FCGI_DATA = 8
-FCGI_GET_VALUES = 9
-FCGI_GET_VALUES_RESULT = 10
-FCGI_UNKNOWN_TYPE = 11
-FCGI_MAXTYPE = FCGI_UNKNOWN_TYPE
-
-FCGI_NULL_REQUEST_ID = 0
-
-FCGI_KEEP_CONN = 1
-
-FCGI_RESPONDER = 1
-FCGI_AUTHORIZER = 2
-FCGI_FILTER = 3
-
-FCGI_REQUEST_COMPLETE = 0
-FCGI_CANT_MPX_CONN = 1
-FCGI_OVERLOADED = 2
-FCGI_UNKNOWN_ROLE = 3
-
-FCGI_MAX_CONNS = 'FCGI_MAX_CONNS'
-FCGI_MAX_REQS = 'FCGI_MAX_REQS'
-FCGI_MPXS_CONNS = 'FCGI_MPXS_CONNS'
-
-FCGI_Header = '!BBHHBx'
-FCGI_BeginRequestBody = '!HB5x'
-FCGI_EndRequestBody = '!LB3x'
-FCGI_UnknownTypeBody = '!B7x'
-
-FCGI_EndRequestBody_LEN = struct.calcsize(FCGI_EndRequestBody)
-FCGI_UnknownTypeBody_LEN = struct.calcsize(FCGI_UnknownTypeBody)
-
-if __debug__:
- import time
-
- # Set non-zero to write debug output to a file.
- DEBUG = 0
- DEBUGLOG = '/tmp/fcgi.log'
-
- def _debug(level, msg):
- if DEBUG < level:
- return
-
- try:
- f = open(DEBUGLOG, 'a')
- f.write('%sfcgi: %s\n' % (time.ctime()[4:-4], msg))
- f.close()
- except:
- pass
-
-
-class InputStream(object):
- """
- File-like object representing FastCGI input streams (FCGI_STDIN and
- FCGI_DATA). Supports the minimum methods required by WSGI spec.
- """
-
- def __init__(self, conn):
- self._conn = conn
-
- # See Server.
- self._shrinkThreshold = conn.server.inputStreamShrinkThreshold
-
- self._buf = ''
- self._bufList = []
- self._pos = 0 # Current read position.
- self._avail = 0 # Number of bytes currently available.
-
- self._eof = False # True when server has sent EOF notification.
-
- def _shrinkBuffer(self):
- """Gets rid of already read data (since we can't rewind)."""
- if self._pos >= self._shrinkThreshold:
- self._buf = self._buf[self._pos:]
- self._avail -= self._pos
- self._pos = 0
-
- assert self._avail >= 0
-
- def _waitForData(self):
- """Waits for more data to become available."""
- self._conn.process_input()
-
- def read(self, n=-1):
- if self._pos == self._avail and self._eof:
- return ''
- while True:
- if n < 0 or (self._avail - self._pos) < n:
- # Not enough data available.
- if self._eof:
- # And there's no more coming.
- newPos = self._avail
- break
- else:
- # Wait for more data.
- self._waitForData()
- continue
- else:
- newPos = self._pos + n
- break
- # Merge buffer list, if necessary.
- if self._bufList:
- self._buf += ''.join(self._bufList)
- self._bufList = []
- r = self._buf[self._pos:newPos]
- self._pos = newPos
- self._shrinkBuffer()
- return r
-
- def readline(self, length=None):
- if self._pos == self._avail and self._eof:
- return ''
- while True:
- # Unfortunately, we need to merge the buffer list early.
- if self._bufList:
- self._buf += ''.join(self._bufList)
- self._bufList = []
- # Find newline.
- i = self._buf.find('\n', self._pos)
- if i < 0:
- # Not found?
- if self._eof:
- # No more data coming.
- newPos = self._avail
- break
- else:
- # Wait for more to come.
- self._waitForData()
- continue
- else:
- newPos = i + 1
- break
- if length is not None:
- if self._pos + length < newPos:
- newPos = self._pos + length
- r = self._buf[self._pos:newPos]
- self._pos = newPos
- self._shrinkBuffer()
- return r
-
- def readlines(self, sizehint=0):
- total = 0
- lines = []
- line = self.readline()
- while line:
- lines.append(line)
- total += len(line)
- if 0 < sizehint <= total:
- break
- line = self.readline()
- return lines
-
- def __iter__(self):
- return self
-
- def next(self):
- r = self.readline()
- if not r:
- raise StopIteration
- return r
-
- def add_data(self, data):
- if not data:
- self._eof = True
- else:
- self._bufList.append(data)
- self._avail += len(data)
-
-
-class MultiplexedInputStream(InputStream):
- """
- A version of InputStream meant to be used with MultiplexedConnections.
- Assumes the MultiplexedConnection (the producer) and the Request
- (the consumer) are running in different threads.
- """
-
- def __init__(self, conn):
- super(MultiplexedInputStream, self).__init__(conn)
-
- # Arbitrates access to this InputStream (it's used simultaneously
- # by a Request and its owning Connection object).
- lock = threading.RLock()
-
- # Notifies Request thread that there is new data available.
- self._lock = threading.Condition(lock)
-
- def _waitForData(self):
- # Wait for notification from add_data().
- self._lock.wait()
-
- def read(self, n=-1):
- self._lock.acquire()
- try:
- return super(MultiplexedInputStream, self).read(n)
- finally:
- self._lock.release()
-
- def readline(self, length=None):
- self._lock.acquire()
- try:
- return super(MultiplexedInputStream, self).readline(length)
- finally:
- self._lock.release()
-
- def add_data(self, data):
- self._lock.acquire()
- try:
- super(MultiplexedInputStream, self).add_data(data)
- self._lock.notify()
- finally:
- self._lock.release()
-
-
-class OutputStream(object):
- """
- FastCGI output stream (FCGI_STDOUT/FCGI_STDERR). By default, calls to
- write() or writelines() immediately result in Records being sent back
- to the server. Buffering should be done in a higher level!
- """
-
- def __init__(self, conn, req, type, buffered=False):
- self._conn = conn
- self._req = req
- self._type = type
- self._buffered = buffered
- self._bufList = [] # Used if buffered is True
- self.dataWritten = False
- self.closed = False
-
- def _write(self, data):
- length = len(data)
- while length:
- toWrite = min(length, self._req.server.maxwrite - FCGI_HEADER_LEN)
-
- rec = Record(self._type, self._req.requestId)
- rec.contentLength = toWrite
- rec.contentData = data[:toWrite]
- self._conn.writeRecord(rec)
-
- data = data[toWrite:]
- length -= toWrite
-
- def write(self, data):
- assert not self.closed
-
- if not data:
- return
-
- self.dataWritten = True
-
- if self._buffered:
- self._bufList.append(data)
- else:
- self._write(data)
-
- def writelines(self, lines):
- assert not self.closed
-
- for line in lines:
- self.write(line)
-
- def flush(self):
- # Only need to flush if this OutputStream is actually buffered.
- if self._buffered:
- data = ''.join(self._bufList)
- self._bufList = []
- self._write(data)
-
- # Though available, the following should NOT be called by WSGI apps.
- def close(self):
- """Sends end-of-stream notification, if necessary."""
- if not self.closed and self.dataWritten:
- self.flush()
- rec = Record(self._type, self._req.requestId)
- self._conn.writeRecord(rec)
- self.closed = True
-
-
-class TeeOutputStream(object):
- """
- Simple wrapper around two or more output file-like objects that copies
- written data to all streams.
- """
-
- def __init__(self, streamList):
- self._streamList = streamList
-
- def write(self, data):
- for f in self._streamList:
- f.write(data)
-
- def writelines(self, lines):
- for line in lines:
- self.write(line)
-
- def flush(self):
- for f in self._streamList:
- f.flush()
-
-
-class StdoutWrapper(object):
- """
- Wrapper for sys.stdout so we know if data has actually been written.
- """
-
- def __init__(self, stdout):
- self._file = stdout
- self.dataWritten = False
-
- def write(self, data):
- if data:
- self.dataWritten = True
- self._file.write(data)
-
- def writelines(self, lines):
- for line in lines:
- self.write(line)
-
- def __getattr__(self, name):
- return getattr(self._file, name)
-
-
-def decode_pair(s, pos=0):
- """
- Decodes a name/value pair.
-
- The number of bytes decoded as well as the name/value pair
- are returned.
- """
- nameLength = ord(s[pos])
- if nameLength & 128:
- nameLength = struct.unpack('!L', s[pos:pos+4])[0] & 0x7fffffff
- pos += 4
- else:
- pos += 1
-
- valueLength = ord(s[pos])
- if valueLength & 128:
- valueLength = struct.unpack('!L', s[pos:pos+4])[0] & 0x7fffffff
- pos += 4
- else:
- pos += 1
-
- name = s[pos:pos+nameLength]
- pos += nameLength
- value = s[pos:pos+valueLength]
- pos += valueLength
-
- return (pos, (name, value))
-
-
-def encode_pair(name, value):
- """
- Encodes a name/value pair.
-
- The encoded string is returned.
- """
- nameLength = len(name)
- if nameLength < 128:
- s = chr(nameLength)
- else:
- s = struct.pack('!L', nameLength | 0x80000000L)
-
- valueLength = len(value)
- if valueLength < 128:
- s += chr(valueLength)
- else:
- s += struct.pack('!L', valueLength | 0x80000000L)
-
- return s + name + value
-
-
-class Record(object):
- """
- A FastCGI Record.
-
- Used for encoding/decoding records.
- """
-
- def __init__(self, type=FCGI_UNKNOWN_TYPE, requestId=FCGI_NULL_REQUEST_ID):
- self.version = FCGI_VERSION_1
- self.type = type
- self.requestId = requestId
- self.contentLength = 0
- self.paddingLength = 0
- self.contentData = ''
-
- def _recvall(sock, length):
- """
- Attempts to receive length bytes from a socket, blocking if necessary.
- (Socket may be blocking or non-blocking.)
- """
- dataList = []
- recvLen = 0
- while length:
- try:
- data = sock.recv(length)
- except socket.error, e:
- if e[0] == errno.EAGAIN:
- select.select([sock], [], [])
- continue
- else:
- raise
- if not data: # EOF
- break
- dataList.append(data)
- dataLen = len(data)
- recvLen += dataLen
- length -= dataLen
- return ''.join(dataList), recvLen
- _recvall = staticmethod(_recvall)
-
- def read(self, sock):
- """Read and decode a Record from a socket."""
- try:
- header, length = self._recvall(sock, FCGI_HEADER_LEN)
- except:
- raise EOFError
-
- if length < FCGI_HEADER_LEN:
- raise EOFError
-
- self.version, self.type, self.requestId, self.contentLength, \
- self.paddingLength = struct.unpack(FCGI_Header, header)
-
- if __debug__:
- _debug(9, 'read: fd = %d, type = %d, requestId = %d, '
- 'contentLength = %d' %
- (sock.fileno(), self.type, self.requestId,
- self.contentLength))
-
- if self.contentLength:
- try:
- self.contentData, length = self._recvall(sock,
- self.contentLength)
- except:
- raise EOFError
-
- if length < self.contentLength:
- raise EOFError
-
- if self.paddingLength:
- try:
- self._recvall(sock, self.paddingLength)
- except:
- raise EOFError
-
- def _sendall(sock, data):
- """
- Writes data to a socket and does not return until all the data is sent.
- """
- length = len(data)
- while length:
- try:
- sent = sock.send(data)
- except socket.error, e:
- if e[0] == errno.EAGAIN:
- select.select([], [sock], [])
- continue
- else:
- raise
- data = data[sent:]
- length -= sent
- _sendall = staticmethod(_sendall)
-
- def write(self, sock):
- """Encode and write a Record to a socket."""
- self.paddingLength = -self.contentLength & 7
-
- if __debug__:
- _debug(9, 'write: fd = %d, type = %d, requestId = %d, '
- 'contentLength = %d' %
- (sock.fileno(), self.type, self.requestId,
- self.contentLength))
-
- header = struct.pack(FCGI_Header, self.version, self.type,
- self.requestId, self.contentLength,
- self.paddingLength)
- self._sendall(sock, header)
- if self.contentLength:
- self._sendall(sock, self.contentData)
- if self.paddingLength:
- self._sendall(sock, '\x00'*self.paddingLength)
-
-
-class Request(object):
- """
- Represents a single FastCGI request.
-
- These objects are passed to your handler and is the main interface
- between your handler and the fcgi module. The methods should not
- be called by your handler. However, server, params, stdin, stdout,
- stderr, and data are free for your handler's use.
- """
-
- def __init__(self, conn, inputStreamClass):
- self._conn = conn
-
- self.server = conn.server
- self.params = {}
- self.stdin = inputStreamClass(conn)
- self.stdout = OutputStream(conn, self, FCGI_STDOUT)
- self.stderr = OutputStream(conn, self, FCGI_STDERR, buffered=True)
- self.data = inputStreamClass(conn)
-
- def run(self):
- """Runs the handler, flushes the streams, and ends the request."""
- try:
- protocolStatus, appStatus = self.server.handler(self)
- except:
- traceback.print_exc(file=self.stderr)
- self.stderr.flush()
- if not self.stdout.dataWritten:
- self.server.error(self)
-
- protocolStatus, appStatus = FCGI_REQUEST_COMPLETE, 0
-
- if __debug__:
- _debug(1, 'protocolStatus = %d, appStatus = %d' %
- (protocolStatus, appStatus))
-
- self._flush()
- self._end(appStatus, protocolStatus)
-
- def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE):
- self._conn.end_request(self, appStatus, protocolStatus)
-
- def _flush(self):
- self.stdout.close()
- self.stderr.close()
-
-
-class CGIRequest(Request):
- """A normal CGI request disguised as a FastCGI request."""
-
- def __init__(self, server):
- # These are normally filled in by Connection.
- self.requestId = 1
- self.role = FCGI_RESPONDER
- self.flags = 0
- self.aborted = False
-
- self.server = server
- self.params = dict(os.environ)
- self.stdin = sys.stdin
- self.stdout = StdoutWrapper(sys.stdout) # Oh, the humanity!
- self.stderr = sys.stderr
- self.data = StringIO.StringIO()
-
- def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE):
- sys.exit(appStatus)
-
- def _flush(self):
- # Not buffered, do nothing.
- pass
-
-
-class Connection(object):
- """
- A Connection with the web server.
-
- Each Connection is associated with a single socket (which is
- connected to the web server) and is responsible for handling all
- the FastCGI message processing for that socket.
- """
- _multiplexed = False
- _inputStreamClass = InputStream
-
- def __init__(self, sock, addr, server):
- self._sock = sock
- self._addr = addr
- self.server = server
-
- # Active Requests for this Connection, mapped by request ID.
- self._requests = {}
-
- def _cleanupSocket(self):
- """Close the Connection's socket."""
- try:
- self._sock.shutdown(socket.SHUT_WR)
- except:
- return
- try:
- while True:
- r, w, e = select.select([self._sock], [], [])
- if not r or not self._sock.recv(1024):
- break
- except:
- pass
- self._sock.close()
-
- def run(self):
- """Begin processing data from the socket."""
- self._keepGoing = True
- while self._keepGoing:
- try:
- self.process_input()
- except EOFError:
- break
- except (select.error, socket.error), e:
- if e[0] == errno.EBADF: # Socket was closed by Request.
- break
- raise
-
- self._cleanupSocket()
-
- def process_input(self):
- """Attempt to read a single Record from the socket and process it."""
- # Currently, any children Request threads notify this Connection
- # that it is no longer needed by closing the Connection's socket.
- # We need to put a timeout on select, otherwise we might get
- # stuck in it indefinitely... (I don't like this solution.)
- while self._keepGoing:
- try:
- r, w, e = select.select([self._sock], [], [], 1.0)
- except ValueError:
- # Sigh. ValueError gets thrown sometimes when passing select
- # a closed socket.
- raise EOFError
- if r:
- break
- if not self._keepGoing:
- return
- rec = Record()
- rec.read(self._sock)
-
- if rec.type == FCGI_GET_VALUES:
- self._do_get_values(rec)
- elif rec.type == FCGI_BEGIN_REQUEST:
- self._do_begin_request(rec)
- elif rec.type == FCGI_ABORT_REQUEST:
- self._do_abort_request(rec)
- elif rec.type == FCGI_PARAMS:
- self._do_params(rec)
- elif rec.type == FCGI_STDIN:
- self._do_stdin(rec)
- elif rec.type == FCGI_DATA:
- self._do_data(rec)
- elif rec.requestId == FCGI_NULL_REQUEST_ID:
- self._do_unknown_type(rec)
- else:
- # Need to complain about this.
- pass
-
- def writeRecord(self, rec):
- """
- Write a Record to the socket.
- """
- rec.write(self._sock)
-
- def end_request(self, req, appStatus=0L,
- protocolStatus=FCGI_REQUEST_COMPLETE, remove=True):
- """
- End a Request.
-
- Called by Request objects. An FCGI_END_REQUEST Record is
- sent to the web server. If the web server no longer requires
- the connection, the socket is closed, thereby ending this
- Connection (run() returns).
- """
- rec = Record(FCGI_END_REQUEST, req.requestId)
- rec.contentData = struct.pack(FCGI_EndRequestBody, appStatus,
- protocolStatus)
- rec.contentLength = FCGI_EndRequestBody_LEN
- self.writeRecord(rec)
-
- if remove:
- del self._requests[req.requestId]
-
- if __debug__:
- _debug(2, 'end_request: flags = %d' % req.flags)
-
- if not (req.flags & FCGI_KEEP_CONN) and not self._requests:
- self._cleanupSocket()
- self._keepGoing = False
-
- def _do_get_values(self, inrec):
- """Handle an FCGI_GET_VALUES request from the web server."""
- outrec = Record(FCGI_GET_VALUES_RESULT)
-
- pos = 0
- while pos < inrec.contentLength:
- pos, (name, value) = decode_pair(inrec.contentData, pos)
- cap = self.server.capability.get(name)
- if cap is not None:
- outrec.contentData += encode_pair(name, str(cap))
-
- outrec.contentLength = len(outrec.contentData)
- self.writeRecord(outrec)
-
- def _do_begin_request(self, inrec):
- """Handle an FCGI_BEGIN_REQUEST from the web server."""
- role, flags = struct.unpack(FCGI_BeginRequestBody, inrec.contentData)
-
- req = self.server.request_class(self, self._inputStreamClass)
- req.requestId, req.role, req.flags = inrec.requestId, role, flags
- req.aborted = False
-
- if not self._multiplexed and self._requests:
- # Can't multiplex requests.
- self.end_request(req, 0L, FCGI_CANT_MPX_CONN, remove=False)
- else:
- self._requests[inrec.requestId] = req
-
- def _do_abort_request(self, inrec):
- """
- Handle an FCGI_ABORT_REQUEST from the web server.
-
- We just mark a flag in the associated Request.
- """
- req = self._requests.get(inrec.requestId)
- if req is not None:
- req.aborted = True
-
- def _start_request(self, req):
- """Run the request."""
- # Not multiplexed, so run it inline.
- req.run()
-
- def _do_params(self, inrec):
- """
- Handle an FCGI_PARAMS Record.
-
- If the last FCGI_PARAMS Record is received, start the request.
- """
- req = self._requests.get(inrec.requestId)
- if req is not None:
- if inrec.contentLength:
- pos = 0
- while pos < inrec.contentLength:
- pos, (name, value) = decode_pair(inrec.contentData, pos)
- req.params[name] = value
- else:
- self._start_request(req)
-
- def _do_stdin(self, inrec):
- """Handle the FCGI_STDIN stream."""
- req = self._requests.get(inrec.requestId)
- if req is not None:
- req.stdin.add_data(inrec.contentData)
-
- def _do_data(self, inrec):
- """Handle the FCGI_DATA stream."""
- req = self._requests.get(inrec.requestId)
- if req is not None:
- req.data.add_data(inrec.contentData)
-
- def _do_unknown_type(self, inrec):
- """Handle an unknown request type. Respond accordingly."""
- outrec = Record(FCGI_UNKNOWN_TYPE)
- outrec.contentData = struct.pack(FCGI_UnknownTypeBody, inrec.type)
- outrec.contentLength = FCGI_UnknownTypeBody_LEN
- self.writeRecord(rec)
-
-
-class MultiplexedConnection(Connection):
- """
- A version of Connection capable of handling multiple requests
- simultaneously.
- """
- _multiplexed = True
- _inputStreamClass = MultiplexedInputStream
-
- def __init__(self, sock, addr, server):
- super(MultiplexedConnection, self).__init__(sock, addr, server)
-
- # Used to arbitrate access to self._requests.
- lock = threading.RLock()
-
- # Notification is posted everytime a request completes, allowing us
- # to quit cleanly.
- self._lock = threading.Condition(lock)
-
- def _cleanupSocket(self):
- # Wait for any outstanding requests before closing the socket.
- self._lock.acquire()
- while self._requests:
- self._lock.wait()
- self._lock.release()
-
- super(MultiplexedConnection, self)._cleanupSocket()
-
- def writeRecord(self, rec):
- # Must use locking to prevent intermingling of Records from different
- # threads.
- self._lock.acquire()
- try:
- # Probably faster than calling super. ;)
- rec.write(self._sock)
- finally:
- self._lock.release()
-
- def end_request(self, req, appStatus=0L,
- protocolStatus=FCGI_REQUEST_COMPLETE, remove=True):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self).end_request(req, appStatus,
- protocolStatus,
- remove)
- self._lock.notify()
- finally:
- self._lock.release()
-
- def _do_begin_request(self, inrec):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self)._do_begin_request(inrec)
- finally:
- self._lock.release()
-
- def _do_abort_request(self, inrec):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self)._do_abort_request(inrec)
- finally:
- self._lock.release()
-
- def _start_request(self, req):
- thread.start_new_thread(req.run, ())
-
- def _do_params(self, inrec):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self)._do_params(inrec)
- finally:
- self._lock.release()
-
- def _do_stdin(self, inrec):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self)._do_stdin(inrec)
- finally:
- self._lock.release()
-
- def _do_data(self, inrec):
- self._lock.acquire()
- try:
- super(MultiplexedConnection, self)._do_data(inrec)
- finally:
- self._lock.release()
-
-
-class Server(object):
- """
- The FastCGI server.
-
- Waits for connections from the web server, processing each
- request.
-
- If run in a normal CGI context, it will instead instantiate a
- CGIRequest and run the handler through there.
- """
- request_class = Request
- cgirequest_class = CGIRequest
-
- # Limits the size of the InputStream's string buffer to this size + the
- # server's maximum Record size. Since the InputStream is not seekable,
- # we throw away already-read data once this certain amount has been read.
- inputStreamShrinkThreshold = 102400 - 8192
-
- def __init__(self, handler=None, maxwrite=8192, bindAddress=None,
- umask=None, multiplexed=False):
- """
- handler, if present, must reference a function or method that
- takes one argument: a Request object. If handler is not
- specified at creation time, Server *must* be subclassed.
- (The handler method below is abstract.)
-
- maxwrite is the maximum number of bytes (per Record) to write
- to the server. I've noticed mod_fastcgi has a relatively small
- receive buffer (8K or so).
-
- bindAddress, if present, must either be a string or a 2-tuple. If
- present, run() will open its own listening socket. You would use
- this if you wanted to run your application as an 'external' FastCGI
- app. (i.e. the webserver would no longer be responsible for starting
- your app) If a string, it will be interpreted as a filename and a UNIX
- socket will be opened. If a tuple, the first element, a string,
- is the interface name/IP to bind to, and the second element (an int)
- is the port number.
-
- Set multiplexed to True if you want to handle multiple requests
- per connection. Some FastCGI backends (namely mod_fastcgi) don't
- multiplex requests at all, so by default this is off (which saves
- on thread creation/locking overhead). If threads aren't available,
- this keyword is ignored; it's not possible to multiplex requests
- at all.
- """
- if handler is not None:
- self.handler = handler
- self.maxwrite = maxwrite
- if thread_available:
- try:
- import resource
- # Attempt to glean the maximum number of connections
- # from the OS.
- maxConns = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
- except ImportError:
- maxConns = 100 # Just some made up number.
- maxReqs = maxConns
- if multiplexed:
- self._connectionClass = MultiplexedConnection
- maxReqs *= 5 # Another made up number.
- else:
- self._connectionClass = Connection
- self.capability = {
- FCGI_MAX_CONNS: maxConns,
- FCGI_MAX_REQS: maxReqs,
- FCGI_MPXS_CONNS: multiplexed and 1 or 0
- }
- else:
- self._connectionClass = Connection
- self.capability = {
- # If threads aren't available, these are pretty much correct.
- FCGI_MAX_CONNS: 1,
- FCGI_MAX_REQS: 1,
- FCGI_MPXS_CONNS: 0
- }
- self._bindAddress = bindAddress
- self._umask = umask
-
- def _setupSocket(self):
- if self._bindAddress is None: # Run as a normal FastCGI?
- isFCGI = True
-
- if isFCGI:
- try:
- sock = socket.fromfd(FCGI_LISTENSOCK_FILENO, socket.AF_INET,
- socket.SOCK_STREAM)
- sock.getpeername()
- except AttributeError:
- isFCGI = False
- except socket.error, e:
- if e[0] == errno.ENOTSOCK:
- # Not a socket, assume CGI context.
- isFCGI = False
- elif e[0] != errno.ENOTCONN:
- raise
-
- # FastCGI/CGI discrimination is broken on Mac OS X.
- # Set the environment variable FCGI_FORCE_CGI to "Y" or "y"
- # if you want to run your app as a simple CGI. (You can do
- # this with Apache's mod_env [not loaded by default in OS X
- # client, ha ha] and the SetEnv directive.)
- if not isFCGI or \
- os.environ.get('FCGI_FORCE_CGI', 'N').upper().startswith('Y'):
- req = self.cgirequest_class(self)
- req.run()
- sys.exit(0)
- else:
- # Run as a server
- oldUmask = None
- if type(self._bindAddress) is str:
- # Unix socket
- sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- try:
- os.unlink(self._bindAddress)
- except OSError:
- pass
- if self._umask is not None:
- oldUmask = os.umask(self._umask)
- else:
- # INET socket
- assert type(self._bindAddress) is tuple
- assert len(self._bindAddress) == 2
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-
- sock.bind(self._bindAddress)
- sock.listen(socket.SOMAXCONN)
-
- if oldUmask is not None:
- os.umask(oldUmask)
-
- return sock
-
- def _cleanupSocket(self, sock):
- """Closes the main socket."""
- sock.close()
-
- def _installSignalHandlers(self):
- self._oldSIGs = [(x, signal.getsignal(x)) for x in
- (signal.SIGHUP, signal.SIGINT, signal.SIGTERM)]
- signal.signal(signal.SIGHUP, self._hupHandler)
- signal.signal(signal.SIGINT, self._intHandler)
- signal.signal(signal.SIGTERM, self._intHandler)
-
- def _restoreSignalHandlers(self):
- for signum, handler in self._oldSIGs:
- signal.signal(signum, handler)
-
- def _hupHandler(self, signum, frame):
- self._hupReceived = True
- self._keepGoing = False
-
- def _intHandler(self, signum, frame):
- self._keepGoing = False
-
- def run(self, timeout=1.0):
- """
- The main loop. Exits on SIGHUP, SIGINT, SIGTERM. Returns True if
- SIGHUP was received, False otherwise.
- """
- web_server_addrs = os.environ.get('FCGI_WEB_SERVER_ADDRS')
- if web_server_addrs is not None:
- web_server_addrs = map(lambda x: x.strip(),
- web_server_addrs.split(','))
-
- sock = self._setupSocket()
-
- self._keepGoing = True
- self._hupReceived = False
-
- # Install signal handlers.
- self._installSignalHandlers()
-
- while self._keepGoing:
- try:
- r, w, e = select.select([sock], [], [], timeout)
- except select.error, e:
- if e[0] == errno.EINTR:
- continue
- raise
-
- if r:
- try:
- clientSock, addr = sock.accept()
- except socket.error, e:
- if e[0] in (errno.EINTR, errno.EAGAIN):
- continue
- raise
-
- if web_server_addrs and \
- (len(addr) != 2 or addr[0] not in web_server_addrs):
- clientSock.close()
- continue
-
- # Instantiate a new Connection and begin processing FastCGI
- # messages (either in a new thread or this thread).
- conn = self._connectionClass(clientSock, addr, self)
- thread.start_new_thread(conn.run, ())
-
- self._mainloopPeriodic()
-
- # Restore signal handlers.
- self._restoreSignalHandlers()
-
- self._cleanupSocket(sock)
-
- return self._hupReceived
-
- def _mainloopPeriodic(self):
- """
- Called with just about each iteration of the main loop. Meant to
- be overridden.
- """
- pass
-
- def _exit(self, reload=False):
- """
- Protected convenience method for subclasses to force an exit. Not
- really thread-safe, which is why it isn't public.
- """
- if self._keepGoing:
- self._keepGoing = False
- self._hupReceived = reload
-
- def handler(self, req):
- """
- Default handler, which just raises an exception. Unless a handler
- is passed at initialization time, this must be implemented by
- a subclass.
- """
- raise NotImplementedError, self.__class__.__name__ + '.handler'
-
- def error(self, req):
- """
- Called by Request if an exception occurs within the handler. May and
- should be overridden.
- """
- import cgitb
- req.stdout.write('Content-Type: text/html\r\n\r\n' +
- cgitb.html(sys.exc_info()))
-
-
-class WSGIServer(Server):
- """
- FastCGI server that supports the Web Server Gateway Interface. See
- <http://www.python.org/peps/pep-0333.html>.
- """
-
- def __init__(self, application, environ=None, umask=None,
- multithreaded=True, **kw):
- """
- environ, if present, must be a dictionary-like object. Its
- contents will be copied into application's environ. Useful
- for passing application-specific variables.
-
- Set multithreaded to False if your application is not MT-safe.
- """
- if kw.has_key('handler'):
- del kw['handler'] # Doesn't make sense to let this through
- super(WSGIServer, self).__init__(**kw)
-
- if environ is None:
- environ = {}
-
- self.application = application
- self.environ = environ
- self.multithreaded = multithreaded
-
- # Used to force single-threadedness
- self._app_lock = thread.allocate_lock()
-
- def handler(self, req):
- """Special handler for WSGI."""
- if req.role != FCGI_RESPONDER:
- return FCGI_UNKNOWN_ROLE, 0
-
- # Mostly taken from example CGI gateway.
- environ = req.params
- environ.update(self.environ)
-
- environ['wsgi.version'] = (1, 0)
- environ['wsgi.input'] = req.stdin
- if self._bindAddress is None:
- stderr = req.stderr
- else:
- stderr = TeeOutputStream((sys.stderr, req.stderr))
- environ['wsgi.errors'] = stderr
- environ['wsgi.multithread'] = not isinstance(req, CGIRequest) and \
- thread_available and self.multithreaded
- # Rationale for the following: If started by the web server
- # (self._bindAddress is None) in either FastCGI or CGI mode, the
- # possibility of being spawned multiple times simultaneously is quite
- # real. And, if started as an external server, multiple copies may be
- # spawned for load-balancing/redundancy. (Though I don't think
- # mod_fastcgi supports this?)
- environ['wsgi.multiprocess'] = True
- environ['wsgi.run_once'] = isinstance(req, CGIRequest)
-
- if environ.get('HTTPS', 'off') in ('on', '1'):
- environ['wsgi.url_scheme'] = 'https'
- else:
- environ['wsgi.url_scheme'] = 'http'
-
- self._sanitizeEnv(environ)
-
- headers_set = []
- headers_sent = []
- result = None
-
- def write(data):
- assert type(data) is str, 'write() argument must be string'
- assert headers_set, 'write() before start_response()'
-
- if not headers_sent:
- status, responseHeaders = headers_sent[:] = headers_set
- found = False
- for header, value in responseHeaders:
- if header.lower() == 'content-length':
- found = True
- break
- if not found and result is not None:
- try:
- if len(result) == 1:
- responseHeaders.append(('Content-Length',
- str(len(data))))
- except:
- pass
- s = 'Status: %s\r\n' % status
- for header in responseHeaders:
- s += '%s: %s\r\n' % header
- s += '\r\n'
- req.stdout.write(s)
-
- req.stdout.write(data)
- req.stdout.flush()
-
- def start_response(status, response_headers, exc_info=None):
- if exc_info:
- try:
- if headers_sent:
- # Re-raise if too late
- raise exc_info[0], exc_info[1], exc_info[2]
- finally:
- exc_info = None # avoid dangling circular ref
- else:
- assert not headers_set, 'Headers already set!'
-
- assert type(status) is str, 'Status must be a string'
- assert len(status) >= 4, 'Status must be at least 4 characters'
- assert int(status[:3]), 'Status must begin with 3-digit code'
- assert status[3] == ' ', 'Status must have a space after code'
- assert type(response_headers) is list, 'Headers must be a list'
- if __debug__:
- for name, val in response_headers:
- assert type(name) is str, 'Header names must be strings'
- assert type(val) is str, 'Header values must be strings'
-
- headers_set[:] = [status, response_headers]
- return write
-
- if not self.multithreaded:
- self._app_lock.acquire()
- try:
- try:
- result = self.application(environ, start_response)
- try:
- for data in result:
- if data:
- write(data)
- if not headers_sent:
- write('') # in case body was empty
- finally:
- if hasattr(result, 'close'):
- result.close()
- except socket.error, e:
- if e[0] != errno.EPIPE:
- raise # Don't let EPIPE propagate beyond server
- finally:
- if not self.multithreaded:
- self._app_lock.release()
-
- return FCGI_REQUEST_COMPLETE, 0
-
- def _sanitizeEnv(self, environ):
- """Ensure certain values are present, if required by WSGI."""
- if not environ.has_key('SCRIPT_NAME'):
- environ['SCRIPT_NAME'] = ''
- if not environ.has_key('PATH_INFO'):
- environ['PATH_INFO'] = ''
-
- # If any of these are missing, it probably signifies a broken
- # server...
- for name, default in [('REQUEST_METHOD', 'GET'),
- ('SERVER_NAME', 'localhost'),
- ('SERVER_PORT', '80'),
- ('SERVER_PROTOCOL', 'HTTP/1.0')]:
- if not environ.has_key(name):
- environ['wsgi.errors'].write('%s: missing FastCGI param %s '
- 'required by WSGI!\n' %
- (self.__class__.__name__, name))
- environ[name] = default
-
-
-if __name__ == '__main__':
- def test_app(environ, start_response):
- """Probably not the most efficient example."""
- import cgi
- start_response('200 OK', [('Content-Type', 'text/html')])
- yield '<html><head><title>Hello World!</title></head>\n' \
- '<body>\n' \
- '<p>Hello World!</p>\n' \
- '<table border="1">'
- names = environ.keys()
- names.sort()
- for name in names:
- yield '<tr><td>%s</td><td>%s</td></tr>\n' % (
- name, cgi.escape(`environ[name]`))
-
- form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ,
- keep_blank_values=1)
- if form.list:
- yield '<tr><th colspan="2">Form data</th></tr>'
-
- for field in form.list:
- yield '<tr><td>%s</td><td>%s</td></tr>\n' % (
- field.name, field.value)
-
- yield '</table>\n' \
- '</body></html>\n'
-
- WSGIServer(test_app).run()
diff --git a/cgi/formatting.py b/cgi/formatting.py
index 96bb73b..3da64da 100644
--- a/cgi/formatting.py
+++ b/cgi/formatting.py
@@ -3,7 +3,6 @@ import string
import html
import os
import re
-import pickle
import time
from database import *
@@ -111,7 +110,7 @@ def iphash(ip, post, t, useid, mobile, agent, cap_id, hide_end, has_countrycode)
if cap_id:
id = cap_id
- elif post['email'] and useid:
+ elif post['email'] and useid == 1:
id = '???'
elif ip == "127.0.0.1":
id = '???'
@@ -262,9 +261,9 @@ def checkRefLinks(message, parentid, parent_timestamp):
"""
board = Settings._.BOARD
- if board["board_type"] == '1':
+ if board["board_type"] == 1:
# Textboard
- if parentid != '0':
+ if parentid:
message = re.compile(r'&gt;&gt;(\d+(,\d+|-(?=[ \d\n])|\d+)*n?)').sub(
'<a href="' + Settings.BOARDS_URL + board['dir'] + '/read/' + str(parent_timestamp) + r'/\1">&gt;&gt;\1</a>', message)
else:
@@ -272,14 +271,14 @@ def checkRefLinks(message, parentid, parent_timestamp):
quotes_id_array = re.findall(r"&gt;&gt;([0-9]+)", message)
for quotes in quotes_id_array:
try:
- post = FetchOne('SELECT * FROM `posts` WHERE `id` = ' +
- quotes + ' AND `boardid` = ' + board['id'] + ' LIMIT 1')
- if post['parentid'] != '0':
+ post = FetchOne('SELECT * FROM `posts` WHERE `id` = %s AND `boardid` = %s LIMIT 1',
+ (quotes, board['id']))
+ if post['parentid']:
message = re.compile("&gt;&gt;" + quotes).sub('<a href="' + Settings.BOARDS_URL +
- board['dir'] + '/res/' + post['parentid'] + '.html#' + quotes + '">&gt;&gt;' + quotes + '</a>', message)
+ board['dir'] + '/res/' + str(post['parentid']) + '.html#' + quotes + '">&gt;&gt;' + quotes + '</a>', message)
else:
message = re.compile("&gt;&gt;" + quotes).sub('<a href="' + Settings.BOARDS_URL +
- board['dir'] + '/res/' + post['id'] + '.html#' + quotes + '">&gt;&gt;' + quotes + '</a>', message)
+ board['dir'] + '/res/' + str(post['id']) + '.html#' + quotes + '">&gt;&gt;' + quotes + '</a>', message)
except:
message = re.compile(
"&gt;&gt;" + quotes).sub(r'<span class="q">&gt;&gt;'+quotes+'</span>', message)
@@ -371,9 +370,9 @@ def checkWordfilters(message, ip, board):
wordfilters = FetchAll(
"SELECT * FROM `filters` WHERE `type` = '0' ORDER BY `id` ASC")
for wordfilter in wordfilters:
- if wordfilter["boards"] != "":
- boards = pickle.loads(wordfilter["boards"].encode("utf-8"))
- if wordfilter["boards"] == "" or board in boards:
+ if wordfilter["boards"]:
+ boards = str2boards(wordfilter["boards"])
+ if not wordfilter["boards"] or board in boards:
if wordfilter['action'] == '0':
if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None:
raise UserError(wordfilter['reason'])
@@ -404,9 +403,9 @@ def checkNamefilters(name, tripcode, ip, board):
namefilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '1'")
for namefilter in namefilters:
- if namefilter["boards"] != "":
- boards = pickle.loads(namefilter["boards"])
- if namefilter["boards"] == "" or board in boards:
+ if namefilter["boards"]:
+ boards = str2boards(namefilter["boards"])
+ if not namefilter["boards"] or board in boards:
# check if this filter applies
match = False
diff --git a/cgi/framework.py b/cgi/framework.py
index 5277df0..e2af143 100644
--- a/cgi/framework.py
+++ b/cgi/framework.py
@@ -4,7 +4,6 @@ import cgi
import datetime
import time
import hashlib
-import pickle
import socket
import urllib.request, urllib.parse, urllib.error
import re
@@ -38,6 +37,14 @@ def setBoard(dir):
return board
+def str2boards(sstr):
+ return sstr.split(',')
+
+
+def boards2str(boards):
+ return ','.join(boards)
+
+
def cleanDir(path, ext=None):
if ext:
filelist = [f for f in os.listdir(path) if f.endswith("." + ext)]
@@ -49,14 +56,14 @@ def cleanDir(path, ext=None):
def addressIsBanned(ip, board, blind_only=False):
- query = "SELECT * FROM `bans` WHERE INET6_ATON('"+str(ip)+"') BETWEEN `ipstart` AND `ipend`"
+ query = "SELECT * FROM `bans` WHERE INET6_ATON(%s) BETWEEN `ipstart` AND `ipend`"
if blind_only:
query += " AND `blind` = '1'"
- bans = FetchAll(query)
+ bans = FetchAll(query, (ip,))
for ban in bans:
- if ban["boards"] != "":
- boards = pickle.loads(ban["boards"])
- if ban["boards"] == "" or board in boards:
+ if ban["boards"]:
+ boards = str2boards(ban["boards"])
+ if not ban["boards"] or board in boards:
if board not in Settings.EXCLUDE_GLOBAL_BANS:
return True
return False
@@ -140,15 +147,22 @@ def updateBoardSettings():
Pickle the board's settings and store it in the configuration field
"""
board = Settings._.BOARD
- #UpdateDb("UPDATE `boards` SET `configuration` = '%s' WHERE `id` = %s LIMIT 1" % (_mysql.escape_string(configuration), board["id"]))
-
+
del board["filetypes"]
del board["filetypes_ext"]
- post_values = ["`" + _mysql.escape_string(str(key)) + "` = '" + _mysql.escape_string(
- str(value)) + "'" for key, value in board.items()]
-
- UpdateDb("UPDATE `boards` SET %s WHERE `id` = '%s' LIMIT 1" %
- (", ".join(post_values), board["id"]))
+
+ sql = "UPDATE `boards` SET "
+ keys = []
+ values = []
+ for k, v in board.items():
+ keys.append("`" + k + "` = %s")
+ values.append(v)
+
+ sql += ", ".join(keys)
+ sql += " WHERE `id` = %s LIMIT 1"
+ values.append(board["id"])
+
+ UpdateDb(sql, values)
def timestamp(t=None):
diff --git a/cgi/manage.py b/cgi/manage.py
index 40be3b2..0053f54 100644
--- a/cgi/manage.py
+++ b/cgi/manage.py
@@ -35,7 +35,7 @@ def manage(self, path_split):
UpdateDb("DELETE FROM `logs` WHERE `timestamp` < %s", (timestamp() - Settings.MANAGE_LOG_TIME,))
else:
page += _('Incorrect username/password.')
- logAction('', 'Failed log-in. U:'+_mysql.escape_string(self.formdata['username'])+' IP logged.')
+ logAction('', 'Failed log-in. U:'+self.formdata['username']+' IP logged.')
logging.warn("Failed log-in. U:{} IP:{}".format(self.formdata['username'], self.environ["REMOTE_ADDR"]))
else:
# Validate existing session
@@ -687,10 +687,9 @@ def manage(self, path_split):
return
if self.formdata['seconds'] != '0':
- until = str(
- timestamp() + int(self.formdata['seconds']))
+ until = timestamp() + int(self.formdata['seconds'])
else:
- until = '0'
+ until = 0
where = ''
if 'board_all' not in self.formdata:
where = []
@@ -701,7 +700,7 @@ def manage(self, path_split):
if self.formdata[keyname] == "1":
where.append(board['dir'])
if len(where) > 0:
- where = pickle.dumps(where)
+ where = boards2str(where)
else:
self.error(
_("You must select where the ban shall be placed"))
@@ -719,14 +718,14 @@ def manage(self, path_split):
return"""
# Blind mode
- blind = self.formdata.get('blind', '0')
+ blind = self.formdata.get('blind', 0)
#raise UserError, "{} {} {}".format(ipstart, ipend, ipstr)
# Banear sin mensaje
- InsertDb("INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (INET6_ATON('" +
- ipstart + "'), INET6_ATON('" + ipend + "'), '" + ipstr + "', '" +
- _mysql.escape_string(where) + "', " + str(timestamp()) + ", " + until + ", '" + _mysql.escape_string(staff_account['username']) + "', '" + _mysql.escape_string(self.formdata['reason']) + "', '" + _mysql.escape_string(self.formdata['note']) + "', '"+blind+"')")
+ InsertDb("INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES "
+ "(INET6_ATON(%s), INET6_ATON(%s), %s, %s, %s, %s, %s, %s, %s, %s)",
+ (ipstart, ipend, ipstr, where, timestamp(), until, staff_account['username'], self.formdata['reason'], self.formdata['note'], blind))
regenerateAccess()
if 'edit' in self.formdata:
@@ -747,18 +746,18 @@ def manage(self, path_split):
'reason': '',
'note': '',
'message': '(GET OUT)',
- 'seconds': '0',
- 'blind': '1'}
+ 'seconds': 0,
+ 'blind': 1}
edit_id = 0
if 'edit' in self.formdata:
edit_id = self.formdata['edit']
- ban = FetchOne("SELECT `id`, INET6_NTOA(`ip`) AS 'ip', CASE WHEN `netmask` IS NULL THEN '255.255.255.255' ELSE INET_NTOA(`netmask`) END AS 'netmask', boards, added, until, staff, reason, note, blind FROM `bans` WHERE `id` = '" +
- _mysql.escape_string(edit_id) + "' ORDER BY `added` DESC")
+ ban = FetchOne("SELECT `id`, INET6_NTOA(`ip`) AS 'ip', CASE WHEN `netmask` IS NULL THEN '255.255.255.255' ELSE INET_NTOA(`netmask`) END AS 'netmask', boards, added, until, staff, reason, note, blind FROM `bans` WHERE `id` = %s ORDER BY `added` DESC",
+ (edit_id,))
if ban:
if ban['boards'] == '':
where = ''
else:
- where = pickle.loads(ban['boards'])
+ where = boards2str(ban['boards'])
if ban['until'] == '0':
until = 0
else:
@@ -785,12 +784,12 @@ def manage(self, path_split):
action_taken = False
if len(path_split) > 4:
if path_split[3] == 'delete':
- ip = FetchOne("SELECT ipstr FROM `bans` WHERE `id` = '" +
- _mysql.escape_string(path_split[4]) + "' LIMIT 1", 0)[0]
- if ip != '':
+ ip = FetchOne("SELECT ipstr FROM `bans` WHERE `id` = %s LIMIT 1",
+ (path_split[4],))
+ if ip:
# Delete ban
- UpdateDb('DELETE FROM `bans` WHERE `id` = ' +
- _mysql.escape_string(path_split[4]) + ' LIMIT 1')
+ UpdateDb('DELETE FROM `bans` WHERE `id` = %s LIMIT 1',
+ (path_split[4],))
regenerateAccess()
message = _('Ban successfully deleted.')
template_filename = "message.html"
@@ -809,18 +808,18 @@ def manage(self, path_split):
if ban['boards'] == '':
ban['boards'] = _('All boards')
else:
- where = pickle.loads(ban['boards'].encode('utf-8'))
+ where = str2boards(ban['boards'])
if len(where) > 1:
ban['boards'] = '/' + \
'/, /'.join(where) + '/'
else:
ban['boards'] = '/' + where[0] + '/'
ban['added'] = formatTimestamp(ban['added'])
- if ban['until'] == '0':
+ if ban['until'] == 0:
ban['until'] = _('Does not expire')
else:
ban['until'] = formatTimestamp(ban['until'])
- if ban['blind'] == '1':
+ if ban['blind']:
ban['blind'] = 'Sí'
else:
ban['blind'] = 'No'
@@ -876,50 +875,50 @@ def manage(self, path_split):
board['slip'] = self.formdata['slip']
board['countrycode'] = self.formdata['countrycode']
if 'recyclebin' in self.formdata:
- board['recyclebin'] = '1'
+ board['recyclebin'] = 1
else:
- board['recyclebin'] = '0'
+ board['recyclebin'] = 0
if 'disable_name' in self.formdata:
- board['disable_name'] = '1'
+ board['disable_name'] = 1
else:
- board['disable_name'] = '0'
+ board['disable_name'] = 0
if 'disable_subject' in self.formdata:
- board['disable_subject'] = '1'
+ board['disable_subject'] = 1
else:
- board['disable_subject'] = '0'
+ board['disable_subject'] = 0
if 'secret' in self.formdata:
- board['secret'] = '1'
+ board['secret'] = 1
else:
- board['secret'] = '0'
+ board['secret'] = 0
if 'locked' in self.formdata:
- board['locked'] = '1'
+ board['locked'] = 1
else:
- board['locked'] = '0'
+ board['locked'] = 0
board['postarea_desc'] = self.formdata['postarea_desc']
if 'allow_noimage' in self.formdata:
- board['allow_noimage'] = '1'
+ board['allow_noimage'] = 1
else:
- board['allow_noimage'] = '0'
+ board['allow_noimage'] = 0
if 'allow_images' in self.formdata:
- board['allow_images'] = '1'
+ board['allow_images'] = 1
else:
- board['allow_images'] = '0'
+ board['allow_images'] = 0
if 'allow_image_replies' in self.formdata:
- board['allow_image_replies'] = '1'
+ board['allow_image_replies'] = 1
else:
- board['allow_image_replies'] = '0'
+ board['allow_image_replies'] = 0
if 'allow_spoilers' in self.formdata:
- board['allow_spoilers'] = '1'
+ board['allow_spoilers'] = 1
else:
- board['allow_spoilers'] = '0'
+ board['allow_spoilers'] = 0
if 'allow_oekaki' in self.formdata:
- board['allow_oekaki'] = '1'
+ board['allow_oekaki'] = 1
else:
- board['allow_oekaki'] = '0'
+ board['allow_oekaki'] = 0
if 'archive' in self.formdata:
- board['archive'] = '1'
+ board['archive'] = 1
else:
- board['archive'] = '0'
+ board['archive'] = 0
board['postarea_extra'] = self.formdata['postarea_extra']
board['force_css'] = self.formdata['force_css']
@@ -932,8 +931,7 @@ def manage(self, path_split):
board['id'], filetype['id']))
try:
- board['numthreads'] = int(
- self.formdata['numthreads'])
+ board['numthreads'] = int(self.formdata['numthreads'])
except:
raise UserError(_("Max threads shown must be numeric."))
@@ -963,14 +961,12 @@ def manage(self, path_split):
raise UserError(_("Max age must be numeric."))
try:
- board['maxinactive'] = int(
- self.formdata['maxinactive'])
+ board['maxinactive'] = int(self.formdata['maxinactive'])
except:
raise UserError(_("Max inactivity must be numeric."))
try:
- board['threadsecs'] = int(
- self.formdata['threadsecs'])
+ board['threadsecs'] = int(self.formdata['threadsecs'])
except:
raise UserError(_("Time between new threads must be numeric."))
@@ -1306,7 +1302,7 @@ def manage(self, path_split):
'SELECT * FROM archive WHERE boardid = %s ORDER BY timestamp DESC' % board['id'])
for item in threads:
t = time.time()
- self.output += item['timestamp'] + '<br />'
+ self.output += str(item['timestamp']) + '<br />'
fname = Settings.ROOT_DIR + \
board["dir"] + "/kako/" + \
str(item["timestamp"]) + ".json"
@@ -1367,10 +1363,10 @@ def manage(self, path_split):
new_timestamp_formatted = formatTimestamp(
post['timestamp'])
tim = 0
- if board["useid"] != '0':
+ if board["useid"] != 0:
new_timestamp_formatted += ' ID:' + \
- iphash(post['ip'], '', tim, '1',
- False, False, False, '0')
+ iphash(post['ip'], '', tim, 1,
+ False, False, False, 0)
self.output += "%s - %s <br />" % (
post['id'], new_timestamp_formatted)
query = "UPDATE `posts` SET timestamp_formatted = '%s' WHERE boardid = '%s' AND id = '%s'" % (
@@ -1442,7 +1438,6 @@ def manage(self, path_split):
filter_from = ''
filter_tripcode = ''
- # I don't like pickles... oh well.
where = ''
if 'board_all' not in self.formdata:
where = []
@@ -1453,8 +1448,7 @@ def manage(self, path_split):
if self.formdata[keyname] == "1":
where.append(board['dir'])
if len(where) > 0:
- where = _mysql.escape_string(
- pickle.dumps(where))
+ where = boards2str(where)
else:
self.error(
_("You must select what board the filter will affect"))
@@ -1560,10 +1554,10 @@ def manage(self, path_split):
edit_id = int(self.formdata['edit'])
filt = FetchOne(
"SELECT * FROM `filters` WHERE `id` = %s LIMIT 1" % str(edit_id))
- if filt['boards'] == '':
+ if not filt['boards']:
where = ''
else:
- where = pickle.loads(filt['boards'])
+ where = str2boards(filt['boards'])
startvalues = {'type': filt['type'],
'trip': filt['from_trip'],
'where': where,
@@ -1615,18 +1609,13 @@ def manage(self, path_split):
action_taken = True
if not action_taken:
- filters = FetchAll(
- "SELECT * FROM `filters` ORDER BY `added` DESC")
+ filters = FetchAll("SELECT * FROM `filters` ORDER BY `added` DESC")
for filter in filters:
if not filter['boards']:
filter['boards'] = _('All boards')
else:
- where = pickle.loads(filter['boards'].encode('utf-8'))
- if len(where) > 1:
- filter['boards'] = '/' + \
- '/, /'.join(where) + '/'
- else:
- filter['boards'] = '/' + where[0] + '/'
+ where = str2boards(filter['boards'])
+ filter['boards'] = '/' + '/, /'.join(where) + '/'
if filter['type'] == 0:
filter['type_formatted'] = _('Word:') + ' <b>' + html.escape(filter['from']) + '</b>'
elif filter['type'] == 1:
@@ -1834,10 +1823,10 @@ def manage(self, path_split):
message = None
import math
- pagesize = float(Settings.REPORTS_PER_PAGE)
+ pagesize = Settings.REPORTS_PER_PAGE
totals = FetchOne("SELECT COUNT(id) FROM `reports`")
total = int(totals['COUNT(id)'])
- pages = int(math.ceil(total / pagesize))
+ pages = int(math.ceil(total // pagesize))
try:
currentpage = int(path_split[3])
@@ -1847,24 +1836,23 @@ def manage(self, path_split):
if len(path_split) > 4:
if path_split[4] == 'ignore':
# Delete report
- UpdateDb("DELETE FROM `reports` WHERE `id` = '" +
- _mysql.escape_string(path_split[5])+"'")
+ UpdateDb("DELETE FROM `reports` WHERE `id` = %s", (path_split[5],))
message = _('Report %s ignored.') % path_split[5]
if 'ignore' in self.formdata:
ignored = 0
if 'board' in self.formdata and self.formdata['board'] != 'all':
- reports = FetchAll("SELECT `id` FROM `reports` WHERE `board` = '%s' ORDER BY `timestamp` DESC LIMIT %d, %d" % (
- _mysql.escape_string(self.formdata['board']), currentpage*pagesize, pagesize))
+ reports = FetchAll("SELECT `id` FROM `reports` WHERE `board` = %s ORDER BY `timestamp` DESC LIMIT %s, %s",
+ (self.formdata['board'], currentpage*pagesize, pagesize))
else:
- reports = FetchAll("SELECT `id` FROM `reports` ORDER BY `timestamp` DESC LIMIT %d, %d" % (
- currentpage*pagesize, pagesize))
+ reports = FetchAll("SELECT `id` FROM `reports` ORDER BY `timestamp` DESC LIMIT %s, %s",
+ (currentpage*pagesize, pagesize))
for report in reports:
keyname = 'i' + report['id']
if keyname in self.formdata:
# Ignore here
- UpdateDb("DELETE FROM `reports` WHERE `id` = '" +
- _mysql.escape_string(report['id'])+"'")
+ UpdateDb("DELETE FROM `reports` WHERE `id` = %s",
+ (report['id'],))
ignored += 1
message = _('Ignored %s report(s).') % str(ignored)
@@ -1880,11 +1868,11 @@ def manage(self, path_split):
# Tabla
if 'board' in self.formdata and self.formdata['board'] != 'all':
- reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` WHERE `board` = '%s' ORDER BY `timestamp` DESC LIMIT %d, %d" % (
- _mysql.escape_string(self.formdata['board']), currentpage*pagesize, pagesize))
+ reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` WHERE `board` = %s ORDER BY `timestamp` DESC LIMIT %s, %s",
+ (self.formdata['board'], currentpage*pagesize, pagesize))
else:
- reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` ORDER BY `timestamp` DESC LIMIT %d, %d" % (
- currentpage*pagesize, pagesize))
+ reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` ORDER BY `timestamp` DESC LIMIT %s, %s",
+ (currentpage*pagesize, pagesize))
if 'board' in self.formdata:
curboard = self.formdata['board']
diff --git a/cgi/markdown.py b/cgi/markdown.py
deleted file mode 100644
index 846c192..0000000
--- a/cgi/markdown.py
+++ /dev/null
@@ -1,2093 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2007-2008 ActiveState Corp.
-# License: MIT (http://www.opensource.org/licenses/mit-license.php)
-
-r"""A fast and complete Python implementation of Markdown.
-
-[from http://daringfireball.net/projects/markdown/]
-> Markdown is a text-to-HTML filter; it translates an easy-to-read /
-> easy-to-write structured text format into HTML. Markdown's text
-> format is most similar to that of plain text email, and supports
-> features such as headers, *emphasis*, code blocks, blockquotes, and
-> links.
->
-> Markdown's syntax is designed not as a generic markup language, but
-> specifically to serve as a front-end to (X)HTML. You can use span-level
-> HTML tags anywhere in a Markdown document, and you can use block level
-> HTML tags (like <div> and <table> as well).
-
-Module usage:
-
- >>> import markdown2
- >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)`
- u'<p><em>boo!</em></p>\n'
-
- >>> markdowner = Markdown()
- >>> markdowner.convert("*boo!*")
- u'<p><em>boo!</em></p>\n'
- >>> markdowner.convert("**boom!**")
- u'<p><strong>boom!</strong></p>\n'
-
-This implementation of Markdown implements the full "core" syntax plus a
-number of extras (e.g., code syntax coloring, footnotes) as described on
-<http://code.google.com/p/python-markdown2/wiki/Extras>.
-"""
-
-from urllib import quote
-import codecs
-from random import random, randint
-import optparse
-import logging
-import re
-from pprint import pprint
-import sys
-import os
-cmdln_desc = """A fast and complete Python implementation of Markdown, a
-text-to-HTML conversion tool for web writers.
-
-Supported extras (see -x|--extras option below):
-* code-friendly: Disable _ and __ for em and strong.
-* code-color: Pygments-based syntax coloring of <code> sections.
-* cuddled-lists: Allow lists to be cuddled to the preceding paragraph.
-* footnotes: Support footnotes as in use on daringfireball.net and
- implemented in other Markdown processors (tho not in Markdown.pl v1.0.1).
-* html-classes: Takes a dict mapping html tag names (lowercase) to a
- string to use for a "class" tag attribute. Currently only supports
- "pre" and "code" tags. Add an issue if you require this for other tags.
-* pyshell: Treats unindented Python interactive shell sessions as <code>
- blocks.
-* link-patterns: Auto-link given regex patterns in text (e.g. bug number
- references, revision number references).
-* xml: Passes one-liner processing instructions and namespaced XML tags.
-"""
-
-# Dev Notes:
-# - There is already a Python markdown processor
-# (http://www.freewisdom.org/projects/python-markdown/).
-# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm
-# not yet sure if there implications with this. Compare 'pydoc sre'
-# and 'perldoc perlre'.
-
-__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl
-__version__ = '1.0.1.17'
-__author__ = "Trent Mick"
-
-try:
- from hashlib import md5
-except ImportError:
- from md5 import md5
-
-
-# ---- Python version compat
-
-if sys.version_info[:2] < (2, 4):
- from sets import Set as set
-
- def reversed(sequence):
- for i in sequence[::-1]:
- yield i
-
- def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
- return unicode(s, encoding, errors)
-else:
- def _unicode_decode(s, encoding, errors='strict'):
- return s.decode(encoding, errors)
-
-
-#---- globals
-
-DEBUG = False
-log = logging.getLogger("markdown")
-
-DEFAULT_TAB_WIDTH = 4
-
-
-try:
- import uuid
-except ImportError:
- SECRET_SALT = str(randint(0, 1000000))
-else:
- SECRET_SALT = str(uuid.uuid4())
-
-
-def _hash_ascii(s):
- # return md5(s).hexdigest() # Markdown.pl effectively does this.
- return 'md5-' + md5(SECRET_SALT + s).hexdigest()
-
-
-def _hash_text(s):
- return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()
-
-
-# Table of hash values for escaped characters:
-g_escape_table = dict([(ch, _hash_ascii(ch))
- for ch in '\\`*_{}[]()>#+-.!'])
-
-
-#---- exceptions
-
-class MarkdownError(Exception):
- pass
-
-
-# ---- public api
-
-def markdown_path(path, encoding="utf-8",
- html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
- safe_mode=None, extras=None, link_patterns=None,
- use_file_vars=False):
- fp = codecs.open(path, 'r', encoding)
- text = fp.read()
- fp.close()
- return Markdown(html4tags=html4tags, tab_width=tab_width,
- safe_mode=safe_mode, extras=extras,
- link_patterns=link_patterns,
- use_file_vars=use_file_vars).convert(text)
-
-
-def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
- safe_mode=None, extras=None, link_patterns=None,
- use_file_vars=False):
- return Markdown(html4tags=html4tags, tab_width=tab_width,
- safe_mode=safe_mode, extras=extras,
- link_patterns=link_patterns,
- use_file_vars=use_file_vars).convert(text)
-
-
-class Markdown(object):
- # The dict of "extras" to enable in processing -- a mapping of
- # extra name to argument for the extra. Most extras do not have an
- # argument, in which case the value is None.
- #
- # This can be set via (a) subclassing and (b) the constructor
- # "extras" argument.
- extras = None
-
- urls = None
- titles = None
- html_blocks = None
- html_spans = None
- html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py
-
- # Used to track when we're inside an ordered or unordered list
- # (see _ProcessListItems() for details):
- list_level = 0
-
- _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
-
- def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
- extras=None, link_patterns=None, use_file_vars=False):
- if html4tags:
- self.empty_element_suffix = ">"
- else:
- self.empty_element_suffix = " />"
- self.tab_width = tab_width
-
- # For compatibility with earlier markdown2.py and with
- # markdown.py's safe_mode being a boolean,
- # safe_mode == True -> "replace"
- if safe_mode is True:
- self.safe_mode = "replace"
- else:
- self.safe_mode = safe_mode
-
- if self.extras is None:
- self.extras = {}
- elif not isinstance(self.extras, dict):
- self.extras = dict([(e, None) for e in self.extras])
- if extras:
- if not isinstance(extras, dict):
- extras = dict([(e, None) for e in extras])
- self.extras.update(extras)
- assert isinstance(self.extras, dict)
- if "toc" in self.extras and not "header-ids" in self.extras:
- self.extras["header-ids"] = None # "toc" implies "header-ids"
- self._instance_extras = self.extras.copy()
- self.link_patterns = link_patterns
- self.use_file_vars = use_file_vars
- self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)
-
- def reset(self):
- self.urls = {}
- self.titles = {}
- self.html_blocks = {}
- self.html_spans = {}
- self.list_level = 0
- self.extras = self._instance_extras.copy()
- if "footnotes" in self.extras:
- self.footnotes = {}
- self.footnote_ids = []
- if "header-ids" in self.extras:
- self._count_from_header_id = {} # no `defaultdict` in Python 2.4
-
- def convert(self, text):
- """Convert the given text."""
- # Main function. The order in which other subs are called here is
- # essential. Link and image substitutions need to happen before
- # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
- # and <img> tags get encoded.
-
- # Clear the global hashes. If we don't clear these, you get conflicts
- # from other articles when generating a page which contains more than
- # one article (e.g. an index page that shows the N most recent
- # articles):
- self.reset()
-
- if not isinstance(text, unicode):
- # TODO: perhaps shouldn't presume UTF-8 for string input?
- text = unicode(text, 'utf-8')
-
- if self.use_file_vars:
- # Look for emacs-style file variable hints.
- emacs_vars = self._get_emacs_vars(text)
- if "markdown-extras" in emacs_vars:
- splitter = re.compile("[ ,]+")
- for e in splitter.split(emacs_vars["markdown-extras"]):
- if '=' in e:
- ename, earg = e.split('=', 1)
- try:
- earg = int(earg)
- except ValueError:
- pass
- else:
- ename, earg = e, None
- self.extras[ename] = earg
-
- # Standardize line endings:
- text = re.sub("\r\n|\r", "\n", text)
-
- # Make sure $text ends with a couple of newlines:
- text += "\n\n"
-
- # Convert all tabs to spaces.
- text = self._detab(text)
-
- # Strip any lines consisting only of spaces and tabs.
- # This makes subsequent regexen easier to write, because we can
- # match consecutive blank lines with /\n+/ instead of something
- # contorted like /[ \t]*\n+/ .
- text = self._ws_only_line_re.sub("", text)
-
- if self.safe_mode:
- text = self._hash_html_spans(text)
-
- # Turn block-level HTML blocks into hash entries
- text = self._hash_html_blocks(text, raw=True)
-
- # Strip link definitions, store in hashes.
- if "footnotes" in self.extras:
- # Must do footnotes first because an unlucky footnote defn
- # looks like a link defn:
- # [^4]: this "looks like a link defn"
- text = self._strip_footnote_definitions(text)
- text = self._strip_link_definitions(text)
-
- text = self._run_block_gamut(text)
-
- if "footnotes" in self.extras:
- text = self._add_footnotes(text)
-
- text = self._unescape_special_chars(text)
-
- if self.safe_mode:
- text = self._unhash_html_spans(text)
-
- #text += "\n"
-
- rv = UnicodeWithAttrs(text)
- if "toc" in self.extras:
- rv._toc = self._toc
- return rv
-
- _emacs_oneliner_vars_pat = re.compile(
- r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
- # This regular expression is intended to match blocks like this:
- # PREFIX Local Variables: SUFFIX
- # PREFIX mode: Tcl SUFFIX
- # PREFIX End: SUFFIX
- # Some notes:
- # - "[ \t]" is used instead of "\s" to specifically exclude newlines
- # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does
- # not like anything other than Unix-style line terminators.
- _emacs_local_vars_pat = re.compile(r"""^
- (?P<prefix>(?:[^\r\n|\n|\r])*?)
- [\ \t]*Local\ Variables:[\ \t]*
- (?P<suffix>.*?)(?:\r\n|\n|\r)
- (?P<content>.*?\1End:)
- """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)
-
- def _get_emacs_vars(self, text):
- """Return a dictionary of emacs-style local variables.
-
- Parsing is done loosely according to this spec (and according to
- some in-practice deviations from this):
- http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables
- """
- emacs_vars = {}
- SIZE = pow(2, 13) # 8kB
-
- # Search near the start for a '-*-'-style one-liner of variables.
- head = text[:SIZE]
- if "-*-" in head:
- match = self._emacs_oneliner_vars_pat.search(head)
- if match:
- emacs_vars_str = match.group(1)
- assert '\n' not in emacs_vars_str
- emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';')
- if s.strip()]
- if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]:
- # While not in the spec, this form is allowed by emacs:
- # -*- Tcl -*-
- # where the implied "variable" is "mode". This form
- # is only allowed if there are no other variables.
- emacs_vars["mode"] = emacs_var_strs[0].strip()
- else:
- for emacs_var_str in emacs_var_strs:
- try:
- variable, value = emacs_var_str.strip().split(':', 1)
- except ValueError:
- log.debug("emacs variables error: malformed -*- "
- "line: %r", emacs_var_str)
- continue
- # Lowercase the variable name because Emacs allows "Mode"
- # or "mode" or "MoDe", etc.
- emacs_vars[variable.lower()] = value.strip()
-
- tail = text[-SIZE:]
- if "Local Variables" in tail:
- match = self._emacs_local_vars_pat.search(tail)
- if match:
- prefix = match.group("prefix")
- suffix = match.group("suffix")
- lines = match.group("content").splitlines(0)
- #print "prefix=%r, suffix=%r, content=%r, lines: %s"\
- # % (prefix, suffix, match.group("content"), lines)
-
- # Validate the Local Variables block: proper prefix and suffix
- # usage.
- for i, line in enumerate(lines):
- if not line.startswith(prefix):
- log.debug("emacs variables error: line '%s' "
- "does not use proper prefix '%s'"
- % (line, prefix))
- return {}
- # Don't validate suffix on last line. Emacs doesn't care,
- # neither should we.
- if i != len(lines)-1 and not line.endswith(suffix):
- log.debug("emacs variables error: line '%s' "
- "does not use proper suffix '%s'"
- % (line, suffix))
- return {}
-
- # Parse out one emacs var per line.
- continued_for = None
- # no var on the last line ("PREFIX End:")
- for line in lines[:-1]:
- if prefix:
- line = line[len(prefix):] # strip prefix
- if suffix:
- line = line[:-len(suffix)] # strip suffix
- line = line.strip()
- if continued_for:
- variable = continued_for
- if line.endswith('\\'):
- line = line[:-1].rstrip()
- else:
- continued_for = None
- emacs_vars[variable] += ' ' + line
- else:
- try:
- variable, value = line.split(':', 1)
- except ValueError:
- log.debug("local variables error: missing colon "
- "in local variables entry: '%s'" % line)
- continue
- # Do NOT lowercase the variable name, because Emacs only
- # allows "mode" (and not "Mode", "MoDe", etc.) in this block.
- value = value.strip()
- if value.endswith('\\'):
- value = value[:-1].rstrip()
- continued_for = variable
- else:
- continued_for = None
- emacs_vars[variable] = value
-
- # Unquote values.
- for var, val in emacs_vars.items():
- if len(val) > 1 and (val.startswith('"') and val.endswith('"')
- or val.startswith('"') and val.endswith('"')):
- emacs_vars[var] = val[1:-1]
-
- return emacs_vars
-
- # Cribbed from a post by Bart Lateur:
- # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
- _detab_re = re.compile(r'(.*?)\t', re.M)
-
- def _detab_sub(self, match):
- g1 = match.group(1)
- return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
-
- def _detab(self, text):
- r"""Remove (leading?) tabs from a file.
-
- >>> m = Markdown()
- >>> m._detab("\tfoo")
- ' foo'
- >>> m._detab(" \tfoo")
- ' foo'
- >>> m._detab("\t foo")
- ' foo'
- >>> m._detab(" foo")
- ' foo'
- >>> m._detab(" foo\n\tbar\tblam")
- ' foo\n bar blam'
- """
- if '\t' not in text:
- return text
- return self._detab_re.subn(self._detab_sub, text)[0]
-
- _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
- _strict_tag_block_re = re.compile(r"""
- ( # save in \1
- ^ # start of line (with re.M)
- <(%s) # start tag = \2
- \b # word break
- (.*\n)*? # any number of lines, minimally matching
- </\2> # the matching end tag
- [ \t]* # trailing spaces/tabs
- (?=\n+|\Z) # followed by a newline or end of document
- )
- """ % _block_tags_a,
- re.X | re.M)
-
- _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
- _liberal_tag_block_re = re.compile(r"""
- ( # save in \1
- ^ # start of line (with re.M)
- <(%s) # start tag = \2
- \b # word break
- (.*\n)*? # any number of lines, minimally matching
- .*</\2> # the matching end tag
- [ \t]* # trailing spaces/tabs
- (?=\n+|\Z) # followed by a newline or end of document
- )
- """ % _block_tags_b,
- re.X | re.M)
-
- def _hash_html_block_sub(self, match, raw=False):
- html = match.group(1)
- if raw and self.safe_mode:
- html = self._sanitize_html(html)
- key = _hash_text(html)
- self.html_blocks[key] = html
- return "\n\n" + key + "\n\n"
-
- def _hash_html_blocks(self, text, raw=False):
- """Hashify HTML blocks
-
- We only want to do this for block-level HTML tags, such as headers,
- lists, and tables. That's because we still want to wrap <p>s around
- "paragraphs" that are wrapped in non-block-level tags, such as anchors,
- phrase emphasis, and spans. The list of tags we're looking for is
- hard-coded.
-
- @param raw {boolean} indicates if these are raw HTML blocks in
- the original source. It makes a difference in "safe" mode.
- """
- if '<' not in text:
- return text
-
- # Pass `raw` value into our calls to self._hash_html_block_sub.
- hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw)
-
- # First, look for nested blocks, e.g.:
- # <div>
- # <div>
- # tags for inner block must be indented.
- # </div>
- # </div>
- #
- # The outermost tags must start at the left margin for this to match, and
- # the inner nested divs must be indented.
- # We need to do this before the next, more liberal match, because the next
- # match will start at the first `<div>` and stop at the first `</div>`.
- text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
-
- # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
- text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
-
- # Special case just for <hr />. It was easier to make a special
- # case than to make the other regex more complicated.
- if "<hr" in text:
- _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
- text = _hr_tag_re.sub(hash_html_block_sub, text)
-
- # Special case for standalone HTML comments:
- if "<!--" in text:
- start = 0
- while True:
- # Delimiters for next comment block.
- try:
- start_idx = text.index("<!--", start)
- except ValueError, ex:
- break
- try:
- end_idx = text.index("-->", start_idx) + 3
- except ValueError, ex:
- break
-
- # Start position for next comment block search.
- start = end_idx
-
- # Validate whitespace before comment.
- if start_idx:
- # - Up to `tab_width - 1` spaces before start_idx.
- for i in range(self.tab_width - 1):
- if text[start_idx - 1] != ' ':
- break
- start_idx -= 1
- if start_idx == 0:
- break
- # - Must be preceded by 2 newlines or hit the start of
- # the document.
- if start_idx == 0:
- pass
- elif start_idx == 1 and text[0] == '\n':
- start_idx = 0 # to match minute detail of Markdown.pl regex
- elif text[start_idx-2:start_idx] == '\n\n':
- pass
- else:
- break
-
- # Validate whitespace after comment.
- # - Any number of spaces and tabs.
- while end_idx < len(text):
- if text[end_idx] not in ' \t':
- break
- end_idx += 1
- # - Must be following by 2 newlines or hit end of text.
- if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
- continue
-
- # Escape and hash (must match `_hash_html_block_sub`).
- html = text[start_idx:end_idx]
- if raw and self.safe_mode:
- html = self._sanitize_html(html)
- key = _hash_text(html)
- self.html_blocks[key] = html
- text = text[:start_idx] + "\n\n" + \
- key + "\n\n" + text[end_idx:]
-
- if "xml" in self.extras:
- # Treat XML processing instructions and namespaced one-liner
- # tags as if they were block HTML tags. E.g., if standalone
- # (i.e. are their own paragraph), the following do not get
- # wrapped in a <p> tag:
- # <?foo bar?>
- #
- # <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/>
- _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width)
- text = _xml_oneliner_re.sub(hash_html_block_sub, text)
-
- return text
-
- def _strip_link_definitions(self, text):
- # Strips link definitions from text, stores the URLs and titles in
- # hash references.
- less_than_tab = self.tab_width - 1
-
- # Link defs are in the form:
- # [id]: url "optional title"
- _link_def_re = re.compile(r"""
- ^[ ]{0,%d}\[(.+)\]: # id = \1
- [ \t]*
- \n? # maybe *one* newline
- [ \t]*
- <?(.+?)>? # url = \2
- [ \t]*
- (?:
- \n? # maybe one newline
- [ \t]*
- (?<=\s) # lookbehind for whitespace
- ['"(]
- ([^\n]*) # title = \3
- ['")]
- [ \t]*
- )? # title is optional
- (?:\n+|\Z)
- """ % less_than_tab, re.X | re.M | re.U)
- return _link_def_re.sub(self._extract_link_def_sub, text)
-
- def _extract_link_def_sub(self, match):
- id, url, title = match.groups()
- key = id.lower() # Link IDs are case-insensitive
- self.urls[key] = self._encode_amps_and_angles(url)
- if title:
- self.titles[key] = title.replace('"', '&quot;')
- return ""
-
- def _extract_footnote_def_sub(self, match):
- id, text = match.groups()
- text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
- normed_id = re.sub(r'\W', '-', id)
- # Ensure footnote text ends with a couple newlines (for some
- # block gamut matches).
- self.footnotes[normed_id] = text + "\n\n"
- return ""
-
- def _strip_footnote_definitions(self, text):
- """A footnote definition looks like this:
-
- [^note-id]: Text of the note.
-
- May include one or more indented paragraphs.
-
- Where,
- - The 'note-id' can be pretty much anything, though typically it
- is the number of the footnote.
- - The first paragraph may start on the next line, like so:
-
- [^note-id]:
- Text of the note.
- """
- less_than_tab = self.tab_width - 1
- footnote_def_re = re.compile(r'''
- ^[ ]{0,%d}\[\^(.+)\]: # id = \1
- [ \t]*
- ( # footnote text = \2
- # First line need not start with the spaces.
- (?:\s*.*\n+)
- (?:
- (?:[ ]{%d} | \t) # Subsequent lines must be indented.
- .*\n+
- )*
- )
- # Lookahead for non-space at line-start, or end of doc.
- (?:(?=^[ ]{0,%d}\S)|\Z)
- ''' % (less_than_tab, self.tab_width, self.tab_width),
- re.X | re.M)
- return footnote_def_re.sub(self._extract_footnote_def_sub, text)
-
- _hr_res = [
- re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M),
- re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M),
- re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M),
- ]
-
- def _run_block_gamut(self, text):
- # These are all the transformations that form block-level
- # tags like paragraphs, headers, and list items.
-
- #text = self._do_headers(text)
-
- # Do Horizontal Rules:
- #hr = "\n<hr"+self.empty_element_suffix+"\n"
- # for hr_re in self._hr_res:
- # text = hr_re.sub(hr, text)
-
- text = self._do_lists(text)
-
- if "pyshell" in self.extras:
- text = self._prepare_pyshell_blocks(text)
-
- text = self._do_code_blocks(text)
-
- text = self._do_block_quotes(text)
-
- # We already ran _HashHTMLBlocks() before, in Markdown(), but that
- # was to escape raw HTML in the original Markdown source. This time,
- # we're escaping the markup we've just created, so that we don't wrap
- # <p> tags around block-level tags.
- text = self._hash_html_blocks(text)
-
- text = self._form_paragraphs(text)
-
- return text
-
- def _pyshell_block_sub(self, match):
- lines = match.group(0).splitlines(0)
- _dedentlines(lines)
- indent = ' ' * self.tab_width
- s = ('\n' # separate from possible cuddled paragraph
- + indent + ('\n'+indent).join(lines)
- + '\n\n')
- return s
-
- def _prepare_pyshell_blocks(self, text):
- """Ensure that Python interactive shell sessions are put in
- code blocks -- even if not properly indented.
- """
- if ">>>" not in text:
- return text
-
- less_than_tab = self.tab_width - 1
- _pyshell_block_re = re.compile(r"""
- ^([ ]{0,%d})>>>[ ].*\n # first line
- ^(\1.*\S+.*\n)* # any number of subsequent lines
- ^\n # ends with a blank line
- """ % less_than_tab, re.M | re.X)
-
- return _pyshell_block_re.sub(self._pyshell_block_sub, text)
-
- def _run_span_gamut(self, text):
- # These are all the transformations that occur *within* block-level
- # tags like paragraphs, headers, and list items.
-
- # text = self._do_code_spans(text) - El AA !
-
- text = self._escape_special_chars(text)
-
- # Process anchor and image tags.
- text = self._do_links(text)
-
- # Make links out of things like `<http://example.com/>`
- # Must come after _do_links(), because you can use < and >
- # delimiters in inline links like [this](<url>).
- #text = self._do_auto_links(text)
-
- if "link-patterns" in self.extras:
- text = self._do_link_patterns(text)
-
- text = self._encode_amps_and_angles(text)
-
- text = self._do_italics_and_bold(text)
-
- # Do hard breaks:
- text = re.sub(r"\n", "<br%s" % self.empty_element_suffix, text)
-
- return text
-
- # "Sorta" because auto-links are identified as "tag" tokens.
- _sorta_html_tokenize_re = re.compile(r"""
- (
- # tag
- </?
- (?:\w+) # tag name
- (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
- \s*/?>
- |
- # auto-link (e.g., <http://www.activestate.com/>)
- <\w+[^>]*>
- |
- <!--.*?--> # comment
- |
- <\?.*?\?> # processing instruction
- )
- """, re.X)
-
- def _escape_special_chars(self, text):
- # Python markdown note: the HTML tokenization here differs from
- # that in Markdown.pl, hence the behaviour for subtle cases can
- # differ (I believe the tokenizer here does a better job because
- # it isn't susceptible to unmatched '<' and '>' in HTML tags).
- # Note, however, that '>' is not allowed in an auto-link URL
- # here.
- escaped = []
- is_html_markup = False
- for token in self._sorta_html_tokenize_re.split(text):
- if is_html_markup:
- # Within tags/HTML-comments/auto-links, encode * and _
- # so they don't conflict with their use in Markdown for
- # italics and strong. We're replacing each such
- # character with its corresponding MD5 checksum value;
- # this is likely overkill, but it should prevent us from
- # colliding with the escape values by accident.
- escaped.append(token.replace('*', g_escape_table['*'])
- .replace('_', g_escape_table['_']))
- else:
- escaped.append(self._encode_backslash_escapes(token))
- is_html_markup = not is_html_markup
- return ''.join(escaped)
-
- def _hash_html_spans(self, text):
- # Used for safe_mode.
-
- def _is_auto_link(s):
- if ':' in s and self._auto_link_re.match(s):
- return True
- elif '@' in s and self._auto_email_link_re.match(s):
- return True
- return False
-
- tokens = []
- is_html_markup = False
- for token in self._sorta_html_tokenize_re.split(text):
- if is_html_markup and not _is_auto_link(token):
- sanitized = self._sanitize_html(token)
- key = _hash_text(sanitized)
- self.html_spans[key] = sanitized
- tokens.append(key)
- else:
- tokens.append(token)
- is_html_markup = not is_html_markup
- return ''.join(tokens)
-
- def _unhash_html_spans(self, text):
- for key, sanitized in self.html_spans.items():
- text = text.replace(key, sanitized)
- return text
-
- def _sanitize_html(self, s):
- if self.safe_mode == "replace":
- return self.html_removed_text
- elif self.safe_mode == "escape":
- replacements = [
- ('&', '&amp;'),
- ('<', '&lt;'),
- ('>', '&gt;'),
- ]
- for before, after in replacements:
- s = s.replace(before, after)
- return s
- else:
- raise MarkdownError("invalid value for 'safe_mode': %r (must be "
- "'escape' or 'replace')" % self.safe_mode)
-
- _tail_of_inline_link_re = re.compile(r'''
- # Match tail of: [text](/url/) or [text](/url/ "title")
- \( # literal paren
- [ \t]*
- (?P<url> # \1
- <.*?>
- |
- .*?
- )
- [ \t]*
- ( # \2
- (['"]) # quote char = \3
- (?P<title>.*?)
- \3 # matching quote
- )? # title is optional
- \)
- ''', re.X | re.S)
- _tail_of_reference_link_re = re.compile(r'''
- # Match tail of: [text][id]
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
- \[
- (?P<id>.*?)
- \]
- ''', re.X | re.S)
-
- def _do_links(self, text):
- """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
-
- This is a combination of Markdown.pl's _DoAnchors() and
- _DoImages(). They are done together because that simplified the
- approach. It was necessary to use a different approach than
- Markdown.pl because of the lack of atomic matching support in
- Python's regex engine used in $g_nested_brackets.
- """
- MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
-
- # `anchor_allowed_pos` is used to support img links inside
- # anchors, but not anchors inside anchors. An anchor's start
- # pos must be `>= anchor_allowed_pos`.
- anchor_allowed_pos = 0
-
- curr_pos = 0
- while True: # Handle the next link.
- # The next '[' is the start of:
- # - an inline anchor: [text](url "title")
- # - a reference anchor: [text][id]
- # - an inline img: ![text](url "title")
- # - a reference img: ![text][id]
- # - a footnote ref: [^id]
- # (Only if 'footnotes' extra enabled)
- # - a footnote defn: [^id]: ...
- # (Only if 'footnotes' extra enabled) These have already
- # been stripped in _strip_footnote_definitions() so no
- # need to watch for them.
- # - a link definition: [id]: url "title"
- # These have already been stripped in
- # _strip_link_definitions() so no need to watch for them.
- # - not markup: [...anything else...
- try:
- start_idx = text.index('[', curr_pos)
- except ValueError:
- break
- text_length = len(text)
-
- # Find the matching closing ']'.
- # Markdown.pl allows *matching* brackets in link text so we
- # will here too. Markdown.pl *doesn't* currently allow
- # matching brackets in img alt text -- we'll differ in that
- # regard.
- bracket_depth = 0
- for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
- text_length)):
- ch = text[p]
- if ch == ']':
- bracket_depth -= 1
- if bracket_depth < 0:
- break
- elif ch == '[':
- bracket_depth += 1
- else:
- # Closing bracket not found within sentinel length.
- # This isn't markup.
- curr_pos = start_idx + 1
- continue
- link_text = text[start_idx+1:p]
-
- # Possibly a footnote ref?
- if "footnotes" in self.extras and link_text.startswith("^"):
- normed_id = re.sub(r'\W', '-', link_text[1:])
- if normed_id in self.footnotes:
- self.footnote_ids.append(normed_id)
- result = '<sup class="footnote-ref" id="fnref-%s">' \
- '<a href="#fn-%s">%s</a></sup>' \
- % (normed_id, normed_id, len(self.footnote_ids))
- text = text[:start_idx] + result + text[p+1:]
- else:
- # This id isn't defined, leave the markup alone.
- curr_pos = p+1
- continue
-
- # Now determine what this is by the remainder.
- p += 1
- if p == text_length:
- return text
-
- # Inline anchor or img?
- if text[p] == '(': # attempt at perf improvement
- match = self._tail_of_inline_link_re.match(text, p)
- if match:
- # Handle an inline anchor or img.
- #is_img = start_idx > 0 and text[start_idx-1] == "!"
- # if is_img:
- # start_idx -= 1
- is_img = False
-
- url, title = match.group("url"), match.group("title")
- if url and url[0] == '<':
- url = url[1:-1] # '<url>' -> 'url'
- # We've got to encode these to avoid conflicting
- # with italics/bold.
- url = url.replace('*', g_escape_table['*']) \
- .replace('_', g_escape_table['_'])
- if title:
- title_str = ' title="%s"' \
- % title.replace('*', g_escape_table['*']) \
- .replace('_', g_escape_table['_']) \
- .replace('"', '&quot;')
- else:
- title_str = ''
- if is_img:
- result = '<img src="%s" alt="%s"%s%s' \
- % (url.replace('"', '&quot;'),
- link_text.replace('"', '&quot;'),
- title_str, self.empty_element_suffix)
- curr_pos = start_idx + len(result)
- text = text[:start_idx] + result + text[match.end():]
- elif start_idx >= anchor_allowed_pos:
- result_head = '<a href="%s"%s>' % (url, title_str)
- result = '%s%s</a>' % (result_head, link_text)
- # <img> allowed from curr_pos on, <a> from
- # anchor_allowed_pos on.
- curr_pos = start_idx + len(result_head)
- anchor_allowed_pos = start_idx + len(result)
- text = text[:start_idx] + result + text[match.end():]
- else:
- # Anchor not allowed here.
- curr_pos = start_idx + 1
- continue
-
- # Reference anchor or img?
- else:
- match = self._tail_of_reference_link_re.match(text, p)
- if match:
- # Handle a reference-style anchor or img.
- #is_img = start_idx > 0 and text[start_idx-1] == "!"
- # if is_img:
- # start_idx -= 1
- is_img = False
-
- link_id = match.group("id").lower()
- if not link_id:
- link_id = link_text.lower() # for links like [this][]
- if link_id in self.urls:
- url = self.urls[link_id]
- # We've got to encode these to avoid conflicting
- # with italics/bold.
- url = url.replace('*', g_escape_table['*']) \
- .replace('_', g_escape_table['_'])
- title = self.titles.get(link_id)
- if title:
- title = title.replace('*', g_escape_table['*']) \
- .replace('_', g_escape_table['_'])
- title_str = ' title="%s"' % title
- else:
- title_str = ''
- if is_img:
- result = '<img src="%s" alt="%s"%s%s' \
- % (url.replace('"', '&quot;'),
- link_text.replace('"', '&quot;'),
- title_str, self.empty_element_suffix)
- curr_pos = start_idx + len(result)
- text = text[:start_idx] + \
- result + text[match.end():]
- elif start_idx >= anchor_allowed_pos:
- result = '<a href="%s"%s>%s</a>' \
- % (url, title_str, link_text)
- result_head = '<a href="%s"%s>' % (url, title_str)
- result = '%s%s</a>' % (result_head, link_text)
- # <img> allowed from curr_pos on, <a> from
- # anchor_allowed_pos on.
- curr_pos = start_idx + len(result_head)
- anchor_allowed_pos = start_idx + len(result)
- text = text[:start_idx] + \
- result + text[match.end():]
- else:
- # Anchor not allowed here.
- curr_pos = start_idx + 1
- else:
- # This id isn't defined, leave the markup alone.
- curr_pos = match.end()
- continue
-
- # Otherwise, it isn't markup.
- curr_pos = start_idx + 1
-
- return text
-
- def header_id_from_text(self, text, prefix):
- """Generate a header id attribute value from the given header
- HTML content.
-
- This is only called if the "header-ids" extra is enabled.
- Subclasses may override this for different header ids.
- """
- header_id = _slugify(text)
- if prefix:
- header_id = prefix + '-' + header_id
- if header_id in self._count_from_header_id:
- self._count_from_header_id[header_id] += 1
- header_id += '-%s' % self._count_from_header_id[header_id]
- else:
- self._count_from_header_id[header_id] = 1
- return header_id
-
- _toc = None
-
- def _toc_add_entry(self, level, id, name):
- if self._toc is None:
- self._toc = []
- self._toc.append((level, id, name))
-
- _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
-
- def _setext_h_sub(self, match):
- n = {"=": 1, "-": 2}[match.group(2)[0]]
- demote_headers = self.extras.get("demote-headers")
- if demote_headers:
- n = min(n + demote_headers, 6)
- header_id_attr = ""
- if "header-ids" in self.extras:
- header_id = self.header_id_from_text(match.group(1),
- prefix=self.extras["header-ids"])
- header_id_attr = ' id="%s"' % header_id
- html = self._run_span_gamut(match.group(1))
- if "toc" in self.extras:
- self._toc_add_entry(n, header_id, html)
- return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
-
- _atx_h_re = re.compile(r'''
- ^(\#{1,6}) # \1 = string of #'s
- [ \t]*
- (.+?) # \2 = Header text
- [ \t]*
- (?<!\\) # ensure not an escaped trailing '#'
- \#* # optional closing #'s (not counted)
- \n+
- ''', re.X | re.M)
-
- def _atx_h_sub(self, match):
- n = len(match.group(1))
- demote_headers = self.extras.get("demote-headers")
- if demote_headers:
- n = min(n + demote_headers, 6)
- header_id_attr = ""
- if "header-ids" in self.extras:
- header_id = self.header_id_from_text(match.group(2),
- prefix=self.extras["header-ids"])
- header_id_attr = ' id="%s"' % header_id
- html = self._run_span_gamut(match.group(2))
- if "toc" in self.extras:
- self._toc_add_entry(n, header_id, html)
- return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
-
- def _do_headers(self, text):
- # Setext-style headers:
- # Header 1
- # ========
- #
- # Header 2
- # --------
- text = self._setext_h_re.sub(self._setext_h_sub, text)
-
- # atx-style headers:
- # # Header 1
- # ## Header 2
- # ## Header 2 with closing hashes ##
- # ...
- # ###### Header 6
- text = self._atx_h_re.sub(self._atx_h_sub, text)
-
- return text
-
- _marker_ul_chars = '*+-'
- _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
- _marker_ul = '(?:[%s])' % _marker_ul_chars
- _marker_ol = r'(?:\d+\.)'
-
- def _list_sub(self, match):
- lst = match.group(1)
- lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol"
- result = self._process_list_items(lst)
- if self.list_level:
- return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
- else:
- return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
-
- def _do_lists(self, text):
- # Form HTML ordered (numbered) and unordered (bulleted) lists.
-
- for marker_pat in (self._marker_ul, self._marker_ol):
- # Re-usable pattern to match any entire ul or ol list:
- less_than_tab = self.tab_width - 1
- whole_list = r'''
- ( # \1 = whole list
- ( # \2
- [ ]{0,%d}
- (%s) # \3 = first list item marker
- [ \t]+
- )
- (?:.+?)
- ( # \4
- \Z
- |
- \n{2,}
- (?=\S)
- (?! # Negative lookahead for another list item marker
- [ \t]*
- %s[ \t]+
- )
- )
- )
- ''' % (less_than_tab, marker_pat, marker_pat)
-
- # We use a different prefix before nested lists than top-level lists.
- # See extended comment in _process_list_items().
- #
- # Note: There's a bit of duplication here. My original implementation
- # created a scalar regex pattern as the conditional result of the test on
- # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
- # substitution once, using the scalar as the pattern. This worked,
- # everywhere except when running under MT on my hosting account at Pair
- # Networks. There, this caused all rebuilds to be killed by the reaper (or
- # perhaps they crashed, but that seems incredibly unlikely given that the
- # same script on the same server ran fine *except* under MT. I've spent
- # more time trying to figure out why this is happening than I'd like to
- # admit. My only guess, backed up by the fact that this workaround works,
- # is that Perl optimizes the substition when it can figure out that the
- # pattern will never change, and when this optimization isn't on, we run
- # afoul of the reaper. Thus, the slightly redundant code to that uses two
- # static s/// patterns rather than one conditional pattern.
-
- if self.list_level:
- sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
- text = sub_list_re.sub(self._list_sub, text)
- else:
- list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
- re.X | re.M | re.S)
- text = list_re.sub(self._list_sub, text)
-
- return text
-
- _list_item_re = re.compile(r'''
- (\n)? # leading line = \1
- (^[ \t]*) # leading whitespace = \2
- (?P<marker>%s) [ \t]+ # list marker = \3
- ((?:.+?) # list item text = \4
- (\n{1,2})) # eols = \5
- (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+))
- ''' % (_marker_any, _marker_any),
- re.M | re.X | re.S)
-
- _last_li_endswith_two_eols = False
-
- def _list_item_sub(self, match):
- item = match.group(4)
- leading_line = match.group(1)
- leading_space = match.group(2)
- if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
- item = self._run_block_gamut(self._outdent(item))
- else:
- # Recursion for sub-lists:
- item = self._do_lists(self._outdent(item))
- if item.endswith('\n'):
- item = item[:-1]
- item = self._run_span_gamut(item)
- self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
- return "<li>%s</li>\n" % item
-
- def _process_list_items(self, list_str):
- # Process the contents of a single ordered or unordered list,
- # splitting it into individual list items.
-
- # The $g_list_level global keeps track of when we're inside a list.
- # Each time we enter a list, we increment it; when we leave a list,
- # we decrement. If it's zero, we're not in a list anymore.
- #
- # We do this because when we're not inside a list, we want to treat
- # something like this:
- #
- # I recommend upgrading to version
- # 8. Oops, now this line is treated
- # as a sub-list.
- #
- # As a single paragraph, despite the fact that the second line starts
- # with a digit-period-space sequence.
- #
- # Whereas when we're inside a list (or sub-list), that line will be
- # treated as the start of a sub-list. What a kludge, huh? This is
- # an aspect of Markdown's syntax that's hard to parse perfectly
- # without resorting to mind-reading. Perhaps the solution is to
- # change the syntax rules such that sub-lists must start with a
- # starting cardinal number; e.g. "1." or "a.".
- self.list_level += 1
- self._last_li_endswith_two_eols = False
- list_str = list_str.rstrip('\n') + '\n'
- list_str = self._list_item_re.sub(self._list_item_sub, list_str)
- self.list_level -= 1
- return list_str
-
- def _get_pygments_lexer(self, lexer_name):
- try:
- from pygments import lexers, util
- except ImportError:
- return None
- try:
- return lexers.get_lexer_by_name(lexer_name)
- except util.ClassNotFound:
- return None
-
- def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
- import pygments
- import pygments.formatters
-
- class HtmlCodeFormatter(pygments.formatters.HtmlFormatter):
- def _wrap_code(self, inner):
- """A function for use in a Pygments Formatter which
- wraps in <code> tags.
- """
- yield 0, "<code>"
- for tup in inner:
- yield tup
- yield 0, "</code>"
-
- def wrap(self, source, outfile):
- """Return the source with a code, pre, and div."""
- return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
-
- formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
- return pygments.highlight(codeblock, lexer, formatter)
-
- def _code_block_sub(self, match):
- codeblock = match.group(1)
- codeblock = self._outdent(codeblock)
- codeblock = self._detab(codeblock)
- codeblock = codeblock.lstrip('\n') # trim leading newlines
- codeblock = codeblock.rstrip() # trim trailing whitespace
-
- if "code-color" in self.extras and codeblock.startswith(":::"):
- lexer_name, rest = codeblock.split('\n', 1)
- lexer_name = lexer_name[3:].strip()
- lexer = self._get_pygments_lexer(lexer_name)
- codeblock = rest.lstrip("\n") # Remove lexer declaration line.
- if lexer:
- formatter_opts = self.extras['code-color'] or {}
- colored = self._color_with_pygments(codeblock, lexer,
- **formatter_opts)
- return "\n\n%s\n\n" % colored
-
- codeblock = self._encode_code(codeblock)
- pre_class_str = self._html_class_str_from_tag("pre")
- code_class_str = self._html_class_str_from_tag("code")
- return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % (
- pre_class_str, code_class_str, codeblock)
-
- def _html_class_str_from_tag(self, tag):
- """Get the appropriate ' class="..."' string (note the leading
- space), if any, for the given tag.
- """
- if "html-classes" not in self.extras:
- return ""
- try:
- html_classes_from_tag = self.extras["html-classes"]
- except TypeError:
- return ""
- else:
- if tag in html_classes_from_tag:
- return ' class="%s"' % html_classes_from_tag[tag]
- return ""
-
- def _do_code_blocks(self, text):
- """Process Markdown `<pre><code>` blocks."""
- code_block_re = re.compile(r'''
- (?:\n\n|\A)
- ( # $1 = the code block -- one or more lines, starting with a space/tab
- (?:
- (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
- .*\n+
- )+
- )
- ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
- ''' % (self.tab_width, self.tab_width),
- re.M | re.X)
-
- return code_block_re.sub(self._code_block_sub, text)
-
- # Rules for a code span:
- # - backslash escapes are not interpreted in a code span
- # - to include one or or a run of more backticks the delimiters must
- # be a longer run of backticks
- # - cannot start or end a code span with a backtick; pad with a
- # space and that space will be removed in the emitted HTML
- # See `test/tm-cases/escapes.text` for a number of edge-case
- # examples.
- _code_span_re = re.compile(r'''
- (?<!\\)
- (`+) # \1 = Opening run of `
- (?!`) # See Note A test/tm-cases/escapes.text
- (.+?) # \2 = The code block
- (?<!`)
- \1 # Matching closer
- (?!`)
- ''', re.X | re.S)
-
- def _code_span_sub(self, match):
- c = match.group(2).strip(" \t")
- c = self._encode_code(c)
- return "<code>%s</code>" % c
-
- def _do_code_spans(self, text):
- # * Backtick quotes are used for <code></code> spans.
- #
- # * You can use multiple backticks as the delimiters if you want to
- # include literal backticks in the code span. So, this input:
- #
- # Just type ``foo `bar` baz`` at the prompt.
- #
- # Will translate to:
- #
- # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
- #
- # There's no arbitrary limit to the number of backticks you
- # can use as delimters. If you need three consecutive backticks
- # in your code, use four for delimiters, etc.
- #
- # * You can use spaces to get literal backticks at the edges:
- #
- # ... type `` `bar` `` ...
- #
- # Turns to:
- #
- # ... type <code>`bar`</code> ...
- return self._code_span_re.sub(self._code_span_sub, text)
-
- def _encode_code(self, text):
- """Encode/escape certain characters inside Markdown code runs.
- The point is that in code, these characters are literals,
- and lose their special Markdown meanings.
- """
- replacements = [
- # Encode all ampersands; HTML entities are not
- # entities within a Markdown code span.
- ('&', '&amp;'),
- # Do the angle bracket song and dance:
- ('<', '&lt;'),
- ('>', '&gt;'),
- # Now, escape characters that are magic in Markdown:
- ('*', g_escape_table['*']),
- ('_', g_escape_table['_']),
- ('{', g_escape_table['{']),
- ('}', g_escape_table['}']),
- ('[', g_escape_table['[']),
- (']', g_escape_table[']']),
- ('\\', g_escape_table['\\']),
- ]
- for before, after in replacements:
- text = text.replace(before, after)
- return text
-
- _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
- _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
- # _spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S)
-
- _code_friendly_strong_re = re.compile(
- r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
- _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
-
- def _do_italics_and_bold(self, text):
- # <strong> must go first:
- if "code-friendly" in self.extras:
- text = self._code_friendly_strong_re.sub(
- r"<strong>\1</strong>", text)
- text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
- else:
- text = self._strong_re.sub(r"<strong>\2</strong>", text)
- text = self._em_re.sub(r"<em>\2</em>", text)
-
- #text = self._spoiler_re.sub("<del>\\1</del>", text)
- return text
-
- _block_quote_re = re.compile(r'''
- ( # Wrap whole match in \1
- (
- ^[ \t]*>[^>] # '>' at the start of a line
- .+\n # rest of the first line
- \n* # blanks
- )+
- )
- ''', re.M | re.X)
- _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
-
- _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
-
- def _dedent_two_spaces_sub(self, match):
- return re.sub(r'(?m)^ ', '', match.group(1))
-
- def _block_quote_sub(self, match):
- bq = match.group(1)
- # bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
- bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
- bq = bq.strip('\n')
- bq = self._run_span_gamut(bq)
- # bq = self._run_block_gamut(bq) # recurse
-
- bq = re.sub('(?m)^', ' ', bq)
- # These leading spaces screw with <pre> content, so we need to fix that:
- bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
-
- return "<blockquote>\n%s\n</blockquote>\n\n" % bq
-
- def _do_block_quotes(self, text):
- if '>' not in text:
- return text
- return self._block_quote_re.sub(self._block_quote_sub, text)
-
- def _form_paragraphs(self, text):
- # Strip leading and trailing lines:
- text = text.strip('\n')
-
- # Wrap <p> tags.
- grafs = []
- for i, graf in enumerate(re.split(r"\n{2,}", text)):
- if graf in self.html_blocks:
- # Unhashify HTML blocks
- grafs.append(self.html_blocks[graf])
- else:
- cuddled_list = None
- if "cuddled-lists" in self.extras:
- # Need to put back trailing '\n' for `_list_item_re`
- # match at the end of the paragraph.
- li = self._list_item_re.search(graf + '\n')
- # Two of the same list marker in this paragraph: a likely
- # candidate for a list cuddled to preceding paragraph
- # text (issue 33). Note the `[-1]` is a quick way to
- # consider numeric bullets (e.g. "1." and "2.") to be
- # equal.
- if (li and len(li.group(2)) <= 3 and li.group("next_marker")
- and li.group("marker")[-1] == li.group("next_marker")[-1]):
- start = li.start()
- cuddled_list = self._do_lists(
- graf[start:]).rstrip("\n")
- assert cuddled_list.startswith(
- "<ul>") or cuddled_list.startswith("<ol>")
- graf = graf[:start]
-
- # Wrap <p> tags.
- graf = self._run_span_gamut(graf)
- grafs.append("<p>" + graf.lstrip(" \t") + "</p>")
-
- if cuddled_list:
- grafs.append(cuddled_list)
-
- return "\n\n".join(grafs)
-
- def _add_footnotes(self, text):
- if self.footnotes:
- footer = [
- '<div class="footnotes">',
- '<hr' + self.empty_element_suffix,
- '<ol>',
- ]
- for i, id in enumerate(self.footnote_ids):
- if i != 0:
- footer.append('')
- footer.append('<li id="fn-%s">' % id)
- footer.append(self._run_block_gamut(self.footnotes[id]))
- backlink = ('<a href="#fnref-%s" '
- 'class="footnoteBackLink" '
- 'title="Jump back to footnote %d in the text.">'
- '&#8617;</a>' % (id, i+1))
- if footer[-1].endswith("</p>"):
- footer[-1] = footer[-1][:-len("</p>")] \
- + '&nbsp;' + backlink + "</p>"
- else:
- footer.append("\n<p>%s</p>" % backlink)
- footer.append('</li>')
- footer.append('</ol>')
- footer.append('</div>')
- return text + '\n\n' + '\n'.join(footer)
- else:
- return text
-
- # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
- # http://bumppo.net/projects/amputator/
- _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)')
- _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
- _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I)
-
- def _encode_amps_and_angles(self, text):
- # Smart processing for ampersands and angle brackets that need
- # to be encoded.
- text = self._ampersand_re.sub('&amp;', text)
-
- # Encode naked <'s
- text = self._naked_lt_re.sub('&lt;', text)
-
- # Encode naked >'s
- # Note: Other markdown implementations (e.g. Markdown.pl, PHP
- # Markdown) don't do this.
- text = self._naked_gt_re.sub('&gt;', text)
- return text
-
- def _encode_backslash_escapes(self, text):
- for ch, escape in g_escape_table.items():
- text = text.replace("\\"+ch, escape)
- return text
-
- _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
-
- def _auto_link_sub(self, match):
- g1 = match.group(1)
- return '<a href="%s">%s</a>' % (g1, g1)
-
- _auto_email_link_re = re.compile(r"""
- <
- (?:mailto:)?
- (
- [-.\w]+
- \@
- [-\w]+(\.[-\w]+)*\.[a-z]+
- )
- >
- """, re.I | re.X | re.U)
-
- def _auto_email_link_sub(self, match):
- return self._encode_email_address(
- self._unescape_special_chars(match.group(1)))
-
- def _do_auto_links(self, text):
- text = self._auto_link_re.sub(self._auto_link_sub, text)
- text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
- return text
-
- def _encode_email_address(self, addr):
- # Input: an email address, e.g. "foo@example.com"
- #
- # Output: the email address as a mailto link, with each character
- # of the address encoded as either a decimal or hex entity, in
- # the hopes of foiling most address harvesting spam bots. E.g.:
- #
- # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
- # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
- # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
- #
- # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
- # mailing list: <http://tinyurl.com/yu7ue>
- chars = [_xml_encode_email_char_at_random(ch)
- for ch in "mailto:" + addr]
- # Strip the mailto: from the visible part.
- addr = '<a href="%s">%s</a>' \
- % (''.join(chars), ''.join(chars[7:]))
- return addr
-
- def _do_link_patterns(self, text):
- """Caveat emptor: there isn't much guarding against link
- patterns being formed inside other standard Markdown links, e.g.
- inside a [link def][like this].
-
- Dev Notes: *Could* consider prefixing regexes with a negative
- lookbehind assertion to attempt to guard against this.
- """
- link_from_hash = {}
- for regex, repl in self.link_patterns:
- replacements = []
- for match in regex.finditer(text):
- if hasattr(repl, "__call__"):
- href = repl(match)
- else:
- href = match.expand(repl)
- replacements.append((match.span(), href))
- for (start, end), href in reversed(replacements):
- escaped_href = (
- href.replace('"', '&quot;') # b/c of attr quote
- # To avoid markdown <em> and <strong>:
- .replace('*', g_escape_table['*'])
- .replace('_', g_escape_table['_']))
- link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
- hash = _hash_text(link)
- link_from_hash[hash] = link
- text = text[:start] + hash + text[end:]
- for hash, link in link_from_hash.items():
- text = text.replace(hash, link)
- return text
-
- def _unescape_special_chars(self, text):
- # Swap back in all the special characters we've hidden.
- for ch, hash in g_escape_table.items():
- text = text.replace(hash, ch)
- return text
-
- def _outdent(self, text):
- # Remove one level of line-leading tabs or spaces
- return self._outdent_re.sub('', text)
-
-
-class MarkdownWithExtras(Markdown):
- """A markdowner class that enables most extras:
-
- - footnotes
- - code-color (only has effect if 'pygments' Python module on path)
-
- These are not included:
- - pyshell (specific to Python-related documenting)
- - code-friendly (because it *disables* part of the syntax)
- - link-patterns (because you need to specify some actual
- link-patterns anyway)
- """
- extras = ["footnotes", "code-color"]
-
-
-# ---- internal support functions
-
-class UnicodeWithAttrs(unicode):
- """A subclass of unicode used for the return value of conversion to
- possibly attach some attributes. E.g. the "toc_html" attribute when
- the "toc" extra is used.
- """
- _toc = None
- @property
- def toc_html(self):
- """Return the HTML for the current TOC.
-
- This expects the `_toc` attribute to have been set on this instance.
- """
- if self._toc is None:
- return None
-
- def indent():
- return ' ' * (len(h_stack) - 1)
- lines = []
- h_stack = [0] # stack of header-level numbers
- for level, id, name in self._toc:
- if level > h_stack[-1]:
- lines.append("%s<ul>" % indent())
- h_stack.append(level)
- elif level == h_stack[-1]:
- lines[-1] += "</li>"
- else:
- while level < h_stack[-1]:
- h_stack.pop()
- if not lines[-1].endswith("</li>"):
- lines[-1] += "</li>"
- lines.append("%s</ul></li>" % indent())
- lines.append(u'%s<li><a href="#%s">%s</a>' % (
- indent(), id, name))
- while len(h_stack) > 1:
- h_stack.pop()
- if not lines[-1].endswith("</li>"):
- lines[-1] += "</li>"
- lines.append("%s</ul>" % indent())
- return '\n'.join(lines) + '\n'
-
-
-_slugify_strip_re = re.compile(r'[^\w\s-]')
-_slugify_hyphenate_re = re.compile(r'[-\s]+')
-
-
-def _slugify(value):
- """
- Normalizes string, converts to lowercase, removes non-alpha characters,
- and converts spaces to hyphens.
-
- From Django's "django/template/defaultfilters.py".
- """
- import unicodedata
- value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
- value = unicode(_slugify_strip_re.sub('', value).strip().lower())
- return _slugify_hyphenate_re.sub('-', value)
-
-# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
-
-
-def _curry(*args, **kwargs):
- function, args = args[0], args[1:]
-
- def result(*rest, **kwrest):
- combined = kwargs.copy()
- combined.update(kwrest)
- return function(*args + rest, **combined)
- return result
-
-# Recipe: regex_from_encoded_pattern (1.0)
-
-
-def _regex_from_encoded_pattern(s):
- """'foo' -> re.compile(re.escape('foo'))
- '/foo/' -> re.compile('foo')
- '/foo/i' -> re.compile('foo', re.I)
- """
- if s.startswith('/') and s.rfind('/') != 0:
- # Parse it: /PATTERN/FLAGS
- idx = s.rfind('/')
- pattern, flags_str = s[1:idx], s[idx+1:]
- flag_from_char = {
- "i": re.IGNORECASE,
- "l": re.LOCALE,
- "s": re.DOTALL,
- "m": re.MULTILINE,
- "u": re.UNICODE,
- }
- flags = 0
- for char in flags_str:
- try:
- flags |= flag_from_char[char]
- except KeyError:
- raise ValueError("unsupported regex flag: '%s' in '%s' "
- "(must be one of '%s')"
- % (char, s, ''.join(flag_from_char.keys())))
- return re.compile(s[1:idx], flags)
- else: # not an encoded regex
- return re.compile(re.escape(s))
-
-# Recipe: dedent (0.1.2)
-
-
-def _dedentlines(lines, tabsize=8, skip_first_line=False):
- """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
-
- "lines" is a list of lines to dedent.
- "tabsize" is the tab width to use for indent width calculations.
- "skip_first_line" is a boolean indicating if the first line should
- be skipped for calculating the indent width and for dedenting.
- This is sometimes useful for docstrings and similar.
-
- Same as dedent() except operates on a sequence of lines. Note: the
- lines list is modified **in-place**.
- """
- DEBUG = False
- if DEBUG:
- print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
- % (tabsize, skip_first_line)
- indents = []
- margin = None
- for i, line in enumerate(lines):
- if i == 0 and skip_first_line:
- continue
- indent = 0
- for ch in line:
- if ch == ' ':
- indent += 1
- elif ch == '\t':
- indent += tabsize - (indent % tabsize)
- elif ch in '\r\n':
- continue # skip all-whitespace lines
- else:
- break
- else:
- continue # skip all-whitespace lines
- if DEBUG:
- print "dedent: indent=%d: %r" % (indent, line)
- if margin is None:
- margin = indent
- else:
- margin = min(margin, indent)
- if DEBUG:
- print "dedent: margin=%r" % margin
-
- if margin is not None and margin > 0:
- for i, line in enumerate(lines):
- if i == 0 and skip_first_line:
- continue
- removed = 0
- for j, ch in enumerate(line):
- if ch == ' ':
- removed += 1
- elif ch == '\t':
- removed += tabsize - (removed % tabsize)
- elif ch in '\r\n':
- if DEBUG:
- print "dedent: %r: EOL -> strip up to EOL" % line
- lines[i] = lines[i][j:]
- break
- else:
- raise ValueError("unexpected non-whitespace char %r in "
- "line %r while removing %d-space margin"
- % (ch, line, margin))
- if DEBUG:
- print "dedent: %r: %r -> removed %d/%d"\
- % (line, ch, removed, margin)
- if removed == margin:
- lines[i] = lines[i][j+1:]
- break
- elif removed > margin:
- lines[i] = ' '*(removed-margin) + lines[i][j+1:]
- break
- else:
- if removed:
- lines[i] = lines[i][removed:]
- return lines
-
-
-def _dedent(text, tabsize=8, skip_first_line=False):
- """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
-
- "text" is the text to dedent.
- "tabsize" is the tab width to use for indent width calculations.
- "skip_first_line" is a boolean indicating if the first line should
- be skipped for calculating the indent width and for dedenting.
- This is sometimes useful for docstrings and similar.
-
- textwrap.dedent(s), but don't expand tabs to spaces
- """
- lines = text.splitlines(1)
- _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
- return ''.join(lines)
-
-
-class _memoized(object):
- """Decorator that caches a function's return value each time it is called.
- If called later with the same arguments, the cached value is returned, and
- not re-evaluated.
-
- http://wiki.python.org/moin/PythonDecoratorLibrary
- """
-
- def __init__(self, func):
- self.func = func
- self.cache = {}
-
- def __call__(self, *args):
- try:
- return self.cache[args]
- except KeyError:
- self.cache[args] = value = self.func(*args)
- return value
- except TypeError:
- # uncachable -- for instance, passing a list as an argument.
- # Better to not cache than to blow up entirely.
- return self.func(*args)
-
- def __repr__(self):
- """Return the function's docstring."""
- return self.func.__doc__
-
-
-def _xml_oneliner_re_from_tab_width(tab_width):
- """Standalone XML processing instruction regex."""
- return re.compile(r"""
- (?:
- (?<=\n\n) # Starting after a blank line
- | # or
- \A\n? # the beginning of the doc
- )
- ( # save in $1
- [ ]{0,%d}
- (?:
- <\?\w+\b\s+.*?\?> # XML processing instruction
- |
- <\w+:\w+\b\s+.*?/> # namespaced single tag
- )
- [ \t]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
- )
- """ % (tab_width - 1), re.X)
-
-
-_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
-
-
-def _hr_tag_re_from_tab_width(tab_width):
- return re.compile(r"""
- (?:
- (?<=\n\n) # Starting after a blank line
- | # or
- \A\n? # the beginning of the doc
- )
- ( # save in \1
- [ ]{0,%d}
- <(hr) # start tag = \2
- \b # word break
- ([^<>])*? #
- /?> # the matching end tag
- [ \t]*
- (?=\n{2,}|\Z) # followed by a blank line or end of document
- )
- """ % (tab_width - 1), re.X)
-
-
-_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
-
-
-def _xml_encode_email_char_at_random(ch):
- r = random()
- # Roughly 10% raw, 45% hex, 45% dec.
- # '@' *must* be encoded. I [John Gruber] insist.
- # Issue 26: '_' must be encoded.
- if r > 0.9 and ch not in "@_":
- return ch
- elif r < 0.45:
- # The [1:] is to drop leading '0': 0x63 -> x63
- return '&#%s;' % hex(ord(ch))[1:]
- else:
- return '&#%s;' % ord(ch)
-
-
-#---- mainline
-
-class _NoReflowFormatter(optparse.IndentedHelpFormatter):
- """An optparse formatter that does NOT reflow the description."""
-
- def format_description(self, description):
- return description or ""
-
-
-def _test():
- import doctest
- doctest.testmod()
-
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv
- if not logging.root.handlers:
- logging.basicConfig()
-
- usage = "usage: %prog [PATHS...]"
- version = "%prog "+__version__
- parser = optparse.OptionParser(prog="markdown2", usage=usage,
- version=version, description=cmdln_desc,
- formatter=_NoReflowFormatter())
- parser.add_option("-v", "--verbose", dest="log_level",
- action="store_const", const=logging.DEBUG,
- help="more verbose output")
- parser.add_option("--encoding",
- help="specify encoding of text content")
- parser.add_option("--html4tags", action="store_true", default=False,
- help="use HTML 4 style for empty element tags")
- parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
- help="sanitize literal HTML: 'escape' escapes "
- "HTML meta chars, 'replace' replaces with an "
- "[HTML_REMOVED] note")
- parser.add_option("-x", "--extras", action="append",
- help="Turn on specific extra features (not part of "
- "the core Markdown spec). See above.")
- parser.add_option("--use-file-vars",
- help="Look for and use Emacs-style 'markdown-extras' "
- "file var to turn on extras. See "
- "<http://code.google.com/p/python-markdown2/wiki/Extras>.")
- parser.add_option("--link-patterns-file",
- help="path to a link pattern file")
- parser.add_option("--self-test", action="store_true",
- help="run internal self-tests (some doctests)")
- parser.add_option("--compare", action="store_true",
- help="run against Markdown.pl as well (for testing)")
- parser.set_defaults(log_level=logging.INFO, compare=False,
- encoding="utf-8", safe_mode=None, use_file_vars=False)
- opts, paths = parser.parse_args()
- log.setLevel(opts.log_level)
-
- if opts.self_test:
- return _test()
-
- if opts.extras:
- extras = {}
- for s in opts.extras:
- splitter = re.compile("[,;: ]+")
- for e in splitter.split(s):
- if '=' in e:
- ename, earg = e.split('=', 1)
- try:
- earg = int(earg)
- except ValueError:
- pass
- else:
- ename, earg = e, None
- extras[ename] = earg
- else:
- extras = None
-
- if opts.link_patterns_file:
- link_patterns = []
- f = open(opts.link_patterns_file)
- try:
- for i, line in enumerate(f.readlines()):
- if not line.strip():
- continue
- if line.lstrip().startswith("#"):
- continue
- try:
- pat, href = line.rstrip().rsplit(None, 1)
- except ValueError:
- raise MarkdownError("%s:%d: invalid link pattern line: %r"
- % (opts.link_patterns_file, i+1, line))
- link_patterns.append(
- (_regex_from_encoded_pattern(pat), href))
- finally:
- f.close()
- else:
- link_patterns = None
-
- from os.path import join, dirname, abspath, exists
- markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
- "Markdown.pl")
- for path in paths:
- if opts.compare:
- print "==== Markdown.pl ===="
- perl_cmd = 'perl %s "%s"' % (markdown_pl, path)
- o = os.popen(perl_cmd)
- perl_html = o.read()
- o.close()
- sys.stdout.write(perl_html)
- print "==== markdown2.py ===="
- html = markdown_path(path, encoding=opts.encoding,
- html4tags=opts.html4tags,
- safe_mode=opts.safe_mode,
- extras=extras, link_patterns=link_patterns,
- use_file_vars=opts.use_file_vars)
- sys.stdout.write(
- html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
- if extras and "toc" in extras:
- log.debug("toc_html: " +
- html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
- if opts.compare:
- test_dir = join(dirname(dirname(abspath(__file__))), "test")
- if exists(join(test_dir, "test_markdown2.py")):
- sys.path.insert(0, test_dir)
- from test_markdown2 import norm_html_from_html
- norm_html = norm_html_from_html(html)
- norm_perl_html = norm_html_from_html(perl_html)
- else:
- norm_html = html
- norm_perl_html = perl_html
- print "==== match? %r ====" % (norm_perl_html == norm_html)
-
-
-if __name__ == "__main__":
- sys.exit(main(sys.argv))
diff --git a/cgi/post.py b/cgi/post.py
index 6f7ff03..22d8197 100644
--- a/cgi/post.py
+++ b/cgi/post.py
@@ -149,10 +149,7 @@ def getThread(postid=0, mobile=False, timestamp=0):
thread["message"] = op_post["message"]
thread["locked"] = op_post["locked"]
thread["size"] = "%d KB" % int(total_bytes / 1000)
-
- #threads = [thread]
else:
- raise Exception(postid)
return None
finally:
database_lock.release()
@@ -416,7 +413,7 @@ def threadList(mode=0):
thread['message'] = thread['message'].replace('<br />', ' ')
thread['message'] = thread['message'].split("<hr />")[0]
thread['message'] = re.compile(r"<[^>]*?>", re.DOTALL | re.IGNORECASE).sub('', thread['message'])
- thread['message'] = thread['message'].decode('utf-8')[:cutFactor].encode('utf-8')
+ thread['message'] = thread['message'][:cutFactor]
thread['message'] = re.compile(r"&(.(?!;))*$", re.DOTALL | re.IGNORECASE).sub('', thread['message']) # Removes incomplete HTML entities
thread['timestamp_formatted'] = re.compile(r"\(.{1,3}\)", re.DOTALL | re.IGNORECASE).sub(" ", thread['timestamp_formatted'])
@@ -432,7 +429,7 @@ def threadList(mode=0):
lastreply['message'] = lastreply['message'].replace('<br />', ' ')
lastreply['message'] = lastreply['message'].split("<hr />")[0]
lastreply['message'] = re.compile(r"<[^>]*?>", re.DOTALL | re.IGNORECASE).sub('', lastreply['message'])
- lastreply['message'] = lastreply['message'].decode('utf-8')[:60].encode('utf-8')
+ lastreply['message'] = lastreply['message'][:60]
lastreply['message'] = re.compile(r"&(.(?!;))*$", re.DOTALL | re.IGNORECASE).sub('', lastreply['message']) # Removes incomplete HTML entities
lastreply['timestamp_formatted'] = re.compile(r"\(.{1,3}\)", re.DOTALL | re.IGNORECASE).sub(" ", lastreply['timestamp_formatted'])
thread["lastreply"] = lastreply
@@ -540,6 +537,7 @@ def dynamicRead(parentid, ranges, mobile=False):
if not thread:
# Try the archive
+ import json
fname = Settings.ROOT_DIR + board["dir"] + "/kako/" + str(parentid) + ".json"
if os.path.isfile(fname):
import json
@@ -809,6 +807,7 @@ def trimThreads():
logging.debug("Trimming threads")
board = Settings._.BOARD
archived = False
+ trimmed = []
# Use limit of the board type
if board['board_type'] == 1:
@@ -817,21 +816,22 @@ def trimThreads():
limit = Settings.MAX_THREADS
# trim expiring threads first
- if board['maxage'] != '0':
+ if board['maxage'] > 0:
t = time.time()
- alert_time = int(round(int(board['maxage']) * Settings.MAX_AGE_ALERT))
+ alert_time = int(round(board['maxage'] * Settings.MAX_AGE_ALERT))
time_limit = t + (alert_time * 86400)
old_ops = FetchAll("SELECT `id`, `timestamp`, `expires`, `expires_alert`, `length` FROM `posts` WHERE `boardid` = %s AND `parentid` = 0 AND IS_DELETED = 0 AND `expires` > 0 AND `expires` < %s LIMIT 50", (board['id'], time_limit))
for op in old_ops:
- if t >= int(op['expires']):
+ if t >= op['expires']:
# Trim old threads
if board['archive'] and op["length"] >= Settings.ARCHIVE_MIN_LENGTH:
archiveThread(op["id"])
archived = True
deletePost(op["id"], None)
+ trimmed.append(op["id"])
else:
# Add alert to threads approaching deletion
UpdateDb("UPDATE `posts` SET expires_alert = 1 WHERE `boardid` = %s AND `id` = %s", (board['id'], op['id']))
@@ -840,7 +840,7 @@ def trimThreads():
if board['maxinactive'] > 0:
t = time.time()
- oldest_last = t - (int(board['maxinactive']) * 86400)
+ oldest_last = t - (board['maxinactive'] * 86400)
old_ops = FetchAll("SELECT `id`, `length` FROM `posts` WHERE `boardid` = %s AND `parentid` = 0 AND IS_DELETED = 0 AND `last` < %s LIMIT 50", (board['id'], oldest_last))
for op in old_ops:
@@ -849,6 +849,7 @@ def trimThreads():
archived = True
deletePost(op["id"], None)
+ trimmed.append(op["id"])
# select trim type by board
if board['board_type'] == 1:
@@ -874,10 +875,12 @@ def trimThreads():
archived = True
deletePost(post["id"], None)
- pass
+ trimmed.append(op["id"])
if archived:
regenerateKako()
+
+ return trimmed
def autoclose_thread(parentid, t, replies):
"""
@@ -1099,7 +1102,6 @@ def regenerateAccess():
if not Settings.HTACCESS_GEN:
return False
-
boards = FetchAll('SELECT `dir` FROM `boards`')
global_boards = [board['dir'] for board in boards if board['dir'] not in Settings.EXCLUDE_GLOBAL_BANS]
@@ -1108,11 +1110,11 @@ def regenerateAccess():
board_bans = {}
if Settings.ENABLE_BANS:
- bans = FetchAll("SELECT `ipstr`, `boards` FROM `bans` WHERE `blind` = '1' ORDER BY `ipstart` ASC")
+ bans = FetchAll("SELECT `ipstr`, `boards` FROM `bans` WHERE `blind` = 1 ORDER BY `ipstart` ASC")
for ban in bans:
if ban["boards"]:
- boards = pickle.loads(ban["boards"])
+ boards = str2boards(ban["boards"])
for board in boards:
board_bans.setdefault(board, []).append(ban["ipstr"])
else:
@@ -1233,7 +1235,8 @@ def archiveThread(postid):
except:
raise UserError("Can't archive: %s" % thread['timestamp'])
- UpdateDb("REPLACE INTO archive (id, boardid, timestamp, subject, length) VALUES ('%s', '%s', '%s', '%s', '%s')" % (thread['id'], board['id'], thread['timestamp'], _mysql.escape_string(thread['subject']), thread['length']))
+ UpdateDb("REPLACE INTO archive (oldid, boardid, timestamp, subject, length) VALUES (%s, %s, %s, %s, %s)",
+ (thread['id'], board['id'], thread['timestamp'], thread['subject'], thread['length']))
def throw_dice(dice):
qty = int(dice[0][1:])
diff --git a/cgi/templates/bans_geo b/cgi/templates/bans_geo
index 64687e8..ba7959a 100644
--- a/cgi/templates/bans_geo
+++ b/cgi/templates/bans_geo
@@ -4,7 +4,7 @@ geo $bans_global {
#{ip} 1;
<?py #endfor ?>
}
-<?py for board, bans in board_bans.iteritems(): ?>
+<?py for board, bans in board_bans.items(): ?>
geo $bans_#{board} {
default 0;
<?py for ip in bans: ?>
diff --git a/cgi/templates/bans_locations b/cgi/templates/bans_locations
index a514ccf..87ac1b7 100644
--- a/cgi/templates/bans_locations
+++ b/cgi/templates/bans_locations
@@ -4,7 +4,7 @@ location /#{board}/ {
if ($bans_global) { rewrite ^ /cgi/banned; }
<?py #endif ?>
<?py if board in board_bans: ?>
- if ($bans_#{board}) { rewrite ^ /cgi/banned; }
+ if ($bans_#{board}) { rewrite ^ /cgi/banned/#{board}; }
<?py #endif ?>
}
<?py #endfor ?>
diff --git a/cgi/templates/manage/boardoptions.html b/cgi/templates/manage/boardoptions.html
index fcd3bb8..4e33e5b 100644
--- a/cgi/templates/manage/boardoptions.html
+++ b/cgi/templates/manage/boardoptions.html
@@ -174,7 +174,7 @@
</tr>
<tr>
<td class="postblock">Archivar hilos</td>
-<td><input type="checkbox" name="archive" id="arch" value="1"#{checked(boardopts['archive'] == '1')} /><label for="arch"></label></td>
+<td><input type="checkbox" name="archive" id="arch" value="1"#{checked(boardopts['archive'])} /><label for="arch"></label></td>
</tr>
<tr>
<td class="postblock">Espera para crear nuevo hilo</td>
diff --git a/cgi/templates/mobile/txt_thread.html b/cgi/templates/mobile/txt_thread.html
index c9b58a8..3df16fc 100644
--- a/cgi/templates/mobile/txt_thread.html
+++ b/cgi/templates/mobile/txt_thread.html
@@ -11,7 +11,7 @@
<?py if thread['length'] > 50: ?>
<a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/-50" rel="nofollow">Primeros 50</a>
<?py #endif ?>
-<?py r = range(thread['length'] / 50) ?>
+<?py r = range(thread['length'] // 50) ?>
<?py for i in r[:-1]: ?>
<a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/#{(i+1)*50+1}-#{(i+2)*50}" rel="nofollow">#{(i+1)*50+1}-#{(i+2)*50}</a>
<?py #endfor ?>
@@ -29,9 +29,9 @@
<div id="thread">
<h1>#{thread['subject']} <span>(#{thread['length']})</span></h1>
<?py for post in thread['posts']: ?>
-<?py if post['IS_DELETED'] == '1': ?>
+<?py if post['IS_DELETED'] == 1: ?>
<div class="pst"><h3 class="del"><a href="#" class="num">#{str(post['num']).zfill(4)}</a> Eliminado por el usuario.</h3></div>
-<?py elif post['IS_DELETED'] == '2': ?>
+<?py elif post['IS_DELETED'] == 2: ?>
<div class="pst"><h3 class="del"><a href="#" class="num">#{str(post['num']).zfill(4)}</a> Eliminado por miembro del staff.</h3></div>
<?py else: ?>
<div id="p#{post['id']}" class="pst">
@@ -42,7 +42,7 @@
</div>
<?py #endif ?>
<?py #endfor ?>
-<?py if thread['locked'] != '1': ?>
+<?py if not thread['locked']: ?>
<a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/#{thread['length']}-n" id="n">Ver nuevos posts</a><span id="n2"></span>
<?py #endif ?>
<div class="nav">
@@ -51,7 +51,7 @@
<div><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}">Hilo completo</a><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/-50">Primeros 50</a><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/l10">Últimos 25</a></div>
<?py #endif ?>
</div>
-<?py if thread['locked'] != '1': ?>
+<?py if not thread['locked']: ?>
<form name="postform" id="postform" action="/cgi/post" method="post" enctype="multipart/form-data">
<input type="hidden" name="board" value="#{board}" /><input type="hidden" name="parent" value="#{thread['id']}" /><input type="hidden" name="mobile" value="true" /><input type="hidden" name="password" value="" />
<div style="display:none"><input type="text" name="name" /><input type="text" name="email" /></div>
diff --git a/cgi/templates/revision.html b/cgi/templates/revision.html
index 78bc1ab..9028ec6 100644
--- a/cgi/templates/revision.html
+++ b/cgi/templates/revision.html
@@ -1 +1 @@
-0.10.0
+0.10.5
diff --git a/cgi/templates/txt_archive.html b/cgi/templates/txt_archive.html
index 88b3196..1407fcf 100644
--- a/cgi/templates/txt_archive.html
+++ b/cgi/templates/txt_archive.html
@@ -43,9 +43,9 @@
<div class="thread" data-length="#{thread['length']}">
<h3>#{thread['subject']} <span>(${(str(thread['length'])+" respuestas") if thread['length'] > 1 else "Una respuesta"})</span></h3>
<?py for post in thread['posts']: ?>
- <?py if post['IS_DELETED'] == '1': ?>
+ <?py if post['IS_DELETED'] == 1: ?>
<div class="reply deleted" data-n="#{post['num']}"><h4>#{post['num']} : Mensaje eliminado por usuario.</h4></div>
- <?py elif post['IS_DELETED'] == '2': ?>
+ <?py elif post['IS_DELETED'] == 2: ?>
<div class="reply deleted" data-n="#{post['num']}"><h4>#{post['num']} : Mensaje eliminado por staff.</h4></div>
<?py else: ?>
<div class="reply#{' first' if post['num'] == 1 else ''}" data-n="#{post['num']}">
diff --git a/cgi/templates/txt_thread.en.html b/cgi/templates/txt_thread.en.html
index 4b16fca..c8348a1 100644
--- a/cgi/templates/txt_thread.en.html
+++ b/cgi/templates/txt_thread.en.html
@@ -9,7 +9,7 @@
<?py if thread['length'] > 100: ?>
<a href="#{boards_url}#{board}/read/#{thread['timestamp']}/1-100">First 100</a>
<?py #endif ?>
- <?py for i in range(thread['length'] / 100): ?>
+ <?py for i in range(thread['length'] // 100): ?>
<a href="#{boards_url}#{board}/read/#{thread['timestamp']}/#{(i+1)*100+1}-#{(i+2)*100}">#{(i+1)*100+1}-</a>
<?py #endfor ?>
<?py if thread['length'] > 51: ?>
@@ -28,9 +28,9 @@
<div class="thread" data-length="#{thread['length']}">
<h3>#{thread['subject']} <span>(${(str(thread['length'])+" replies") if thread['length']>1 else "1 reply"})</span></h3>
<?py for post in thread['posts']: ?>
- <?py if post['IS_DELETED'] == '1': ?>
+ <?py if post['IS_DELETED'] == 1: ?>
<div class="reply deleted" id="p#{post['id']}" data-n="#{post['num']}"><h4>#{post['num']} : Post deleted by user.</h4></div>
- <?py elif post['IS_DELETED'] == '2': ?>
+ <?py elif post['IS_DELETED'] == 2: ?>
<div class="reply deleted" id="p#{post['id']}" data-n="#{post['num']}"><h4>#{post['num']} : Post deleted by staff.</h4></div>
<?py else: ?>
<div class="reply#{' first' if post['num'] == 1 else ''}" id="p#{post['id']}" data-n="#{post['num']}">
@@ -61,7 +61,7 @@
<div id="size">#{thread['size']}</div>
</div>
<hr />
-<?py if thread['locked'] != '1': ?>
+<?py if not thread['locked']: ?>
<div class="lastposts"><a href="#{boards_url}#{board}/read/#{thread['timestamp']}/#{thread['length']}-n" id="n">Show new posts</a></div>
<hr />
<?py #endif ?>
@@ -105,4 +105,4 @@
<div class="end">weabot.py ver <?py include('templates/revision.html') ?> Bienvenido a Internet BBS/IB</div>
<a name="bottom"></a>
</body>
-</html> \ No newline at end of file
+</html>
diff --git a/cgi/tenjin.py b/cgi/tenjin.py
deleted file mode 100644
index ddc12bb..0000000
--- a/cgi/tenjin.py
+++ /dev/null
@@ -1,2293 +0,0 @@
-##
-# $Release: 1.1.1 $
-# $Copyright: copyright(c) 2007-2012 kuwata-lab.com all rights reserved. $
-# $License: MIT License $
-##
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-##
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-##
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-##
-
-"""Very fast and light-weight template engine based embedded Python.
- See User's Guide and examples for details.
- http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html
- http://www.kuwata-lab.com/tenjin/pytenjin-examples.html
-"""
-
-__version__ = "$Release: 1.1.1 $"[10:-2]
-__license__ = "$License: MIT License $"[10:-2]
-__all__ = ('Template', 'Engine', )
-
-
-from os.path import isfile as _isfile
-from os.path import getmtime as _getmtime
-from time import time as _time
-import sys
-import os
-import re
-import time
-import marshal
-random = pickle = unquote = None # lazy import
-python3 = sys.version_info[0] == 3
-python2 = sys.version_info[0] == 2
-
-logger = None
-
-
-##
-# utilities
-##
-
-def _write_binary_file(filename, content):
- global random
- if random is None:
- from random import random
- tmpfile = filename + str(random())[1:]
- f = open(tmpfile, 'w+b') # on windows, 'w+b' is preffered than 'wb'
- try:
- f.write(content)
- finally:
- f.close()
- if os.path.exists(tmpfile):
- try:
- os.rename(tmpfile, filename)
- except:
- # on windows, existing file should be removed before renaming
- os.remove(filename)
- os.rename(tmpfile, filename)
-
-
-def _read_binary_file(filename):
- f = open(filename, 'rb')
- try:
- return f.read()
- finally:
- f.close()
-
-
-codecs = None # lazy import
-
-
-def _read_text_file(filename, encoding=None):
- global codecs
- if not codecs:
- import codecs
- f = codecs.open(filename, encoding=(encoding or 'utf-8'))
- try:
- return f.read()
- finally:
- f.close()
-
-
-def _read_template_file(filename, encoding=None):
- s = _read_binary_file(filename) # binary(=str)
- if encoding:
- s = s.decode(encoding) # binary(=str) to unicode
- return s
-
-
-_basestring = basestring
-_unicode = unicode
-_bytes = str
-
-
-def _ignore_not_found_error(f, default=None):
- try:
- return f()
- except OSError, ex:
- if ex.errno == 2: # error: No such file or directory
- return default
- raise
-
-
-def create_module(module_name, dummy_func=None, **kwargs):
- """ex. mod = create_module('tenjin.util')"""
- try:
- mod = type(sys)(module_name)
- except:
- # The module creation above does not work for Jython 2.5.2
- import imp
- mod = imp.new_module(module_name)
-
- mod.__file__ = __file__
- mod.__dict__.update(kwargs)
- sys.modules[module_name] = mod
- if dummy_func:
- exec(dummy_func.func_code, mod.__dict__)
- return mod
-
-
-def _raise(exception_class, *args):
- raise exception_class(*args)
-
-
-##
-# helper method's module
-##
-
-def _dummy():
- global unquote
- unquote = None
- global to_str, escape, echo, new_cycle, generate_tostrfunc
- global start_capture, stop_capture, capture_as, captured_as, CaptureContext
- global _p, _P, _decode_params
-
- def generate_tostrfunc(encode=None, decode=None):
- """Generate 'to_str' function with encode or decode encoding.
- ex. generate to_str() function which encodes unicode into binary(=str).
- to_str = tenjin.generate_tostrfunc(encode='utf-8')
- repr(to_str(u'hoge')) #=> 'hoge' (str)
- ex. generate to_str() function which decodes binary(=str) into unicode.
- to_str = tenjin.generate_tostrfunc(decode='utf-8')
- repr(to_str('hoge')) #=> u'hoge' (unicode)
- """
- if encode:
- if decode:
- raise ValueError(
- "can't specify both encode and decode encoding.")
- else:
- def to_str(val, _str=str, _unicode=unicode, _isa=isinstance, _encode=encode):
- """Convert val into string or return '' if None. Unicode will be encoded into binary(=str)."""
- if _isa(val, _str):
- return val
- if val is None:
- return ''
- # if _isa(val, _unicode): return val.encode(_encode) # unicode to binary(=str)
- if _isa(val, _unicode):
- return val.encode(_encode) # unicode to binary(=str)
- return _str(val)
- else:
- if decode:
- def to_str(val, _str=str, _unicode=unicode, _isa=isinstance, _decode=decode):
- """Convert val into string or return '' if None. Binary(=str) will be decoded into unicode."""
- # if _isa(val, _str): return val.decode(_decode) # binary(=str) to unicode
- if _isa(val, _str):
- return val.decode(_decode)
- if val is None:
- return ''
- if _isa(val, _unicode):
- return val
- return _unicode(val)
- else:
- def to_str(val, _str=str, _unicode=unicode, _isa=isinstance):
- """Convert val into string or return '' if None. Both binary(=str) and unicode will be retruned as-is."""
- if _isa(val, _str):
- return val
- if val is None:
- return ''
- if _isa(val, _unicode):
- return val
- return _str(val)
- return to_str
-
- to_str = generate_tostrfunc(encode='utf-8') # or encode=None?
-
- def echo(string):
- """add string value into _buf. this is equivarent to '#{string}'."""
- lvars = sys._getframe(1).f_locals # local variables
- lvars['_buf'].append(string)
-
- def new_cycle(*values):
- """Generate cycle object.
- ex.
- cycle = new_cycle('odd', 'even')
- print(cycle()) #=> 'odd'
- print(cycle()) #=> 'even'
- print(cycle()) #=> 'odd'
- print(cycle()) #=> 'even'
- """
- def gen(values):
- i, n = 0, len(values)
- while True:
- yield values[i]
- i = (i + 1) % n
- return gen(values).next
-
- class CaptureContext(object):
-
- def __init__(self, name, store_to_context=True, lvars=None):
- self.name = name
- self.store_to_context = store_to_context
- self.lvars = lvars or sys._getframe(1).f_locals
-
- def __enter__(self):
- lvars = self.lvars
- self._buf_orig = lvars['_buf']
- lvars['_buf'] = _buf = []
- lvars['_extend'] = _buf.extend
- return self
-
- def __exit__(self, *args):
- lvars = self.lvars
- _buf = lvars['_buf']
- lvars['_buf'] = self._buf_orig
- lvars['_extend'] = self._buf_orig.extend
- lvars[self.name] = self.captured = ''.join(_buf)
- if self.store_to_context and '_context' in lvars:
- lvars['_context'][self.name] = self.captured
-
- def __iter__(self):
- self.__enter__()
- yield self
- self.__exit__()
-
- def start_capture(varname=None, _depth=1):
- """(obsolete) start capturing with name."""
- lvars = sys._getframe(_depth).f_locals
- capture_context = CaptureContext(varname, None, lvars)
- lvars['_capture_context'] = capture_context
- capture_context.__enter__()
-
- def stop_capture(store_to_context=True, _depth=1):
- """(obsolete) stop capturing and return the result of capturing.
- if store_to_context is True then the result is stored into _context[varname].
- """
- lvars = sys._getframe(_depth).f_locals
- capture_context = lvars.pop('_capture_context', None)
- if not capture_context:
- raise Exception(
- 'stop_capture(): start_capture() is not called before.')
- capture_context.store_to_context = store_to_context
- capture_context.__exit__()
- return capture_context.captured
-
- def capture_as(name, store_to_context=True):
- """capture partial of template."""
- return CaptureContext(name, store_to_context, sys._getframe(1).f_locals)
-
- def captured_as(name, _depth=1):
- """helper method for layout template.
- if captured string is found then append it to _buf and return True,
- else return False.
- """
- lvars = sys._getframe(_depth).f_locals # local variables
- if name in lvars:
- _buf = lvars['_buf']
- _buf.append(lvars[name])
- return True
- return False
-
- def _p(arg):
- """ex. '/show/'+_p("item['id']") => "/show/#{item['id']}" """
- return '<`#%s#`>' % arg # decoded into #{...} by preprocessor
-
- def _P(arg):
- """ex. '<b>%s</b>' % _P("item['id']") => "<b>${item['id']}</b>" """
- return '<`$%s$`>' % arg # decoded into ${...} by preprocessor
-
- def _decode_params(s):
- """decode <`#...#`> and <`$...$`> into #{...} and ${...}"""
- global unquote
- if unquote is None:
- from urllib import unquote
- dct = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', '#039': "'", }
-
- def unescape(s):
- # return s.replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"').replace('&#039;', "'").replace('&amp;', '&')
- return re.sub(r'&(lt|gt|quot|amp|#039);', lambda m: dct[m.group(1)], s)
- s = to_str(s)
- s = re.sub(r'%3C%60%23(.*?)%23%60%3E',
- lambda m: '#{%s}' % unquote(m.group(1)), s)
- s = re.sub(r'%3C%60%24(.*?)%24%60%3E',
- lambda m: '${%s}' % unquote(m.group(1)), s)
- s = re.sub(r'&lt;`#(.*?)#`&gt;',
- lambda m: '#{%s}' % unescape(m.group(1)), s)
- s = re.sub(r'&lt;`\$(.*?)\$`&gt;',
- lambda m: '${%s}' % unescape(m.group(1)), s)
- s = re.sub(r'<`#(.*?)#`>', r'#{\1}', s)
- s = re.sub(r'<`\$(.*?)\$`>', r'${\1}', s)
- return s
-
-
-helpers = create_module('tenjin.helpers', _dummy, sys=sys, re=re)
-helpers.__all__ = ['to_str', 'escape', 'echo', 'new_cycle', 'generate_tostrfunc',
- 'start_capture', 'stop_capture', 'capture_as', 'captured_as',
- 'not_cached', 'echo_cached', 'cache_as',
- '_p', '_P', '_decode_params',
- ]
-generate_tostrfunc = helpers.generate_tostrfunc
-
-
-##
-# escaped module
-##
-def _dummy():
- global is_escaped, as_escaped, to_escaped
- global Escaped, EscapedStr, EscapedUnicode
- global __all__
- # 'Escaped', 'EscapedStr',
- __all__ = ('is_escaped', 'as_escaped', 'to_escaped', )
-
- class Escaped(object):
- """marking class that object is already escaped."""
- pass
-
- def is_escaped(value):
- """return True if value is marked as escaped, else return False."""
- return isinstance(value, Escaped)
-
- class EscapedStr(str, Escaped):
- """string class which is marked as escaped."""
- pass
-
- class EscapedUnicode(unicode, Escaped):
- """unicode class which is marked as escaped."""
- pass
-
- def as_escaped(s):
- """mark string as escaped, without escaping."""
- if isinstance(s, str):
- return EscapedStr(s)
- if isinstance(s, unicode):
- return EscapedUnicode(s)
- raise TypeError("as_escaped(%r): expected str or unicode." % (s, ))
-
- def to_escaped(value):
- """convert any value into string and escape it.
- if value is already marked as escaped, don't escape it."""
- if hasattr(value, '__html__'):
- value = value.__html__()
- if is_escaped(value):
- # return value # EscapedUnicode should be convered into EscapedStr
- return as_escaped(_helpers.to_str(value))
- # if isinstance(value, _basestring):
- # return as_escaped(_helpers.escape(value))
- return as_escaped(_helpers.escape(_helpers.to_str(value)))
-
-
-escaped = create_module('tenjin.escaped', _dummy, _helpers=helpers)
-
-
-##
-# module for html
-##
-def _dummy():
- global escape_html, escape_xml, escape, tagattr, tagattrs, _normalize_attrs
- global checked, selected, disabled, nl2br, text2html, nv, js_link
-
- # _escape_table = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
- #_escape_pattern = re.compile(r'[&<>"]')
- ##_escape_callable = lambda m: _escape_table[m.group(0)]
- ##_escape_callable = lambda m: _escape_table.__get__(m.group(0))
- #_escape_get = _escape_table.__getitem__
- #_escape_callable = lambda m: _escape_get(m.group(0))
- #_escape_sub = _escape_pattern.sub
-
- # def escape_html(s):
- # return s # 3.02
-
- # def escape_html(s):
- # return _escape_pattern.sub(_escape_callable, s) # 6.31
-
- # def escape_html(s):
- # return _escape_sub(_escape_callable, s) # 6.01
-
- # def escape_html(s, _p=_escape_pattern, _f=_escape_callable):
- # return _p.sub(_f, s) # 6.27
-
- # def escape_html(s, _sub=_escape_pattern.sub, _callable=_escape_callable):
- # return _sub(_callable, s) # 6.04
-
- # def escape_html(s):
- # s = s.replace('&', '&amp;')
- # s = s.replace('<', '&lt;')
- # s = s.replace('>', '&gt;')
- # s = s.replace('"', '&quot;')
- # return s # 5.83
-
- def escape_html(s):
- """Escape '&', '<', '>', '"' into '&amp;', '&lt;', '&gt;', '&quot;'."""
- return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;') # 5.72
-
- escape_xml = escape_html # for backward compatibility
-
- def tagattr(name, expr, value=None, escape=True):
- """(experimental) Return ' name="value"' if expr is true value, else '' (empty string).
- If value is not specified, expr is used as value instead."""
- if not expr and expr != 0:
- return _escaped.as_escaped('')
- if value is None:
- value = expr
- if escape:
- value = _escaped.to_escaped(value)
- return _escaped.as_escaped(' %s="%s"' % (name, value))
-
- def tagattrs(**kwargs):
- """(experimental) built html tag attribtes.
- ex.
- >>> tagattrs(klass='main', size=20)
- ' class="main" size="20"'
- >>> tagattrs(klass='', size=0)
- ''
- """
- kwargs = _normalize_attrs(kwargs)
- esc = _escaped.to_escaped
- s = ''.join([' %s="%s"' % (k, esc(v))
- for k, v in kwargs.iteritems() if v or v == 0])
- return _escaped.as_escaped(s)
-
- def _normalize_attrs(kwargs):
- if 'klass' in kwargs:
- kwargs['class'] = kwargs.pop('klass')
- if 'checked' in kwargs:
- kwargs['checked'] = kwargs.pop('checked') and 'checked' or None
- if 'selected' in kwargs:
- kwargs['selected'] = kwargs.pop('selected') and 'selected' or None
- if 'disabled' in kwargs:
- kwargs['disabled'] = kwargs.pop('disabled') and 'disabled' or None
- return kwargs
-
- def checked(expr):
- """return ' checked="checked"' if expr is true."""
- return _escaped.as_escaped(expr and ' checked="checked"' or '')
-
- def selected(expr):
- """return ' selected="selected"' if expr is true."""
- return _escaped.as_escaped(expr and ' selected="selected"' or '')
-
- def disabled(expr):
- """return ' disabled="disabled"' if expr is true."""
- return _escaped.as_escaped(expr and ' disabled="disabled"' or '')
-
- def nl2br(text):
- """replace "\n" to "<br />\n" and return it."""
- if not text:
- return _escaped.as_escaped('')
- return _escaped.as_escaped(text.replace('\n', '<br />\n'))
-
- def text2html(text, use_nbsp=True):
- """(experimental) escape xml characters, replace "\n" to "<br />\n", and return it."""
- if not text:
- return _escaped.as_escaped('')
- s = _escaped.to_escaped(text)
- if use_nbsp:
- s = s.replace(' ', ' &nbsp;')
- # return nl2br(s)
- s = s.replace('\n', '<br />\n')
- return _escaped.as_escaped(s)
-
- def nv(name, value, sep=None, **kwargs):
- """(experimental) Build name and value attributes.
- ex.
- >>> nv('rank', 'A')
- 'name="rank" value="A"'
- >>> nv('rank', 'A', '.')
- 'name="rank" value="A" id="rank.A"'
- >>> nv('rank', 'A', '.', checked=True)
- 'name="rank" value="A" id="rank.A" checked="checked"'
- >>> nv('rank', 'A', '.', klass='error', style='color:red')
- 'name="rank" value="A" id="rank.A" class="error" style="color:red"'
- """
- name = _escaped.to_escaped(name)
- value = _escaped.to_escaped(value)
- s = sep and 'name="%s" value="%s" id="%s"' % (name, value, name+sep+value) \
- or 'name="%s" value="%s"' % (name, value)
- html = kwargs and s + tagattrs(**kwargs) or s
- return _escaped.as_escaped(html)
-
- def js_link(label, onclick, **kwargs):
- s = kwargs and tagattrs(**kwargs) or ''
- html = '<a href="javascript:undefined" onclick="%s;return false"%s>%s</a>' % \
- (_escaped.to_escaped(onclick), s, _escaped.to_escaped(label))
- return _escaped.as_escaped(html)
-
-
-html = create_module('tenjin.html', _dummy, helpers=helpers, _escaped=escaped)
-helpers.escape = html.escape_html
-helpers.html = html # for backward compatibility
-sys.modules['tenjin.helpers.html'] = html
-
-
-##
-# utility function to set default encoding of template files
-##
-_template_encoding = (None, 'utf-8') # encodings for decode and encode
-
-
-def set_template_encoding(decode=None, encode=None):
- """Set default encoding of template files.
- This should be called before importing helper functions.
- ex.
- ## I like template files to be unicode-base like Django.
- import tenjin
- tenjin.set_template_encoding('utf-8') # should be called before importing helpers
- from tenjin.helpers import *
- """
- global _template_encoding
- if _template_encoding == (decode, encode):
- return
- if decode and encode:
- raise ValueError(
- "set_template_encoding(): cannot specify both decode and encode.")
- if not decode and not encode:
- raise ValueError(
- "set_template_encoding(): decode or encode should be specified.")
- if decode:
- Template.encoding = decode # unicode base template
- helpers.to_str = helpers.generate_tostrfunc(decode=decode)
- else:
- Template.encoding = None # binary base template
- helpers.to_str = helpers.generate_tostrfunc(encode=encode)
- _template_encoding = (decode, encode)
-
-
-##
-# Template class
-##
-
-class TemplateSyntaxError(SyntaxError):
-
- def build_error_message(self):
- ex = self
- if not ex.text:
- return self.args[0]
- return ''.join([
- "%s:%s:%s: %s\n" % (ex.filename, ex.lineno, ex.offset, ex.msg, ),
- "%4d: %s\n" % (ex.lineno, ex.text.rstrip(), ),
- " %s^\n" % (' ' * ex.offset, ),
- ])
-
-
-class Template(object):
- """Convert and evaluate embedded python string.
- See User's Guide and examples for details.
- http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html
- http://www.kuwata-lab.com/tenjin/pytenjin-examples.html
- """
-
- # default value of attributes
- filename = None
- encoding = None
- escapefunc = 'escape'
- tostrfunc = 'to_str'
- indent = 4
- preamble = None # "_buf = []; _expand = _buf.expand; _to_str = to_str; _escape = escape"
- postamble = None # "print ''.join(_buf)"
- smarttrim = None
- args = None
- timestamp = None
- trace = False # if True then '<!-- begin: file -->' and '<!-- end: file -->' are printed
-
- def __init__(self, filename=None, encoding=None, input=None, escapefunc=None, tostrfunc=None,
- indent=None, preamble=None, postamble=None, smarttrim=None, trace=None):
- """Initailizer of Template class.
-
- filename:str (=None)
- Filename to convert (optional). If None, no convert.
- encoding:str (=None)
- Encoding name. If specified, template string is converted into
- unicode object internally.
- Template.render() returns str object if encoding is None,
- else returns unicode object if encoding name is specified.
- input:str (=None)
- Input string. In other words, content of template file.
- Template file will not be read if this argument is specified.
- escapefunc:str (='escape')
- Escape function name.
- tostrfunc:str (='to_str')
- 'to_str' function name.
- indent:int (=4)
- Indent width.
- preamble:str or bool (=None)
- Preamble string which is inserted into python code.
- If true, '_buf = []; ' is used insated.
- postamble:str or bool (=None)
- Postamble string which is appended to python code.
- If true, 'print("".join(_buf))' is used instead.
- smarttrim:bool (=None)
- If True then "<div>\\n#{_context}\\n</div>" is parsed as
- "<div>\\n#{_context}</div>".
- """
- if encoding is not None:
- self.encoding = encoding
- if escapefunc is not None:
- self.escapefunc = escapefunc
- if tostrfunc is not None:
- self.tostrfunc = tostrfunc
- if indent is not None:
- self.indent = indent
- if preamble is not None:
- self.preamble = preamble
- if postamble is not None:
- self.postamble = postamble
- if smarttrim is not None:
- self.smarttrim = smarttrim
- if trace is not None:
- self.trace = trace
- #
- if preamble is True:
- self.preamble = "_buf = []"
- if postamble is True:
- self.postamble = "print(''.join(_buf))"
- if input:
- self.convert(input, filename)
- # False means 'file not exist' (= Engine should not check timestamp of file)
- self.timestamp = False
- elif filename:
- self.convert_file(filename)
- else:
- self._reset()
-
- def _reset(self, input=None, filename=None):
- self.script = None
- self.bytecode = None
- self.input = input
- self.filename = filename
- if input != None:
- i = input.find("\n")
- if i < 0:
- self.newline = "\n" # or None
- elif len(input) >= 2 and input[i-1] == "\r":
- self.newline = "\r\n"
- else:
- self.newline = "\n"
- self._localvars_assignments_added = False
-
- def _localvars_assignments(self):
- return "_extend=_buf.extend;_to_str=%s;_escape=%s; " % (self.tostrfunc, self.escapefunc)
-
- def before_convert(self, buf):
- if self.preamble:
- eol = self.input.startswith('<?py') and "\n" or "; "
- buf.append(self.preamble + eol)
-
- def after_convert(self, buf):
- if self.postamble:
- if buf and not buf[-1].endswith("\n"):
- buf.append("\n")
- buf.append(self.postamble + "\n")
-
- def convert_file(self, filename):
- """Convert file into python script and return it.
- This is equivarent to convert(open(filename).read(), filename).
- """
- input = _read_template_file(filename)
- return self.convert(input, filename)
-
- def convert(self, input, filename=None):
- """Convert string in which python code is embedded into python script and return it.
-
- input:str
- Input string to convert into python code.
- filename:str (=None)
- Filename of input. this is optional but recommended to report errors.
- """
- if self.encoding and isinstance(input, str):
- input = input.decode(self.encoding)
- self._reset(input, filename)
- buf = []
- self.before_convert(buf)
- self.parse_stmts(buf, input)
- self.after_convert(buf)
- script = ''.join(buf)
- self.script = script
- return script
-
- STMT_PATTERN = (r'<\?py( |\t|\r?\n)(.*?) ?\?>([ \t]*\r?\n)?', re.S)
-
- def stmt_pattern(self):
- pat = self.STMT_PATTERN
- if isinstance(pat, tuple):
- pat = self.__class__.STMT_PATTERN = re.compile(*pat)
- return pat
-
- def parse_stmts(self, buf, input):
- if not input:
- return
- rexp = self.stmt_pattern()
- is_bol = True
- index = 0
- for m in rexp.finditer(input):
- mspace, code, rspace = m.groups()
- #mspace, close, rspace = m.groups()
- #code = input[m.start()+4+len(mspace):m.end()-len(close)-(rspace and len(rspace) or 0)]
- text = input[index:m.start()]
- index = m.end()
- # detect spaces at beginning of line
- lspace = None
- if text == '':
- if is_bol:
- lspace = ''
- elif text[-1] == '\n':
- lspace = ''
- else:
- rindex = text.rfind('\n')
- if rindex < 0:
- if is_bol and text.isspace():
- lspace, text = text, ''
- else:
- s = text[rindex+1:]
- if s.isspace():
- lspace, text = s, text[:rindex+1]
- #is_bol = rspace is not None
- # add text, spaces, and statement
- self.parse_exprs(buf, text, is_bol)
- is_bol = rspace is not None
- # if mspace == "\n":
- if mspace and mspace.endswith("\n"):
- code = "\n" + (code or "")
- # if rspace == "\n":
- if rspace and rspace.endswith("\n"):
- code = (code or "") + "\n"
- if code:
- code = self.statement_hook(code)
- m = self._match_to_args_declaration(code)
- if m:
- self._add_args_declaration(buf, m)
- else:
- self.add_stmt(buf, code)
- rest = input[index:]
- if rest:
- self.parse_exprs(buf, rest)
- self._arrange_indent(buf)
-
- def statement_hook(self, stmt):
- """expand macros and parse '#@ARGS' in a statement."""
- return stmt.replace("\r\n", "\n") # Python can't handle "\r\n" in code
-
- def _match_to_args_declaration(self, stmt):
- if self.args is not None:
- return None
- args_pattern = r'^ *#@ARGS(?:[ \t]+(.*?))?$'
- return re.match(args_pattern, stmt)
-
- def _add_args_declaration(self, buf, m):
- arr = (m.group(1) or '').split(',')
- args = []
- declares = []
- for s in arr:
- arg = s.strip()
- if not s:
- continue
- if not re.match('^[a-zA-Z_]\w*$', arg):
- raise ValueError("%r: invalid template argument." % arg)
- args.append(arg)
- declares.append("%s = _context.get('%s'); " % (arg, arg))
- self.args = args
- #nl = stmt[m.end():]
- #if nl: declares.append(nl)
- buf.append(''.join(declares) + "\n")
-
- s = '(?:\{.*?\}.*?)*'
- EXPR_PATTERN = (
- r'#\{(.*?'+s+r')\}|\$\{(.*?'+s+r')\}|\{=(?:=(.*?)=|(.*?))=\}', re.S)
- del s
-
- def expr_pattern(self):
- pat = self.EXPR_PATTERN
- if isinstance(pat, tuple):
- self.__class__.EXPR_PATTERN = pat = re.compile(*pat)
- return pat
-
- def get_expr_and_flags(self, match):
- expr1, expr2, expr3, expr4 = match.groups()
- if expr1 is not None:
- return expr1, (False, True) # not escape, call to_str
- if expr2 is not None:
- return expr2, (True, True) # call escape, call to_str
- if expr3 is not None:
- return expr3, (False, True) # not escape, call to_str
- if expr4 is not None:
- return expr4, (True, True) # call escape, call to_str
-
- def parse_exprs(self, buf, input, is_bol=False):
- buf2 = []
- self._parse_exprs(buf2, input, is_bol)
- if buf2:
- buf.append(''.join(buf2))
-
- def _parse_exprs(self, buf, input, is_bol=False):
- if not input:
- return
- self.start_text_part(buf)
- rexp = self.expr_pattern()
- smarttrim = self.smarttrim
- nl = self.newline
- nl_len = len(nl)
- pos = 0
- for m in rexp.finditer(input):
- start = m.start()
- text = input[pos:start]
- pos = m.end()
- expr, flags = self.get_expr_and_flags(m)
- #
- if text:
- self.add_text(buf, text)
- self.add_expr(buf, expr, *flags)
- #
- if smarttrim:
- flag_bol = text.endswith(
- nl) or not text and (start > 0 or is_bol)
- if flag_bol and not flags[0] and input[pos:pos+nl_len] == nl:
- pos += nl_len
- buf.append("\n")
- if smarttrim:
- if buf and buf[-1] == "\n":
- buf.pop()
- rest = input[pos:]
- if rest:
- self.add_text(buf, rest, True)
- self.stop_text_part(buf)
- if input[-1] == '\n':
- buf.append("\n")
-
- def start_text_part(self, buf):
- self._add_localvars_assignments_to_text(buf)
- # buf.append("_buf.extend((")
- buf.append("_extend((")
-
- def _add_localvars_assignments_to_text(self, buf):
- if not self._localvars_assignments_added:
- self._localvars_assignments_added = True
- buf.append(self._localvars_assignments())
-
- def stop_text_part(self, buf):
- buf.append("));")
-
- def _quote_text(self, text):
- text = re.sub(r"(['\\\\])", r"\\\1", text)
- text = text.replace("\r\n", "\\r\n")
- return text
-
- def add_text(self, buf, text, encode_newline=False):
- if not text:
- return
- use_unicode = self.encoding and python2
- buf.append(use_unicode and "u'''" or "'''")
- text = self._quote_text(text)
- if not encode_newline:
- buf.extend((text, "''', "))
- elif text.endswith("\r\n"):
- buf.extend((text[0:-2], "\\r\\n''', "))
- elif text.endswith("\n"):
- buf.extend((text[0:-1], "\\n''', "))
- else:
- buf.extend((text, "''', "))
-
- _add_text = add_text
-
- def add_expr(self, buf, code, *flags):
- if not code or code.isspace():
- return
- flag_escape, flag_tostr = flags
- if not self.tostrfunc:
- flag_tostr = False
- if not self.escapefunc:
- flag_escape = False
- if flag_tostr and flag_escape:
- s1, s2 = "_escape(_to_str(", ")), "
- elif flag_tostr:
- s1, s2 = "_to_str(", "), "
- elif flag_escape:
- s1, s2 = "_escape(", "), "
- else:
- s1, s2 = "(", "), "
- buf.extend((s1, code, s2, ))
-
- def add_stmt(self, buf, code):
- if not code:
- return
- lines = code.splitlines(True) # keep "\n"
- if lines[-1][-1] != "\n":
- lines[-1] = lines[-1] + "\n"
- buf.extend(lines)
- self._add_localvars_assignments_to_stmts(buf)
-
- def _add_localvars_assignments_to_stmts(self, buf):
- if self._localvars_assignments_added:
- return
- for index, stmt in enumerate(buf):
- if not re.match(r'^[ \t]*(?:\#|_buf ?= ?\[\]|from __future__)', stmt):
- break
- else:
- return
- self._localvars_assignments_added = True
- if re.match(r'^[ \t]*(if|for|while|def|with|class)\b', stmt):
- buf.insert(index, self._localvars_assignments() + "\n")
- else:
- buf[index] = self._localvars_assignments() + buf[index]
-
- _START_WORDS = dict.fromkeys(
- ('for', 'if', 'while', 'def', 'try:', 'with', 'class'), True)
- _END_WORDS = dict.fromkeys(('#end', '#endfor', '#endif', '#endwhile',
- '#enddef', '#endtry', '#endwith', '#endclass'), True)
- _CONT_WORDS = dict.fromkeys(
- ('elif', 'else:', 'except', 'except:', 'finally:'), True)
- _WORD_REXP = re.compile(r'\S+')
-
- depth = -1
-
- ##
- # ex.
- # input = r"""
- # if items:
- ## _buf.extend(('<ul>\n', ))
- ## i = 0
- # for item in items:
- ## i += 1
- ## _buf.extend(('<li>', to_str(item), '</li>\n', ))
- # endfor
- ## _buf.extend(('</ul>\n', ))
- # endif
- # """[1:]
- ## lines = input.splitlines(True)
- ## block = self.parse_lines(lines)
- # => [ "if items:\n",
- # [ "_buf.extend(('<ul>\n', ))\n",
- ## "i = 0\n",
- ## "for item in items:\n",
- # [ "i += 1\n",
- ## "_buf.extend(('<li>', to_str(item), '</li>\n', ))\n",
- # ],
- # "#endfor\n",
- ## "_buf.extend(('</ul>\n', ))\n",
- # ],
- # "#endif\n",
- # ]
- def parse_lines(self, lines):
- block = []
- try:
- self._parse_lines(lines.__iter__(), False, block, 0)
- except StopIteration:
- if self.depth > 0:
- fname, linenum, colnum, linetext = self.filename, len(
- lines), None, None
- raise TemplateSyntaxError(
- "unexpected EOF.", (fname, linenum, colnum, linetext))
- else:
- pass
- return block
-
- def _parse_lines(self, lines_iter, end_block, block, linenum):
- if block is None:
- block = []
- _START_WORDS = self._START_WORDS
- _END_WORDS = self._END_WORDS
- _CONT_WORDS = self._CONT_WORDS
- _WORD_REXP = self._WORD_REXP
- get_line = lines_iter.next
- while True:
- line = get_line()
- linenum += line.count("\n")
- m = _WORD_REXP.search(line)
- if not m:
- block.append(line)
- continue
- word = m.group(0)
- if word in _END_WORDS:
- if word != end_block and word != '#end':
- if end_block is False:
- msg = "'%s' found but corresponding statement is missing." % (
- word, )
- else:
- msg = "'%s' expected but got '%s'." % (end_block, word)
- colnum = m.start() + 1
- raise TemplateSyntaxError(
- msg, (self.filename, linenum, colnum, line))
- return block, line, None, linenum
- elif line.endswith(':\n') or line.endswith(':\r\n'):
- if word in _CONT_WORDS:
- return block, line, word, linenum
- elif word in _START_WORDS:
- block.append(line)
- self.depth += 1
- cont_word = None
- try:
- child_block, line, cont_word, linenum = \
- self._parse_lines(
- lines_iter, '#end'+word, [], linenum)
- block.extend((child_block, line, ))
- while cont_word: # 'elif' or 'else:'
- child_block, line, cont_word, linenum = \
- self._parse_lines(
- lines_iter, '#end'+word, [], linenum)
- block.extend((child_block, line, ))
- except StopIteration:
- msg = "'%s' is not closed." % (cont_word or word)
- colnum = m.start() + 1
- raise TemplateSyntaxError(
- msg, (self.filename, linenum, colnum, line))
- self.depth -= 1
- else:
- block.append(line)
- else:
- block.append(line)
- assert "unreachable"
-
- def _join_block(self, block, buf, depth):
- indent = ' ' * (self.indent * depth)
- for line in block:
- if isinstance(line, list):
- self._join_block(line, buf, depth+1)
- elif line.isspace():
- buf.append(line)
- else:
- buf.append(indent + line.lstrip())
-
- def _arrange_indent(self, buf):
- """arrange indentation of statements in buf"""
- block = self.parse_lines(buf)
- buf[:] = []
- self._join_block(block, buf, 0)
-
- def render(self, context=None, globals=None, _buf=None):
- """Evaluate python code with context dictionary.
- If _buf is None then return the result of evaluation as str,
- else return None.
-
- context:dict (=None)
- Context object to evaluate. If None then new dict is created.
- globals:dict (=None)
- Global object. If None then globals() is used.
- _buf:list (=None)
- If None then new list is created.
- """
- if context is None:
- locals = context = {}
- elif self.args is None:
- locals = context.copy()
- else:
- locals = {}
- if '_engine' in context:
- context.get('_engine').hook_context(locals)
- locals['_context'] = context
- if globals is None:
- globals = sys._getframe(1).f_globals
- bufarg = _buf
- if _buf is None:
- _buf = []
- locals['_buf'] = _buf
- if not self.bytecode:
- self.compile()
- if self.trace:
- _buf.append("<!-- ***** begin: %s ***** -->\n" % self.filename)
- exec(self.bytecode, globals, locals)
- _buf.append("<!-- ***** end: %s ***** -->\n" % self.filename)
- else:
- exec(self.bytecode, globals, locals)
- if bufarg is not None:
- return bufarg
- elif not logger:
- return ''.join(_buf)
- else:
- try:
- return ''.join(_buf)
- except UnicodeDecodeError, ex:
- logger.error("[tenjin.Template] " + str(ex))
- logger.error("[tenjin.Template] (_buf=%r)" % (_buf, ))
- raise
-
- def compile(self):
- """compile self.script into self.bytecode"""
- self.bytecode = compile(
- self.script, self.filename or '(tenjin)', 'exec')
-
-
-##
-# preprocessor class
-##
-
-class Preprocessor(Template):
- """Template class for preprocessing."""
-
- STMT_PATTERN = (r'<\?PY( |\t|\r?\n)(.*?) ?\?>([ \t]*\r?\n)?', re.S)
-
- EXPR_PATTERN = (
- r'#\{\{(.*?)\}\}|\$\{\{(.*?)\}\}|\{#=(?:=(.*?)=|(.*?))=#\}', re.S)
-
- def add_expr(self, buf, code, *flags):
- if not code or code.isspace():
- return
- code = "_decode_params(%s)" % code
- Template.add_expr(self, buf, code, *flags)
-
-
-class TemplatePreprocessor(object):
- factory = Preprocessor
-
- def __init__(self, factory=None):
- if factory is not None:
- self.factory = factory
- self.globals = sys._getframe(1).f_globals
-
- def __call__(self, input, **kwargs):
- filename = kwargs.get('filename')
- context = kwargs.get('context') or {}
- globals = kwargs.get('globals') or self.globals
- template = self.factory()
- template.convert(input, filename)
- return template.render(context, globals=globals)
-
-
-class TrimPreprocessor(object):
-
- _rexp = re.compile(r'^[ \t]+<', re.M)
- _rexp_all = re.compile(r'^[ \t]+', re.M)
-
- def __init__(self, all=False):
- self.all = all
-
- def __call__(self, input, **kwargs):
- if self.all:
- return self._rexp_all.sub('', input)
- else:
- return self._rexp.sub('<', input)
-
-
-class PrefixedLinePreprocessor(object):
-
- def __init__(self, prefix='::(?=[ \t]|$)'):
- self.prefix = prefix
- self.regexp = re.compile(r'^([ \t]*)' + prefix + r'(.*)', re.M)
-
- def convert_prefixed_lines(self, text):
- def fn(m): return "%s<?py%s ?>" % (m.group(1), m.group(2))
- return self.regexp.sub(fn, text)
-
- STMT_REXP = re.compile(r'<\?py\s.*?\?>', re.S)
-
- def __call__(self, input, **kwargs):
- buf = []
- append = buf.append
- pos = 0
- for m in self.STMT_REXP.finditer(input):
- text = input[pos:m.start()]
- stmt = m.group(0)
- pos = m.end()
- if text:
- append(self.convert_prefixed_lines(text))
- append(stmt)
- rest = input[pos:]
- if rest:
- append(self.convert_prefixed_lines(rest))
- return "".join(buf)
-
-
-class ParseError(Exception):
- pass
-
-
-class JavaScriptPreprocessor(object):
-
- def __init__(self, **attrs):
- self._attrs = attrs
-
- def __call__(self, input, **kwargs):
- return self.parse(input, kwargs.get('filename'))
-
- def parse(self, input, filename=None):
- buf = []
- self._parse_chunks(input, buf, filename)
- return ''.join(buf)
-
- CHUNK_REXP = re.compile(
- r'(?:^( *)<|<)!-- *#(?:JS: (\$?\w+(?:\.\w+)*\(.*?\))|/JS:?) *-->([ \t]*\r?\n)?', re.M)
-
- def _scan_chunks(self, input, filename):
- rexp = self.CHUNK_REXP
- pos = 0
- curr_funcdecl = None
- for m in rexp.finditer(input):
- lspace, funcdecl, rspace = m.groups()
- text = input[pos:m.start()]
- pos = m.end()
- if funcdecl:
- if curr_funcdecl:
- raise ParseError("%s is nested in %s. (file: %s, line: %s)" %
- (funcdecl, curr_funcdecl, filename, _linenum(input, m.start()), ))
- curr_funcdecl = funcdecl
- else:
- if not curr_funcdecl:
- raise ParseError("unexpected '<!-- #/JS -->'. (file: %s, line: %s)" %
- (filename, _linenum(input, m.start()), ))
- curr_funcdecl = None
- yield text, lspace, funcdecl, rspace, False
- if curr_funcdecl:
- raise ParseError("%s is not closed by '<!-- #/JS -->'. (file: %s, line: %s)" %
- (curr_funcdecl, filename, _linenum(input, m.start()), ))
- rest = input[pos:]
- yield rest, None, None, None, True
-
- def _parse_chunks(self, input, buf, filename=None):
- if not input:
- return
- stag = '<script'
- if self._attrs:
- for k in self._attrs:
- stag = "".join((stag, ' ', k, '="', self._attrs[k], '"'))
- stag += '>'
- etag = '</script>'
- for text, lspace, funcdecl, rspace, end_p in self._scan_chunks(input, filename):
- if end_p:
- break
- if funcdecl:
- buf.append(text)
- if re.match(r'^\$?\w+\(', funcdecl):
- buf.extend((lspace or '', stag, 'function ',
- funcdecl, "{var _buf='';", rspace or ''))
- else:
- m = re.match(r'(.+?)\((.*)\)', funcdecl)
- buf.extend((lspace or '', stag, m.group(
- 1), '=function(', m.group(2), "){var _buf='';", rspace or ''))
- else:
- self._parse_stmts(text, buf)
- buf.extend(
- (lspace or '', "return _buf;};", etag, rspace or ''))
- #
- buf.append(text)
-
- STMT_REXP = re.compile(
- r'(?:^( *)<|<)\?js(\s.*?) ?\?>([ \t]*\r?\n)?', re.M | re.S)
-
- def _scan_stmts(self, input):
- rexp = self.STMT_REXP
- pos = 0
- for m in rexp.finditer(input):
- lspace, code, rspace = m.groups()
- text = input[pos:m.start()]
- pos = m.end()
- yield text, lspace, code, rspace, False
- rest = input[pos:]
- yield rest, None, None, None, True
-
- def _parse_stmts(self, input, buf):
- if not input:
- return
- for text, lspace, code, rspace, end_p in self._scan_stmts(input):
- if end_p:
- break
- if lspace is not None and rspace is not None:
- self._parse_exprs(text, buf)
- buf.extend((lspace, code, rspace))
- else:
- if lspace:
- text += lspace
- self._parse_exprs(text, buf)
- buf.append(code)
- if rspace:
- self._parse_exprs(rspace, buf)
- if text:
- self._parse_exprs(text, buf)
-
- s = r'(?:\{[^{}]*?\}[^{}]*?)*'
- EXPR_REXP = re.compile(r'\{=(.*?)=\}|([$#])\{(.*?' + s + r')\}', re.S)
- del s
-
- def _get_expr(self, m):
- code1, ch, code2 = m.groups()
- if ch:
- code = code2
- escape_p = ch == '$'
- elif code1[0] == code1[-1] == '=':
- code = code1[1:-1]
- escape_p = False
- else:
- code = code1
- escape_p = True
- return code, escape_p
-
- def _scan_exprs(self, input):
- rexp = self.EXPR_REXP
- pos = 0
- for m in rexp.finditer(input):
- text = input[pos:m.start()]
- pos = m.end()
- code, escape_p = self._get_expr(m)
- yield text, code, escape_p, False
- rest = input[pos:]
- yield rest, None, None, True
-
- def _parse_exprs(self, input, buf):
- if not input:
- return
- buf.append("_buf+=")
- extend = buf.extend
- op = ''
- for text, code, escape_p, end_p in self._scan_exprs(input):
- if end_p:
- break
- if text:
- extend((op, self._escape_text(text)))
- op = '+'
- if code:
- extend((op, escape_p and '_E(' or '_S(', code, ')'))
- op = '+'
- rest = text
- if rest:
- extend((op, self._escape_text(rest)))
- if input.endswith("\n"):
- buf.append(";\n")
- else:
- buf.append(";")
-
- def _escape_text(self, text):
- lines = text.splitlines(True)
- fn = self._escape_str
- s = "\\\n".join(fn(line) for line in lines)
- return "".join(("'", s, "'"))
-
- def _escape_str(self, string):
- return string.replace("\\", "\\\\").replace("'", "\\'").replace("\n", r"\n")
-
-
-def _linenum(input, pos):
- return input[0:pos].count("\n") + 1
-
-
-JS_FUNC = r"""
-function _S(x){return x==null?'':x;}
-function _E(x){return x==null?'':typeof(x)!=='string'?x:x.replace(/[&<>"']/g,_EF);}
-var _ET={'&':"&amp;",'<':"&lt;",'>':"&gt;",'"':"&quot;","'":"&#039;"};
-function _EF(c){return _ET[c];};
-"""[1:-1]
-JS_FUNC = escaped.EscapedStr(JS_FUNC)
-
-
-##
-# cache storages
-##
-
-class CacheStorage(object):
- """[abstract] Template object cache class (in memory and/or file)"""
-
- def __init__(self):
- self.items = {} # key: full path, value: template object
-
- def get(self, cachepath, create_template):
- """get template object. if not found, load attributes from cache file and restore template object."""
- template = self.items.get(cachepath)
- if not template:
- dct = self._load(cachepath)
- if dct:
- template = create_template()
- for k in dct:
- setattr(template, k, dct[k])
- self.items[cachepath] = template
- return template
-
- def set(self, cachepath, template):
- """set template object and save template attributes into cache file."""
- self.items[cachepath] = template
- dct = self._save_data_of(template)
- return self._store(cachepath, dct)
-
- def _save_data_of(self, template):
- return {'args': template.args, 'bytecode': template.bytecode,
- 'script': template.script, 'timestamp': template.timestamp}
-
- def unset(self, cachepath):
- """remove template object from dict and cache file."""
- self.items.pop(cachepath, None)
- return self._delete(cachepath)
-
- def clear(self):
- """remove all template objects and attributes from dict and cache file."""
- d, self.items = self.items, {}
- for k in d.iterkeys():
- self._delete(k)
- d.clear()
-
- def _load(self, cachepath):
- """(abstract) load dict object which represents template object attributes from cache file."""
- raise NotImplementedError.new(
- "%s#_load(): not implemented yet." % self.__class__.__name__)
-
- def _store(self, cachepath, template):
- """(abstract) load dict object which represents template object attributes from cache file."""
- raise NotImplementedError.new(
- "%s#_store(): not implemented yet." % self.__class__.__name__)
-
- def _delete(self, cachepath):
- """(abstract) remove template object from cache file."""
- raise NotImplementedError.new(
- "%s#_delete(): not implemented yet." % self.__class__.__name__)
-
-
-class MemoryCacheStorage(CacheStorage):
-
- def _load(self, cachepath):
- return None
-
- def _store(self, cachepath, template):
- pass
-
- def _delete(self, cachepath):
- pass
-
-
-class FileCacheStorage(CacheStorage):
-
- def _load(self, cachepath):
- if not _isfile(cachepath):
- return None
- if logger:
- logger.info("[tenjin.%s] load cache (file=%r)" %
- (self.__class__.__name__, cachepath))
- data = _read_binary_file(cachepath)
- return self._restore(data)
-
- def _store(self, cachepath, dct):
- if logger:
- logger.info("[tenjin.%s] store cache (file=%r)" %
- (self.__class__.__name__, cachepath))
- data = self._dump(dct)
- _write_binary_file(cachepath, data)
-
- def _restore(self, data):
- raise NotImplementedError(
- "%s._restore(): not implemented yet." % self.__class__.__name__)
-
- def _dump(self, dct):
- raise NotImplementedError(
- "%s._dump(): not implemented yet." % self.__class__.__name__)
-
- def _delete(self, cachepath):
- _ignore_not_found_error(lambda: os.unlink(cachepath))
-
-
-class MarshalCacheStorage(FileCacheStorage):
-
- def _restore(self, data):
- return marshal.loads(data)
-
- def _dump(self, dct):
- return marshal.dumps(dct)
-
-
-class PickleCacheStorage(FileCacheStorage):
-
- def __init__(self, *args, **kwargs):
- global pickle
- if pickle is None:
- import cPickle as pickle
- FileCacheStorage.__init__(self, *args, **kwargs)
-
- def _restore(self, data):
- return pickle.loads(data)
-
- def _dump(self, dct):
- dct.pop('bytecode', None)
- return pickle.dumps(dct)
-
-
-class TextCacheStorage(FileCacheStorage):
-
- def _restore(self, data):
- header, script = data.split("\n\n", 1)
- timestamp = encoding = args = None
- for line in header.split("\n"):
- key, val = line.split(": ", 1)
- if key == 'timestamp':
- timestamp = float(val)
- elif key == 'encoding':
- encoding = val
- elif key == 'args':
- args = val.split(', ')
- if encoding:
- script = script.decode(encoding) # binary(=str) to unicode
- return {'args': args, 'script': script, 'timestamp': timestamp}
-
- def _dump(self, dct):
- s = dct['script']
- if dct.get('encoding') and isinstance(s, unicode):
- s = s.encode(dct['encoding']) # unicode to binary(=str)
- sb = []
- sb.append("timestamp: %s\n" % dct['timestamp'])
- if dct.get('encoding'):
- sb.append("encoding: %s\n" % dct['encoding'])
- if dct.get('args') is not None:
- sb.append("args: %s\n" % ', '.join(dct['args']))
- sb.append("\n")
- sb.append(s)
- s = ''.join(sb)
- if python3:
- if isinstance(s, str):
- # unicode(=str) to binary
- s = s.encode(dct.get('encoding') or 'utf-8')
- return s
-
- def _save_data_of(self, template):
- dct = FileCacheStorage._save_data_of(self, template)
- dct['encoding'] = template.encoding
- return dct
-
-
-##
-# abstract class for data cache
-##
-class KeyValueStore(object):
-
- def get(self, key, *options):
- raise NotImplementedError(
- "%s.get(): not implemented yet." % self.__class__.__name__)
-
- def set(self, key, value, *options):
- raise NotImplementedError(
- "%s.set(): not implemented yet." % self.__class__.__name__)
-
- def delete(self, key, *options):
- raise NotImplementedError(
- "%s.del(): not implemented yet." % self.__class__.__name__)
-
- def has(self, key, *options):
- raise NotImplementedError(
- "%s.has(): not implemented yet." % self.__class__.__name__)
-
-
-##
-# memory base data cache
-##
-class MemoryBaseStore(KeyValueStore):
-
- def __init__(self):
- self.values = {}
-
- def get(self, key, original_timestamp=None):
- tupl = self.values.get(key)
- if not tupl:
- return None
- value, created_at, expires_at = tupl
- if original_timestamp is not None and created_at < original_timestamp:
- self.delete(key)
- return None
- if expires_at < _time():
- self.delete(key)
- return None
- return value
-
- def set(self, key, value, lifetime=0):
- created_at = _time()
- expires_at = lifetime and created_at + lifetime or 0
- self.values[key] = (value, created_at, expires_at)
- return True
-
- def delete(self, key):
- try:
- del self.values[key]
- return True
- except KeyError:
- return False
-
- def has(self, key):
- pair = self.values.get(key)
- if not pair:
- return False
- value, created_at, expires_at = pair
- if expires_at and expires_at < _time():
- self.delete(key)
- return False
- return True
-
-
-##
-# file base data cache
-##
-class FileBaseStore(KeyValueStore):
-
- lifetime = 604800 # = 60*60*24*7
-
- def __init__(self, root_path, encoding=None):
- if not os.path.isdir(root_path):
- raise ValueError("%r: directory not found." % (root_path, ))
- self.root_path = root_path
- if encoding is None and python3:
- encoding = 'utf-8'
- self.encoding = encoding
-
- _pat = re.compile(r'[^-.\/\w]')
-
- def filepath(self, key, _pat1=_pat):
- return os.path.join(self.root_path, _pat1.sub('_', key))
-
- def get(self, key, original_timestamp=None):
- fpath = self.filepath(key)
- # if not _isfile(fpath): return None
- stat = _ignore_not_found_error(lambda: os.stat(fpath), None)
- if stat is None:
- return None
- created_at = stat.st_ctime
- expires_at = stat.st_mtime
- if original_timestamp is not None and created_at < original_timestamp:
- self.delete(key)
- return None
- if expires_at < _time():
- self.delete(key)
- return None
- if self.encoding:
- def f(): return _read_text_file(fpath, self.encoding)
- else:
- def f(): return _read_binary_file(fpath)
- return _ignore_not_found_error(f, None)
-
- def set(self, key, value, lifetime=0):
- fpath = self.filepath(key)
- dirname = os.path.dirname(fpath)
- if not os.path.isdir(dirname):
- os.makedirs(dirname)
- now = _time()
- if isinstance(value, _unicode):
- value = value.encode(self.encoding or 'utf-8')
- _write_binary_file(fpath, value)
- expires_at = now + (lifetime or self.lifetime) # timestamp
- os.utime(fpath, (expires_at, expires_at))
- return True
-
- def delete(self, key):
- fpath = self.filepath(key)
- ret = _ignore_not_found_error(lambda: os.unlink(fpath), False)
- return ret != False
-
- def has(self, key):
- fpath = self.filepath(key)
- if not _isfile(fpath):
- return False
- if _getmtime(fpath) < _time():
- self.delete(key)
- return False
- return True
-
-
-##
-# html fragment cache helper class
-##
-class FragmentCacheHelper(object):
- """html fragment cache helper class."""
-
- lifetime = 60 # 1 minute
- prefix = None
-
- def __init__(self, store, lifetime=None, prefix=None):
- self.store = store
- if lifetime is not None:
- self.lifetime = lifetime
- if prefix is not None:
- self.prefix = prefix
-
- def not_cached(self, cache_key, lifetime=None):
- """(obsolete. use cache_as() instead of this.)
- html fragment cache helper. see document of FragmentCacheHelper class."""
- context = sys._getframe(1).f_locals['_context']
- context['_cache_key'] = cache_key
- key = self.prefix and self.prefix + cache_key or cache_key
- value = self.store.get(key)
- if value: # cached
- if logger:
- logger.debug('[tenjin.not_cached] %r: cached.' % (cache_key, ))
- context[key] = value
- return False
- else: # not cached
- if logger:
- logger.debug(
- '[tenjin.not_cached]: %r: not cached.' % (cache_key, ))
- if key in context:
- del context[key]
- if lifetime is None:
- lifetime = self.lifetime
- context['_cache_lifetime'] = lifetime
- helpers.start_capture(cache_key, _depth=2)
- return True
-
- def echo_cached(self):
- """(obsolete. use cache_as() instead of this.)
- html fragment cache helper. see document of FragmentCacheHelper class."""
- f_locals = sys._getframe(1).f_locals
- context = f_locals['_context']
- cache_key = context.pop('_cache_key')
- key = self.prefix and self.prefix + cache_key or cache_key
- if key in context: # cached
- value = context.pop(key)
- else: # not cached
- value = helpers.stop_capture(False, _depth=2)
- lifetime = context.pop('_cache_lifetime')
- self.store.set(key, value, lifetime)
- f_locals['_buf'].append(value)
-
- def functions(self):
- """(obsolete. use cache_as() instead of this.)"""
- return (self.not_cached, self.echo_cached)
-
- def cache_as(self, cache_key, lifetime=None):
- key = self.prefix and self.prefix + cache_key or cache_key
- _buf = sys._getframe(1).f_locals['_buf']
- value = self.store.get(key)
- if value:
- if logger:
- logger.debug('[tenjin.cache_as] %r: cache found.' %
- (cache_key, ))
- _buf.append(value)
- else:
- if logger:
- logger.debug(
- '[tenjin.cache_as] %r: expired or not cached yet.' % (cache_key, ))
- _buf_len = len(_buf)
- yield None
- value = ''.join(_buf[_buf_len:])
- self.store.set(key, value, lifetime)
-
-
-# you can change default store by 'tenjin.helpers.fragment_cache.store = ...'
-helpers.fragment_cache = FragmentCacheHelper(MemoryBaseStore())
-helpers.not_cached = helpers.fragment_cache.not_cached
-helpers.echo_cached = helpers.fragment_cache.echo_cached
-helpers.cache_as = helpers.fragment_cache.cache_as
-helpers.__all__.extend(('not_cached', 'echo_cached', 'cache_as'))
-
-
-##
-# helper class to find and read template
-##
-class Loader(object):
-
- def exists(self, filepath):
- raise NotImplementedError(
- "%s.exists(): not implemented yet." % self.__class__.__name__)
-
- def find(self, filename, dirs=None):
- #: if dirs provided then search template file from it.
- if dirs:
- for dirname in dirs:
- filepath = os.path.join(dirname, filename)
- if self.exists(filepath):
- return filepath
- #: if dirs not provided then just return filename if file exists.
- else:
- if self.exists(filename):
- return filename
- #: if file not found then return None.
- return None
-
- def abspath(self, filename):
- raise NotImplementedError(
- "%s.abspath(): not implemented yet." % self.__class__.__name__)
-
- def timestamp(self, filepath):
- raise NotImplementedError(
- "%s.timestamp(): not implemented yet." % self.__class__.__name__)
-
- def load(self, filepath):
- raise NotImplementedError(
- "%s.timestamp(): not implemented yet." % self.__class__.__name__)
-
-
-##
-# helper class to find and read files
-##
-class FileSystemLoader(Loader):
-
- def exists(self, filepath):
- #: return True if filepath exists as a file.
- return os.path.isfile(filepath)
-
- def abspath(self, filepath):
- #: return full-path of filepath
- return os.path.abspath(filepath)
-
- def timestamp(self, filepath):
- #: return mtime of file
- return _getmtime(filepath)
-
- def load(self, filepath):
- #: if file exists, return file content and mtime
- def f():
- mtime = _getmtime(filepath)
- input = _read_template_file(filepath)
- mtime2 = _getmtime(filepath)
- if mtime != mtime2:
- mtime = mtime2
- input = _read_template_file(filepath)
- mtime2 = _getmtime(filepath)
- if mtime != mtime2:
- if logger:
- logger.warn(
- "[tenjin] %s.load(): timestamp is changed while reading file." % self.__class__.__name__)
- return input, mtime
- #: if file not exist, return None
- return _ignore_not_found_error(f)
-
-
-##
-##
-##
-class TemplateNotFoundError(Exception):
- pass
-
-
-##
-# template engine class
-##
-
-class Engine(object):
- """Template Engine class.
- See User's Guide and examples for details.
- http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html
- http://www.kuwata-lab.com/tenjin/pytenjin-examples.html
- """
-
- # default value of attributes
- prefix = ''
- postfix = ''
- layout = None
- templateclass = Template
- path = None
- cache = TextCacheStorage() # save converted Python code into text file
- lang = None
- loader = FileSystemLoader()
- preprocess = False
- preprocessorclass = Preprocessor
- timestamp_interval = 1 # seconds
-
- def __init__(self, prefix=None, postfix=None, layout=None, path=None, cache=True, preprocess=None, templateclass=None, preprocessorclass=None, lang=None, loader=None, pp=None, **kwargs):
- """Initializer of Engine class.
-
- prefix:str (='')
- Prefix string used to convert template short name to template filename.
- postfix:str (='')
- Postfix string used to convert template short name to template filename.
- layout:str (=None)
- Default layout template name.
- path:list of str(=None)
- List of directory names which contain template files.
- cache:bool or CacheStorage instance (=True)
- Cache storage object to store converted python code.
- If True, default cache storage (=Engine.cache) is used (if it is None
- then create MarshalCacheStorage object for each engine object).
- If False, no cache storage is used nor no cache files are created.
- preprocess:bool(=False)
- Activate preprocessing or not.
- templateclass:class (=Template)
- Template class which engine creates automatically.
- lang:str (=None)
- Language name such as 'en', 'fr', 'ja', and so on. If you specify
- this, cache file path will be 'inex.html.en.cache' for example.
- pp:list (=None)
- List of preprocessor object which is callable and manipulates template content.
- kwargs:dict
- Options for Template class constructor.
- See document of Template.__init__() for details.
- """
- if prefix:
- self.prefix = prefix
- if postfix:
- self.postfix = postfix
- if layout:
- self.layout = layout
- if templateclass:
- self.templateclass = templateclass
- if preprocessorclass:
- self.preprocessorclass = preprocessorclass
- if path is not None:
- self.path = path
- if lang is not None:
- self.lang = lang
- if loader is not None:
- self.loader = loader
- if preprocess is not None:
- self.preprocess = preprocess
- if pp is None:
- pp = []
- elif isinstance(pp, list):
- pass
- elif isinstance(pp, tuple):
- pp = list(pp)
- else:
- raise TypeError("'pp' expected to be a list but got %r." % (pp,))
- self.pp = pp
- if preprocess:
- self.pp.append(TemplatePreprocessor(self.preprocessorclass))
- self.kwargs = kwargs
- self.encoding = kwargs.get('encoding')
- self._filepaths = {} # template_name => relative path and absolute path
- self._added_templates = {} # templates added by add_template()
- #self.cache = cache
- self._set_cache_storage(cache)
-
- def _set_cache_storage(self, cache):
- if cache is True:
- if not self.cache:
- self.cache = MarshalCacheStorage()
- elif cache is None:
- pass
- elif cache is False:
- self.cache = None
- elif isinstance(cache, CacheStorage):
- self.cache = cache
- else:
- raise ValueError("%r: invalid cache object." % (cache, ))
-
- def cachename(self, filepath):
- #: if lang is provided then add it to cache filename.
- if self.lang:
- return '%s.%s.cache' % (filepath, self.lang)
- #: return cache file name.
- else:
- return filepath + '.cache'
-
- def to_filename(self, template_name):
- """Convert template short name into filename.
- ex.
- >>> engine = tenjin.Engine(prefix='user_', postfix='.pyhtml')
- >>> engine.to_filename(':list')
- 'user_list.pyhtml'
- >>> engine.to_filename('list')
- 'list'
- """
- #: if template_name starts with ':', add prefix and postfix to it.
- if template_name[0] == ':':
- return self.prefix + template_name[1:] + self.postfix
- #: if template_name doesn't start with ':', just return it.
- return template_name
-
- def _create_template(self, input=None, filepath=None, _context=None, _globals=None):
- #: if input is not specified then just create empty template object.
- template = self.templateclass(None, **self.kwargs)
- #: if input is specified then create template object and return it.
- if input:
- template.convert(input, filepath)
- return template
-
- def _preprocess(self, input, filepath, _context, _globals):
- #if _context is None: _context = {}
- #if _globals is None: _globals = sys._getframe(3).f_globals
- #: preprocess template and return result
- #preprocessor = self.preprocessorclass(filepath, input=input)
- # return preprocessor.render(_context, globals=_globals)
- #: preprocesses input with _context and returns result.
- if '_engine' not in _context:
- self.hook_context(_context)
- for pp in self.pp:
- input = pp.__call__(input, filename=filepath,
- context=_context, globals=_globals)
- return input
-
- def add_template(self, template):
- self._added_templates[template.filename] = template
-
- def _get_template_from_cache(self, cachepath, filepath):
- #: if template not found in cache, return None
- template = self.cache.get(cachepath, self.templateclass)
- if not template:
- return None
- assert template.timestamp is not None
- #: if checked within a sec, skip timestamp check.
- now = _time()
- last_checked = getattr(template, '_last_checked_at', None)
- if last_checked and now < last_checked + self.timestamp_interval:
- # if logger: logger.trace('[tenjin.%s] timestamp check skipped (%f < %f + %f)' % \
- # (self.__class__.__name__, now, template._last_checked_at, self.timestamp_interval))
- return template
- #: if timestamp of template objectis same as file, return it.
- if template.timestamp == self.loader.timestamp(filepath):
- template._last_checked_at = now
- return template
- #: if timestamp of template object is different from file, clear it
- # cache._delete(cachepath)
- if logger:
- logger.info("[tenjin.%s] cache expired (filepath=%r)" %
- (self.__class__.__name__, filepath))
- return None
-
- def get_template(self, template_name, _context=None, _globals=None):
- """Return template object.
- If template object has not registered, template engine creates
- and registers template object automatically.
- """
- #: accept template_name such as ':index'.
- filename = self.to_filename(template_name)
- #: if template object is added by add_template(), return it.
- if filename in self._added_templates:
- return self._added_templates[filename]
- #: get filepath and fullpath of template
- pair = self._filepaths.get(filename)
- if pair:
- filepath, fullpath = pair
- else:
- #: if template file is not found then raise TemplateNotFoundError.
- filepath = self.loader.find(filename, self.path)
- if not filepath:
- raise TemplateNotFoundError(
- '%s: filename not found (path=%r).' % (filename, self.path))
- #
- fullpath = self.loader.abspath(filepath)
- self._filepaths[filename] = (filepath, fullpath)
- #: use full path as base of cache file path
- cachepath = self.cachename(fullpath)
- #: get template object from cache
- cache = self.cache
- template = cache and self._get_template_from_cache(
- cachepath, filepath) or None
- #: if template object is not found in cache or is expired...
- if not template:
- ret = self.loader.load(filepath)
- if not ret:
- raise TemplateNotFoundError(
- "%r: template not found." % filepath)
- input, timestamp = ret
- if self.pp: # required for preprocessing
- if _context is None:
- _context = {}
- if _globals is None:
- _globals = sys._getframe(1).f_globals
- input = self._preprocess(input, filepath, _context, _globals)
- #: create template object.
- template = self._create_template(
- input, filepath, _context, _globals)
- #: set timestamp and filename of template object.
- template.timestamp = timestamp
- template._last_checked_at = _time()
- #: save template object into cache.
- if cache:
- if not template.bytecode:
- #: ignores syntax error when compiling.
- try:
- template.compile()
- except SyntaxError:
- pass
- cache.set(cachepath, template)
- # else:
- # template.compile()
- #:
- template.filename = filepath
- return template
-
- def include(self, template_name, append_to_buf=True, **kwargs):
- """Evaluate template using current local variables as context.
-
- template_name:str
- Filename (ex. 'user_list.pyhtml') or short name (ex. ':list') of template.
- append_to_buf:boolean (=True)
- If True then append output into _buf and return None,
- else return stirng output.
-
- ex.
- <?py include('file.pyhtml') ?>
- #{include('file.pyhtml', False)}
- <?py val = include('file.pyhtml', False) ?>
- """
- #: get local and global vars of caller.
- frame = sys._getframe(1)
- locals = frame.f_locals
- globals = frame.f_globals
- #: get _context from caller's local vars.
- assert '_context' in locals
- context = locals['_context']
- #: if kwargs specified then add them into context.
- if kwargs:
- context.update(kwargs)
- #: get template object with context data and global vars.
- # (context and globals are passed to get_template() only for preprocessing.)
- template = self.get_template(template_name, context, globals)
- #: if append_to_buf is true then add output to _buf.
- #: if append_to_buf is false then don't add output to _buf.
- if append_to_buf:
- _buf = locals['_buf']
- else:
- _buf = None
- #: render template and return output.
- s = template.render(context, globals, _buf=_buf)
- #: kwargs are removed from context data.
- if kwargs:
- for k in kwargs:
- del context[k]
- return s
-
- def render(self, template_name, context=None, globals=None, layout=True):
- """Evaluate template with layout file and return result of evaluation.
-
- template_name:str
- Filename (ex. 'user_list.pyhtml') or short name (ex. ':list') of template.
- context:dict (=None)
- Context object to evaluate. If None then new dict is used.
- globals:dict (=None)
- Global context to evaluate. If None then globals() is used.
- layout:str or Bool(=True)
- If True, the default layout name specified in constructor is used.
- If False, no layout template is used.
- If str, it is regarded as layout template name.
-
- If temlate object related with the 'template_name' argument is not exist,
- engine generates a template object and register it automatically.
- """
- if context is None:
- context = {}
- if globals is None:
- globals = sys._getframe(1).f_globals
- self.hook_context(context)
- while True:
- # context and globals are passed to get_template() only for preprocessing
- template = self.get_template(template_name, context, globals)
- content = template.render(context, globals)
- layout = context.pop('_layout', layout)
- if layout is True or layout is None:
- layout = self.layout
- if not layout:
- break
- template_name = layout
- layout = False
- context['_content'] = content
- context.pop('_content', None)
- return content
-
- def hook_context(self, context):
- #: add engine itself into context data.
- context['_engine'] = self
- #context['render'] = self.render
- #: add include() method into context data.
- context['include'] = self.include
-
-
-##
-# safe template and engine
-##
-
-class SafeTemplate(Template):
- """Uses 'to_escaped()' instead of 'escape()'.
- '#{...}' is not allowed with this class. Use '[==...==]' instead.
- """
-
- tostrfunc = 'to_str'
- escapefunc = 'to_escaped'
-
- def get_expr_and_flags(self, match):
- return _get_expr_and_flags(match, "#{%s}: '#{}' is not allowed with SafeTemplate.")
-
-
-class SafePreprocessor(Preprocessor):
-
- tostrfunc = 'to_str'
- escapefunc = 'to_escaped'
-
- def get_expr_and_flags(self, match):
- return _get_expr_and_flags(match, "#{{%s}}: '#{{}}' is not allowed with SafePreprocessor.")
-
-
-def _get_expr_and_flags(match, errmsg):
- expr1, expr2, expr3, expr4 = match.groups()
- if expr1 is not None:
- raise TemplateSyntaxError(errmsg % match.group(1))
- if expr2 is not None:
- return expr2, (True, False) # #{...} : call escape, not to_str
- if expr3 is not None:
- return expr3, (False, True) # [==...==] : not escape, call to_str
- if expr4 is not None:
- return expr4, (True, False) # [=...=] : call escape, not to_str
-
-
-class SafeEngine(Engine):
-
- templateclass = SafeTemplate
- preprocessorclass = SafePreprocessor
-
-
-##
-# for Google App Engine
-# (should separate into individual file or module?)
-##
-
-def _dummy():
- global memcache, _tenjin
- memcache = _tenjin = None # lazy import of google.appengine.api.memcache
- global GaeMemcacheCacheStorage, GaeMemcacheStore, init
-
- class GaeMemcacheCacheStorage(CacheStorage):
-
- lifetime = 0 # 0 means unlimited
-
- def __init__(self, lifetime=None, namespace=None):
- CacheStorage.__init__(self)
- if lifetime is not None:
- self.lifetime = lifetime
- self.namespace = namespace
-
- def _load(self, cachepath):
- key = cachepath
- if _tenjin.logger:
- _tenjin.logger.info(
- "[tenjin.gae.GaeMemcacheCacheStorage] load cache (key=%r)" % (key, ))
- return memcache.get(key, namespace=self.namespace)
-
- def _store(self, cachepath, dct):
- dct.pop('bytecode', None)
- key = cachepath
- if _tenjin.logger:
- _tenjin.logger.info(
- "[tenjin.gae.GaeMemcacheCacheStorage] store cache (key=%r)" % (key, ))
- ret = memcache.set(key, dct, self.lifetime,
- namespace=self.namespace)
- if not ret:
- if _tenjin.logger:
- _tenjin.logger.info(
- "[tenjin.gae.GaeMemcacheCacheStorage] failed to store cache (key=%r)" % (key, ))
-
- def _delete(self, cachepath):
- key = cachepath
- memcache.delete(key, namespace=self.namespace)
-
- class GaeMemcacheStore(KeyValueStore):
-
- lifetime = 0
-
- def __init__(self, lifetime=None, namespace=None):
- if lifetime is not None:
- self.lifetime = lifetime
- self.namespace = namespace
-
- def get(self, key):
- return memcache.get(key, namespace=self.namespace)
-
- def set(self, key, value, lifetime=None):
- if lifetime is None:
- lifetime = self.lifetime
- if memcache.set(key, value, lifetime, namespace=self.namespace):
- return True
- else:
- if _tenjin.logger:
- _tenjin.logger.info(
- "[tenjin.gae.GaeMemcacheStore] failed to set (key=%r)" % (key, ))
- return False
-
- def delete(self, key):
- return memcache.delete(key, namespace=self.namespace)
-
- def has(self, key):
- if memcache.add(key, 'dummy', namespace=self.namespace):
- memcache.delete(key, namespace=self.namespace)
- return False
- else:
- return True
-
- def init():
- global memcache, _tenjin
- if not memcache:
- from google.appengine.api import memcache
- if not _tenjin:
- import tenjin as _tenjin
- # avoid cache confliction between versions
- ver = os.environ.get('CURRENT_VERSION_ID', '1.1') # .split('.')[0]
- Engine.cache = GaeMemcacheCacheStorage(namespace=ver)
- # set fragment cache store
- helpers.fragment_cache.store = GaeMemcacheStore(namespace=ver)
- helpers.fragment_cache.lifetime = 60 # 1 minute
- helpers.fragment_cache.prefix = 'fragment.'
-
-
-gae = create_module('tenjin.gae', _dummy,
- os=os, helpers=helpers, Engine=Engine,
- CacheStorage=CacheStorage, KeyValueStore=KeyValueStore)
-
-
-del _dummy
diff --git a/cgi/weabot.py b/cgi/weabot.py
index 720916d..636eb02 100755
--- a/cgi/weabot.py
+++ b/cgi/weabot.py
@@ -23,7 +23,7 @@ from formatting import *
from post import *
from img import *
-__version__ = "0.10.0"
+__version__ = "0.10.5"
# Set to True to disable weabot's exception routing and enable profiling
_DEBUG = False
@@ -35,7 +35,7 @@ class weabot(object):
def __init__(self, environ, start_response):
global _DEBUG
- logging.basicConfig(filename='weabot.log', format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG)
+ logging.basicConfig(filename='weabot.log', format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
self.environ = environ
if self.environ["PATH_INFO"].startswith("/weabot.py/"):
@@ -85,7 +85,7 @@ class weabot(object):
def error(self, message):
board = Settings._.BOARD
if board:
- if board['board_type'] == '1':
+ if board['board_type'] == 1:
info = {}
info['host'] = self.environ["REMOTE_ADDR"]
info['name'] = self.formdata.get('fielda', '')
@@ -265,7 +265,7 @@ class weabot(object):
if Settings.ENABLE_BANS and addressIsBanned(self.environ['REMOTE_ADDR'], board["dir"], blind_only=True):
raise UserError('<meta http-equiv="refresh" content="0; url=/cgi/banned/%s">' % board["dir"])
- if len(path_split) > 4 and path_split[4] and board['board_type'] == '1':
+ if len(path_split) > 4 and path_split[4] and board['board_type'] == 1:
self.output = dynamicRead(int(path_split[3]), path_split[4], True)
elif board['board_type'] == 1:
self.output = threadPage(0, True, int(path_split[3]))
@@ -329,12 +329,12 @@ class weabot(object):
self.output += '<html xmlns="http://www.w3.org/1999/xhtml"><meta http-equiv="refresh" content="0;url=%s" /><body><p>...</p></body></html>' % url
elif path_split[1] == "banned":
OpenDb()
- bans = FetchAll("SELECT * FROM `bans` WHERE INET6_ATON('"+self.environ["REMOTE_ADDR"]+"') BETWEEN `ipstart` AND `ipend`")
+ bans = FetchAll("SELECT * FROM `bans` WHERE INET6_ATON(%s) BETWEEN `ipstart` AND `ipend`", (self.environ["REMOTE_ADDR"],))
if bans:
for ban in bans:
if ban["boards"]:
- boards = pickle.loads(ban["boards"])
- if ban["boards"] or path_split[2] in boards:
+ boards = str2boards(ban["boards"])
+ if not ban["boards"] or path_split[2] in boards:
caught = True
if ban["boards"]:
boards_str = '/' + '/, /'.join(boards) + '/'
@@ -592,7 +592,7 @@ class weabot(object):
# make ID hash
if board["useid"]:
post["timestamp_formatted"] += ' ID:' + iphash(ip, post, tim, board["useid"], mobile,
- self.environ["HTTP_USER_AGENT"], cap_id, hide_end, (board["countrycode"] in ['1', '2']))
+ self.environ["HTTP_USER_AGENT"], cap_id, hide_end, (board["countrycode"] in [1, 2]))
# use for future file checks
xfile = (file is not None or oek_file)
@@ -793,7 +793,13 @@ class weabot(object):
postid = post.insert()
# delete threads that have crossed last page
- trimThreads()
+ trimmed = trimThreads()
+
+ # let's stop here if the thread we posted in got trimmed
+ if post["parentid"] and post["parentid"] in trimmed:
+ regenerateFrontPages()
+ regenerateHome()
+ raise UserError("El hilo en el que publicaste ya fue eliminado.")
# fix null references when creating thread
if board["board_type"] == 1 and not post["parentid"]:
@@ -947,8 +953,8 @@ class weabot(object):
raise UserError(_("You're banned."))
# check if post exists
- post = FetchOne("SELECT `id`, `parentid`, `ip` FROM `posts` WHERE `id` = '%s' AND `boardid` = '%s'" % (
- _mysql.escape_string(str(postid)), _mysql.escape_string(board['id'])))
+ post = FetchOne("SELECT `id`, `parentid`, `ip` FROM `posts` WHERE `id` = %s AND `boardid` = %s",
+ (postid, board['id']))
if not post:
raise UserError(_("Post doesn't exist."))
@@ -963,13 +969,12 @@ class weabot(object):
# insert report
t = time.time()
- message = cgi.escape(self.formdata["reason"]).strip()[0:8000]
+ message = html.escape(self.formdata["reason"]).strip()[0:800]
message = message.replace("\n", "<br />")
UpdateDb("INSERT INTO `reports` (board, postid, parentid, link, ip, reason, repip, timestamp, timestamp_formatted) " +
- "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', INET6_ATON('%s'), '%s', '%s')" % (
- board["dir"], post['id'], post['parentid'], link, post['ip'], _mysql.escape_string(message),
- _mysql.escape_string(self.environ["REMOTE_ADDR"]), str(t), formatTimestamp(t)))
+ "VALUES (%s, %s, %s, %s, %s, %s, INET6_ATON(%s), %s, %s)",
+ (board["dir"], post['id'], post['parentid'], link, post['ip'], message, self.environ["REMOTE_ADDR"], t, formatTimestamp(t)))
self.output = renderTemplate("report.html", {'finished': True})
def stats(self):