diff options
author | neptune | 2021-11-15 13:35:08 -0300 |
---|---|---|
committer | neptune | 2021-11-15 13:35:08 -0300 |
commit | a16f23f034fb942b44e0665224d499af57aec1ad (patch) | |
tree | 8733eec9968d38153f1e0cfa2ca76f664dfb1773 | |
parent | e34ea592bd9981d99d1c7f70c0fbf10c0bd83eb3 (diff) | |
download | weabot-a16f23f034fb942b44e0665224d499af57aec1ad.tar.gz weabot-a16f23f034fb942b44e0665224d499af57aec1ad.tar.xz weabot-a16f23f034fb942b44e0665224d499af57aec1ad.zip |
Nueva revisión Py3
-rw-r--r-- | cgi/BeautifulSoup.py | 2047 | ||||
-rw-r--r-- | cgi/api.py | 147 | ||||
-rw-r--r-- | cgi/fcgi.py | 1363 | ||||
-rw-r--r-- | cgi/formatting.py | 29 | ||||
-rw-r--r-- | cgi/framework.py | 40 | ||||
-rw-r--r-- | cgi/manage.py | 150 | ||||
-rw-r--r-- | cgi/markdown.py | 2093 | ||||
-rw-r--r-- | cgi/post.py | 31 | ||||
-rw-r--r-- | cgi/templates/bans_geo | 2 | ||||
-rw-r--r-- | cgi/templates/bans_locations | 2 | ||||
-rw-r--r-- | cgi/templates/manage/boardoptions.html | 2 | ||||
-rw-r--r-- | cgi/templates/mobile/txt_thread.html | 10 | ||||
-rw-r--r-- | cgi/templates/revision.html | 2 | ||||
-rw-r--r-- | cgi/templates/txt_archive.html | 4 | ||||
-rw-r--r-- | cgi/templates/txt_thread.en.html | 10 | ||||
-rw-r--r-- | cgi/tenjin.py | 2293 | ||||
-rwxr-xr-x | cgi/weabot.py | 35 |
17 files changed, 235 insertions, 8025 deletions
diff --git a/cgi/BeautifulSoup.py b/cgi/BeautifulSoup.py deleted file mode 100644 index 3e97785..0000000 --- a/cgi/BeautifulSoup.py +++ /dev/null @@ -1,2047 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses a (possibly invalid) XML or HTML document into a -tree representation. It provides methods and Pythonic idioms that make -it easy to navigate, search, and modify the tree. - -A well-formed XML/HTML document yields a well-formed data -structure. An ill-formed XML/HTML document yields a correspondingly -ill-formed data structure. If your document is only locally -well-formed, you can use this library to find and process the -well-formed part of it. - -Beautiful Soup works with Python 2.2 and up. It has no external -dependencies, but you'll have more success at converting data to UTF-8 -if you also install these three packages: - -* chardet, for auto-detecting character encodings - http://chardet.feedparser.org/ -* cjkcodecs and iconv_codec, which add more encodings to the ones supported - by stock Python. - http://cjkpython.i18n.org/ - -Beautiful Soup defines classes for two main parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. This class has web browser-like heuristics for - obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup also defines a class (UnicodeDammit) for autodetecting -the encoding of an HTML or XML document, and converting it to -Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. - -For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/documentation.html - -Here, have some legalese: - -Copyright (c) 2004-2010, Leonard Richardson - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the the Beautiful Soup Consortium and All - Night Kosher Bakery nor the names of its contributors may be - used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
- -""" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "3.2.1" -__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" -__license__ = "New-style BSD" - -from sgmllib import SGMLParser, SGMLParseError -import codecs -import markupbase -import types -import re -import sgmllib -try: - from htmlentitydefs import name2codepoint -except ImportError: - name2codepoint = {} -try: - set -except NameError: - from sets import Set as set - -# These hacks make Beautiful Soup able to parse XML with namespaces -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') -markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match - -DEFAULT_OUTPUT_ENCODING = "utf-8" - - -def _match_css_class(str): - """Build a RE to match the given CSS class.""" - return re.compile(r"(^|.*\s)%s($|\s)" % str) - -# First, the classes that represent markup elements. - - -class PageElement(object): - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def _invert(h): - "Cheap function to invert a hash." - i = {} - for k, v in h.items(): - i[v] = k - return i - - XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'", - "quot": '"', - "amp": "&", - "lt": "<", - "gt": ">"} - - XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) - - def setup(self, parent=None, previous=None): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = None - self.previousSibling = None - self.nextSibling = None - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def replaceWith(self, replaceWith): - oldParent = self.parent - myIndex = self.parent.index(self) - if hasattr(replaceWith, "parent")\ - and replaceWith.parent is self.parent: - # We're replacing this element with one of its siblings. 
- index = replaceWith.parent.index(replaceWith) - if index and index < myIndex: - # Furthermore, it comes before this element. That - # means that when we extract it, the index of this - # element will change. - myIndex = myIndex - 1 - self.extract() - oldParent.insert(myIndex, replaceWith) - - def replaceWithChildren(self): - myParent = self.parent - myIndex = self.parent.index(self) - self.extract() - reversedChildren = list(self.contents) - reversedChildren.reverse() - for child in reversedChildren: - myParent.insert(myIndex, child) - - def extract(self): - """Destructively rips this element out of the tree.""" - if self.parent: - try: - del self.parent.contents[self.parent.index(self)] - except ValueError: - pass - - # Find the two elements that would be next to each other if - # this element (and any children) hadn't been parsed. Connect - # the two. - lastChild = self._lastRecursiveChild() - nextElement = lastChild.next - - if self.previous: - self.previous.next = nextElement - if nextElement: - nextElement.previous = self.previous - self.previous = None - lastChild.next = None - - self.parent = None - if self.previousSibling: - self.previousSibling.nextSibling = self.nextSibling - if self.nextSibling: - self.nextSibling.previousSibling = self.previousSibling - self.previousSibling = self.nextSibling = None - return self - - def _lastRecursiveChild(self): - "Finds the last element beneath this object to be parsed." - lastChild = self - while hasattr(lastChild, 'contents') and lastChild.contents: - lastChild = lastChild.contents[-1] - return lastChild - - def insert(self, position, newChild): - if isinstance(newChild, basestring) \ - and not isinstance(newChild, NavigableString): - newChild = NavigableString(newChild) - - position = min(position, len(self.contents)) - if hasattr(newChild, 'parent') and newChild.parent is not None: - # We're 'inserting' an element that's already one - # of this object's children. 
- if newChild.parent is self: - index = self.index(newChild) - if index > position: - # Furthermore we're moving it further down the - # list of this object's children. That means that - # when we extract this element, our target index - # will jump down one. - position = position - 1 - newChild.extract() - - newChild.parent = self - previousChild = None - if position == 0: - newChild.previousSibling = None - newChild.previous = self - else: - previousChild = self.contents[position-1] - newChild.previousSibling = previousChild - newChild.previousSibling.nextSibling = newChild - newChild.previous = previousChild._lastRecursiveChild() - if newChild.previous: - newChild.previous.next = newChild - - newChildsLastElement = newChild._lastRecursiveChild() - - if position >= len(self.contents): - newChild.nextSibling = None - - parent = self - parentsNextSibling = None - while not parentsNextSibling: - parentsNextSibling = parent.nextSibling - parent = parent.parent - if not parent: # This is the last element in the document. 
- break - if parentsNextSibling: - newChildsLastElement.next = parentsNextSibling - else: - newChildsLastElement.next = None - else: - nextChild = self.contents[position] - newChild.nextSibling = nextChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - newChildsLastElement.next = nextChild - - if newChildsLastElement.next: - newChildsLastElement.next.previous = newChildsLastElement - self.contents.insert(position, newChild) - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.insert(len(self.contents), tag) - - def findNext(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._findOne(self.findAllNext, name, attrs, text, **kwargs) - - def findAllNext(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.nextGenerator, - **kwargs) - - def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches the - given criteria and appears after this Tag in the document.""" - return self._findOne(self.findNextSiblings, name, attrs, text, - **kwargs) - - def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.nextSiblingGenerator, **kwargs) - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x - - def findPrevious(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) - - def 
findAllPrevious(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.previousGenerator, - **kwargs) - fetchPrevious = findAllPrevious # Compatibility with pre-3.x - - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._findOne(self.findPreviousSiblings, name, attrs, text, - **kwargs) - - def findPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None, **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.previousSiblingGenerator, **kwargs) - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x - - def findParent(self, name=None, attrs={}, **kwargs): - """Returns the closest parent of this Tag that matches the given - criteria.""" - # NOTE: We can't use _findOne because findParents takes a different - # set of arguments. - r = None - l = self.findParents(name, attrs, 1) - if l: - r = l[0] - return r - - def findParents(self, name=None, attrs={}, limit=None, **kwargs): - """Returns the parents of this Tag that match the given - criteria.""" - - return self._findAll(name, attrs, None, limit, self.parentGenerator, - **kwargs) - fetchParents = findParents # Compatibility with pre-3.x - - # These methods do the real heavy lifting. - - def _findOne(self, method, name, attrs, text, **kwargs): - r = None - l = method(name, attrs, text, 1, **kwargs) - if l: - r = l[0] - return r - - def _findAll(self, name, attrs, text, limit, generator, **kwargs): - "Iterates over a generator looking for things that match." 
- - if isinstance(name, SoupStrainer): - strainer = name - # (Possibly) special case some findAll*(...) searches - elif text is None and not limit and not attrs and not kwargs: - # findAll*(True) - if name is True: - return [element for element in generator() - if isinstance(element, Tag)] - # findAll*('tag-name') - elif isinstance(name, basestring): - return [element for element in generator() - if isinstance(element, Tag) and - element.name == name] - else: - strainer = SoupStrainer(name, attrs, text, **kwargs) - # Build a SoupStrainer - else: - strainer = SoupStrainer(name, attrs, text, **kwargs) - results = ResultSet(strainer) - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - if i: - found = strainer.search(i) - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - # These Generators can be used to navigate starting from both - # NavigableStrings and Tags. - def nextGenerator(self): - i = self - while i is not None: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i is not None: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i is not None: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i is not None: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i is not None: - i = i.parent - yield i - - # Utility methods - def substituteEncoding(self, str, encoding=None): - encoding = encoding or "utf-8" - return str.replace("%SOUP-ENCODING%", encoding) - - def toEncoding(self, s, encoding=None): - """Encodes an object to a string in some encoding, or to Unicode. 
- .""" - if isinstance(s, unicode): - if encoding: - s = s.encode(encoding) - elif isinstance(s, str): - if encoding: - s = s.encode(encoding) - else: - s = unicode(s) - else: - if encoding: - s = self.toEncoding(str(s), encoding) - else: - s = unicode(s) - return s - - BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" - + ")") - - def _sub_entity(self, x): - """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" - return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" - - -class NavigableString(unicode, PageElement): - - def __new__(cls, value): - """Create a new NavigableString. - - When unpickling a NavigableString, this method is called with - the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ - if isinstance(value, unicode): - return unicode.__new__(cls, value) - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - - def __getnewargs__(self): - return (NavigableString.__str__(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards - compatibility for Navigable*String, but for CData* it lets you - get the string without the CData wrapper.""" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - - def __unicode__(self): - return str(self).decode(DEFAULT_OUTPUT_ENCODING) - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - # Substitute outgoing XML entities. 
- data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self) - if encoding: - return data.encode(encoding) - else: - return data - - -class CData(NavigableString): - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) - - -class ProcessingInstruction(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - output = self - if "%SOUP-ENCODING%" in output: - output = self.substituteEncoding(output, encoding) - return "<?%s?>" % self.toEncoding(output, encoding) - - -class Comment(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<!--%s-->" % NavigableString.__str__(self, encoding) - - -class Declaration(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<!%s>" % NavigableString.__str__(self, encoding) - - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def _convertEntities(self, match): - """Used in a call to re.sub to replace HTML, XML, and numeric - entities with the appropriate Unicode characters. If HTML - entities are being converted, any unrecognized entities are - escaped.""" - x = match.group(1) - if self.convertHTMLEntities and x in name2codepoint: - return unichr(name2codepoint[x]) - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if self.convertXMLEntities: - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] - else: - return u'&%s;' % x - elif len(x) > 0 and x[0] == '#': - # Handle numeric entities - if len(x) > 1 and x[1] == 'x': - return unichr(int(x[2:], 16)) - else: - return unichr(int(x[1:])) - - elif self.escapeUnrecognizedEntities: - return u'&%s;' % x - else: - return u'&%s;' % x - - def __init__(self, parser, name, attrs=None, parent=None, - previous=None): - "Basic constructor." 
- - # We don't actually store the parser object: that lets extracted - # chunks be garbage-collected - self.parserClass = parser.__class__ - self.isSelfClosing = parser.isSelfClosingTag(name) - self.name = name - if attrs is None: - attrs = [] - elif isinstance(attrs, dict): - attrs = attrs.items() - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - self.containsSubstitutions = False - self.convertHTMLEntities = parser.convertHTMLEntities - self.convertXMLEntities = parser.convertXMLEntities - self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities - - # Convert any HTML, XML, or numeric entities in the attribute values. - def convert((k, val)): return (k, - re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", - self._convertEntities, - val)) - self.attrs = map(convert, self.attrs) - - def getString(self): - if (len(self.contents) == 1 - and isinstance(self.contents[0], NavigableString)): - return self.contents[0] - - def setString(self, string): - """Replace the contents of the tag with a string""" - self.clear() - self.append(string) - - string = property(getString, setString) - - def getText(self, separator=u""): - if not len(self.contents): - return u"" - stopNode = self._lastRecursiveChild().next - strings = [] - current = self.contents[0] - while current is not stopNode: - if isinstance(current, NavigableString): - strings.append(current.strip()) - current = current.next - return separator.join(strings) - - text = property(getText) - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def clear(self): - """Extract all children.""" - for child in self.contents[:]: - child.extract() - - def index(self, element): - for i, child in enumerate(self.contents): - if child is element: - return i - raise ValueError("Tag.index: element not in tag") - - def 
has_key(self, key): - return self._getAttrMap().has_key(key) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - # We don't break because bad HTML can define the same - # attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - findAll() method. Eg. 
tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.findAll, args, kwargs) - - def __getattr__(self, tag): - # print "Getattr %s.%s" % (self.__class__, tag) - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.find(tag[:-3]) - elif tag.find('__') != 0: - return self.find(tag) - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if other is self: - return True - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): - """Renders this tag as a string.""" - return self.__str__(encoding) - - def __unicode__(self): - return self.__str__(None) - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - """Returns a string or Unicode representation of this tag and - its contents. To get Unicode, pass None for encoding. 
- - NOTE: since Python's HTML parser consumes whitespace, this - method is not certain to reproduce the whitespace present in - the original string.""" - - encodedName = self.toEncoding(self.name, encoding) - - attrs = [] - if self.attrs: - for key, val in self.attrs: - fmt = '%s="%s"' - if isinstance(val, basestring): - if self.containsSubstitutions and '%SOUP-ENCODING%' in val: - val = self.substituteEncoding(val, encoding) - - # The attribute value either: - # - # * Contains no embedded double quotes or single quotes. - # No problem: we enclose it in double quotes. - # * Contains embedded single quotes. No problem: - # double quotes work here too. - # * Contains embedded double quotes. No problem: - # we enclose it in single quotes. - # * Embeds both single _and_ double quotes. This - # can't happen naturally, but it can happen if - # you modify an attribute value after parsing - # the document. Now we have a bit of a - # problem. We solve it by enclosing the - # attribute in single quotes, and escaping any - # embedded single quotes to XML entities. - if '"' in val: - fmt = "%s='%s'" - if "'" in val: - # TODO: replace with apos when - # appropriate. - val = val.replace("'", "&squot;") - - # Now we're okay w/r/t quotes. But the attribute - # value might also contain angle brackets, or - # ampersands that aren't part of entities. We need - # to escape those to XML entities too. 
- val = self.BARE_AMPERSAND_OR_BRACKET.sub( - self._sub_entity, val) - - attrs.append(fmt % (self.toEncoding(key, encoding), - self.toEncoding(val, encoding))) - close = '' - closeTag = '' - if self.isSelfClosing: - close = ' /' - else: - closeTag = '</%s>' % encodedName - - indentTag, indentContents = 0, 0 - if prettyPrint: - indentTag = indentLevel - space = (' ' * (indentTag-1)) - indentContents = indentTag + 1 - contents = self.renderContents(encoding, prettyPrint, indentContents) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if prettyPrint: - s.append(space) - s.append('<%s%s%s>' % (encodedName, attributeString, close)) - if prettyPrint: - s.append("\n") - s.append(contents) - if prettyPrint and contents and contents[-1] != "\n": - s.append("\n") - if prettyPrint and closeTag: - s.append(space) - s.append(closeTag) - if prettyPrint and closeTag and self.nextSibling: - s.append("\n") - s = ''.join(s) - return s - - def decompose(self): - """Recursively destroys the contents of this tree.""" - self.extract() - if len(self.contents) == 0: - return - current = self.contents[0] - while current is not None: - next = current.next - if isinstance(current, Tag): - del current.contents[:] - current.parent = None - current.previous = None - current.previousSibling = None - current.next = None - current.nextSibling = None - current = next - - def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.__str__(encoding, True) - - def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - """Renders the contents of this tag as a string in the given - encoding. 
If encoding is None, returns a Unicode string..""" - s = [] - for c in self: - text = None - if isinstance(c, NavigableString): - text = c.__str__(encoding) - elif isinstance(c, Tag): - s.append(c.__str__(encoding, prettyPrint, indentLevel)) - if text and prettyPrint: - text = text.strip() - if text: - if prettyPrint: - s.append(" " * (indentLevel-1)) - s.append(text) - if prettyPrint: - s.append("\n") - return ''.join(s) - - # Soup methods - - def find(self, name=None, attrs={}, recursive=True, text=None, - **kwargs): - """Return only the first child of this Tag matching the given - criteria.""" - r = None - l = self.findAll(name, attrs, recursive, text, 1, **kwargs) - if l: - r = l[0] - return r - findChild = find - - def findAll(self, name=None, attrs={}, recursive=True, text=None, - limit=None, **kwargs): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. 
The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._findAll(name, attrs, text, limit, generator, **kwargs) - findChildren = findAll - - # Pre-3.x compatibility methods - first = find - fetch = findAll - - def fetchText(self, text=None, recursive=True, limit=None): - return self.findAll(text=text, recursive=recursive, limit=limit) - - def firstText(self, text=None, recursive=True): - return self.find(text=text, recursive=recursive) - - # Private methods - - def _getAttrMap(self): - """Initializes a map representation of this tag's attributes, - if not already initialized.""" - if not getattr(self, 'attrMap'): - self.attrMap = {} - for (key, value) in self.attrs: - self.attrMap[key] = value - return self.attrMap - - # Generator methods - def childGenerator(self): - # Just use the iterator from the contents - return iter(self.contents) - - def recursiveChildGenerator(self): - if not len(self.contents): - raise StopIteration - stopNode = self._lastRecursiveChild().next - current = self.contents[0] - while current is not stopNode: - yield current - current = current.next - - -# Next, a couple classes to represent queries and their results. 
-class SoupStrainer: - """Encapsulates a number of ways of matching a markup element (tag or - text).""" - - def __init__(self, name=None, attrs={}, text=None, **kwargs): - self.name = name - if isinstance(attrs, basestring): - kwargs['class'] = _match_css_class(attrs) - attrs = None - if kwargs: - if attrs: - attrs = attrs.copy() - attrs.update(kwargs) - else: - attrs = kwargs - self.attrs = attrs - self.text = text - - def __str__(self): - if self.text: - return self.text - else: - return "%s|%s" % (self.name, self.attrs) - - def searchTag(self, markupName=None, markupAttrs={}): - found = None - markup = None - if isinstance(markupName, Tag): - markup = markupName - markupAttrs = markup - callFunctionWithTagData = callable(self.name) \ - and not isinstance(markupName, Tag) - - if (not self.name) \ - or callFunctionWithTagData \ - or (markup and self._matches(markup, self.name)) \ - or (not markup and self._matches(markupName, self.name)): - if callFunctionWithTagData: - match = self.name(markupName, markupAttrs) - else: - match = True - markupAttrMap = None - for attr, matchAgainst in self.attrs.items(): - if not markupAttrMap: - if hasattr(markupAttrs, 'get'): - markupAttrMap = markupAttrs - else: - markupAttrMap = {} - for k, v in markupAttrs: - markupAttrMap[k] = v - attrValue = markupAttrMap.get(attr) - if not self._matches(attrValue, matchAgainst): - match = False - break - if match: - if markup: - found = markup - else: - found = markupName - return found - - def search(self, markup): - # print 'looking for %s in %s' % (self, markup) - found = None - # If given a list of items, scan it for a text element that - # matches. - if hasattr(markup, "__iter__") \ - and not isinstance(markup, Tag): - for element in markup: - if isinstance(element, NavigableString) \ - and self.search(element): - found = element - break - # If it's a Tag, make sure its name or attributes match. - # Don't bother with Tags if we're searching for text. 
- elif isinstance(markup, Tag): - if not self.text: - found = self.searchTag(markup) - # If it's text, make sure the text matches. - elif isinstance(markup, NavigableString) or \ - isinstance(markup, basestring): - if self._matches(markup, self.text): - found = markup - else: - raise Exception, "I don't know how to match against a %s" \ - % markup.__class__ - return found - - def _matches(self, markup, matchAgainst): - # print "Matching %s against %s" % (markup, matchAgainst) - result = False - if matchAgainst is True: - result = markup is not None - elif callable(matchAgainst): - result = matchAgainst(markup) - else: - # Custom match methods take the tag as an argument, but all - # other ways of matching match the tag name as a string. - if isinstance(markup, Tag): - markup = markup.name - if markup and not isinstance(markup, basestring): - markup = unicode(markup) - # Now we know that chunk is either a string, or None. - if hasattr(matchAgainst, 'match'): - # It's a regexp object. - result = markup and matchAgainst.search(markup) - elif hasattr(matchAgainst, '__iter__'): # list-like - result = markup in matchAgainst - elif hasattr(matchAgainst, 'items'): - result = markup.has_key(matchAgainst) - elif matchAgainst and isinstance(markup, basestring): - if isinstance(markup, unicode): - matchAgainst = unicode(matchAgainst) - else: - matchAgainst = str(matchAgainst) - - if not result: - result = matchAgainst == markup - return result - - -class ResultSet(list): - """A ResultSet is just a list that keeps track of the SoupStrainer - that created it.""" - - def __init__(self, source): - list.__init__([]) - self.source = source - -# Now, some helper functions. - - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and - NESTING_RESET_TAGS maps out of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - # It's a map. Merge it. 
- for k, v in portion.items(): - built[k] = v - elif hasattr(portion, '__iter__'): # is a list - # It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - # It's a scalar. Map it to the default. - built[portion] = default - return built - -# Now, the parser classes. - - -class BeautifulStoneSoup(Tag, SGMLParser): - - """This class contains the basic parser and search code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "<foo><bar></foo>" actually means - "<foo><bar></bar></foo>". - - [Another possible explanation is "<foo><bar /></foo>", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] - - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - PRESERVE_WHITESPACE_TAGS = [] - - MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda x: x.group(1) + ' />'), - (re.compile('<!\s+([^<>]*)>'), - lambda x: '<!' + x.group(1) + '>') - ] - - ROOT_TAG_NAME = u'[document]' - - HTML_ENTITIES = "html" - XML_ENTITIES = "xml" - XHTML_ENTITIES = "xhtml" - # TODO: This only exists for backwards-compatibility - ALL_ENTITIES = XHTML_ENTITIES - - # Used when determining whether a text node is all whitespace and - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. 
- STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, } - - def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, - markupMassage=True, smartQuotesTo=XML_ENTITIES, - convertEntities=None, selfClosingTags=None, isHTML=False): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser. - - sgmllib will process most bad HTML, and the BeautifulSoup - class has some tricks for dealing with some HTML that kills - sgmllib, but Beautiful Soup can nonetheless choke or lose data - if your data uses self-closing tags or declarations - incorrectly. - - By default, Beautiful Soup uses regexes to sanitize input, - avoiding the vast majority of these problems. If the problems - don't apply to you, pass in False for markupMassage, and - you'll get better performance. - - The default parser massage techniques fix the two most common - instances of invalid HTML that choke sgmllib: - - <br/> (No space between name of closing tag and tag close) - <! --Comment--> (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - - self.parseOnlyThese = parseOnlyThese - self.fromEncoding = fromEncoding - self.smartQuotesTo = smartQuotesTo - self.convertEntities = convertEntities - # Set the rules for how we'll deal with the entities we - # encounter - if self.convertEntities: - # It doesn't make sense to convert encoded characters to - # entities even while you're converting entities to Unicode. - # Just convert it all to Unicode. 
- self.smartQuotesTo = None - if convertEntities == self.HTML_ENTITIES: - self.convertXMLEntities = False - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = True - elif convertEntities == self.XHTML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = False - elif convertEntities == self.XML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - else: - self.convertXMLEntities = False - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - - self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) - SGMLParser.__init__(self) - - if hasattr(markup, 'read'): # It's a file-type object. - markup = markup.read() - self.markup = markup - self.markupMassage = markupMassage - try: - self._feed(isHTML=isHTML) - except StopParsing: - pass - self.markup = None # The markup can now be GCed - - def convert_charref(self, name): - """This method fixes a bug in Python's SGMLParser.""" - try: - n = int(name) - except ValueError: - return - if not 0 <= n <= 127: # ASCII ends at 127, not 255 - return - return self.convert_codepoint(n) - - def _feed(self, inDocumentEncoding=None, isHTML=False): - # Convert the document to Unicode. - markup = self.markup - if isinstance(markup, unicode): - if not hasattr(self, 'originalEncoding'): - self.originalEncoding = None - else: - dammit = UnicodeDammit(markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) - markup = dammit.unicode - self.originalEncoding = dammit.originalEncoding - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding - if markup: - if self.markupMassage: - if not hasattr(self.markupMassage, "__iter__"): - self.markupMassage = self.MARKUP_MASSAGE - for fix, m in self.markupMassage: - markup = fix.sub(m, markup) - # TODO: We get rid of markupMassage so that the - # soup object can be deepcopied later on. 
Some - # Python installations can't copy regexes. If anyone - # was relying on the existence of markupMassage, this - # might cause problems. - del(self.markupMassage) - self.reset() - - SGMLParser.feed(self, markup) - # Close out any unfinished strings and close all the open tags. - self.endData() - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - # print "__getattr__ called on %s.%s" % (self.__class__, methodName) - - if methodName.startswith('start_') or methodName.startswith('end_') \ - or methodName.startswith('do_'): - return SGMLParser.__getattr__(self, methodName) - elif not methodName.startswith('__'): - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def isSelfClosingTag(self, name): - """Returns true iff the given string is the name of a - self-closing tag according to this parser.""" - return self.SELF_CLOSING_TAGS.has_key(name) \ - or self.instanceSelfClosingTags.has_key(name) - - def reset(self): - Tag.__init__(self, self, self.ROOT_TAG_NAME) - self.hidden = 1 - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.quoteStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - - # print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - # print "Push", tag.name - if self.currentTag: - self.currentTag.contents.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self, containerClass=NavigableString): - if self.currentData: - currentData = u''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.PRESERVE_WHITESPACE_TAGS)): - if '\n' in 
currentData: - currentData = '\n' - else: - currentData = ' ' - self.currentData = [] - if self.parseOnlyThese and len(self.tagStack) <= 1 and \ - (not self.parseOnlyThese.text or - not self.parseOnlyThese.search(currentData)): - return - o = containerClass(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - # print "Popping to %s" % name - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: - <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. - <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. - <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. - - <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. 
- <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' - <td><tr><td> *<td>* should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - # Non-nestable tags get popped to the top or to their - # last occurance. - popTo = name - break - if (nestingResetTriggers is not None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers is None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - # If we encounter one of the nesting reset triggers - # peculiar to this tag, or we encounter another tag - # that causes nesting to reset, pop up to but not - # including that tag. - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - # print "Start tag %s: %s" % (name, attrs) - if self.quoteStack: - # This is not a real tag. - # print "<%s> is not real!" 
% name - attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - - if not self.isSelfClosingTag(name) and not selfClosing: - self._smartPop(name) - - if self.parseOnlyThese and len(self.tagStack) <= 1 \ - and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): - return - - tag = Tag(self, name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or self.isSelfClosingTag(name): - self.popTag() - if name in self.QUOTE_TAGS: - # print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - return tag - - def unknown_endtag(self, name): - # print "End tag %s" % name - if self.quoteStack and self.quoteStack[-1] != name: - # This is not a real end tag. - # print "</%s> is not real!" % name - self.handle_data('</%s>' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def _toStringSubclass(self, text, subclass): - """Adds a certain piece of text to the tree as a NavigableString - subclass.""" - self.endData() - self.handle_data(text) - self.endData(subclass) - - def handle_pi(self, text): - """Handle a processing instruction as a ProcessingInstruction - object, possibly one with a %SOUP-ENCODING% slot into which an - encoding will be plugged later.""" - if text[:3] == "xml": - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" - self._toStringSubclass(text, ProcessingInstruction) - - def handle_comment(self, text): - "Handle comments as Comment objects." - self._toStringSubclass(text, Comment) - - def handle_charref(self, ref): - "Handle character references as data." 
- if self.convertEntities: - data = unichr(int(ref)) - else: - data = '&#%s;' % ref - self.handle_data(data) - - def handle_entityref(self, ref): - """Handle entity references as data, possibly converting known - HTML and/or XML entity references to the corresponding Unicode - characters.""" - data = None - if self.convertHTMLEntities: - try: - data = unichr(name2codepoint[ref]) - except KeyError: - pass - - if not data and self.convertXMLEntities: - data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) - - if not data and self.convertHTMLEntities and \ - not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): - # TODO: We've got a problem here. We're told this is - # an entity reference, but it's not an XML entity - # reference or an HTML entity reference. Nonetheless, - # the logical thing to do is to pass it through as an - # unrecognized entity reference. - # - # Except: when the input is "&carol;" this function - # will be called with input "carol". When the input is - # "AT&T", this function will be called with input - # "T". We have no way of knowing whether a semicolon - # was present originally, so we don't know whether - # this is an unknown entity or just a misplaced - # ampersand. - # - # The more common case is a misplaced ampersand, so I - # escape the ampersand and omit the trailing semicolon. - data = "&%s" % ref - if not data: - # This case is different from the one above, because we - # haven't already gone through a supposedly comprehensive - # mapping of entities to Unicode characters. We might not - # have gone through any mapping at all. So the chances are - # very high that this is a real entity, and not a - # misplaced ampersand. - data = "&%s;" % ref - self.handle_data(data) - - def handle_decl(self, data): - "Handle DOCTYPEs and the like as Declaration objects." - self._toStringSubclass(data, Declaration) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. 
Treat a CDATA - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '<![CDATA[': - k = self.rawdata.find(']]>', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a <p> tag should implicitly close the previous <p> tag. - - <p>Para1<p>Para2 - should be transformed into: - <p>Para1</p><p>Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a <blockquote> tag should _not_ implicitly close the previous - <blockquote> tag. - - Alice said: <blockquote>Bob said: <blockquote>Blah - should NOT be transformed into: - Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a <tr> tag should - implicitly close the previous <tr> tag within the same <table>, - but not close a <tr> tag in another table. - - <table><tr>Blah<tr>Blah - should be transformed into: - <table><tr>Blah</tr><tr>Blah - but, - <tr>Blah<table><tr>Blah - should NOT be transformed into - <tr>Blah<table></tr><tr>Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. 
If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup, MinimalSoup, or - BeautifulStoneSoup before writing your own subclass.""" - - def __init__(self, *args, **kwargs): - if not kwargs.has_key('smartQuotesTo'): - kwargs['smartQuotesTo'] = self.HTML_ENTITIES - kwargs['isHTML'] = True - BeautifulStoneSoup.__init__(self, *args, **kwargs) - - SELF_CLOSING_TAGS = buildTagMap(None, - ('br', 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base', 'col')) - - PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - - QUOTE_TAGS = {'script': None, 'textarea': None} - - # According to the HTML standard, each of these inline tags can - # contain another tag of the same type. Furthermore, it's common - # to actually use these tags this way. - NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center') - - # According to the HTML standard, these block tags can contain - # another tag of the same type. Furthermore, it's common - # to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') - - # Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = {'ol': [], - 'ul': [], - 'li': ['ul', 'ol'], - 'dl': [], - 'dd': ['dl'], - 'dt': ['dl']} - - # Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table': [], - 'tr': ['table', 'tbody', 'tfoot', 'thead'], - 'td': ['tr'], - 'th': ['tr'], - 'thead': ['table'], - 'tbody': ['table'], - 'tfoot': ['table'], - } - - NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') - - # If one of these tags is encountered, all tags up to the next tag of - # this type are popped. 
- RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - - # Used to detect the charset in a META tag; see start_meta - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def start_meta(self, attrs): - """Beautiful Soup can detect a charset included in a META tag, - try to convert the document to that charset, and re-parse the - document from the beginning.""" - httpEquiv = None - contentType = None - contentTypeIndex = None - tagNeedsEncodingSubstitution = False - - for i in range(0, len(attrs)): - key, value = attrs[i] - key = key.lower() - if key == 'http-equiv': - httpEquiv = value - elif key == 'content': - contentType = value - contentTypeIndex = i - - if httpEquiv and contentType: # It's an interesting meta tag. - match = self.CHARSET_RE.search(contentType) - if match: - if (self.declaredHTMLEncoding is not None or - self.originalEncoding == self.fromEncoding): - # An HTML encoding was sniffed while converting - # the document to Unicode, or an HTML encoding was - # sniffed during a previous pass through the - # document, or an encoding was specified - # explicitly and it worked. Rewrite the meta tag. - def rewrite(match): - return match.group(1) + "%SOUP-ENCODING%" - newAttr = self.CHARSET_RE.sub(rewrite, contentType) - attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], - newAttr) - tagNeedsEncodingSubstitution = True - else: - # This is our first pass through the document. - # Go through it again with the encoding information. 
- newCharset = match.group(3) - if newCharset and newCharset != self.originalEncoding: - self.declaredHTMLEncoding = newCharset - self._feed(self.declaredHTMLEncoding) - raise StopParsing - pass - tag = self.unknown_starttag("meta", attrs) - if tag and tagNeedsEncodingSubstitution: - tag.containsSubstitutions = True - - -class StopParsing(Exception): - pass - - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - <b>Foo<b>Bar</b></b> - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "<b>Foo<b>Bar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '</b></b>' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close a 'b' tag - than to actually use nested 'b' tags, and the BeautifulSoup class - handles the common case. 
This class handles the not-co-common - case: where you can't believe someone wrote what they did, but - it's valid HTML and BeautifulSoup screwed up by assuming it - wouldn't be.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big') - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - - -class MinimalSoup(BeautifulSoup): - """The MinimalSoup class is for parsing HTML that contains - pathologically bad markup. It makes no assumptions about tag - nesting, but it does know which tags are self-closing, that - <script> tags contain Javascript and should not be parsed, that - META tags may contain encoding information, and so on. - - This also makes it better for subclassing than BeautifulStoneSoup - or BeautifulSoup.""" - - RESET_NESTING_TAGS = buildTagMap('noscript') - NESTABLE_TAGS = {} - - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - <foo><bar>baz</bar></foo> - => - <foo bar="baz"><bar>baz</bar></foo> - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. 
Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableString) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -# Enterprise class names! It has come to our attention that some people -# think the names of the Beautiful Soup parser classes are too silly -# and "unprofessional" for use in enterprise screen-scraping. We feel -# your pain! For such-minded folk, the Beautiful Soup Consortium And -# All-Night Kosher Bakery recommends renaming this file to -# "RobustParser.py" (or, in cases of extreme enterprisiness, -# "RobustParserBeanInterface.class") and using the following -# enterprise-friendly class aliases: - - -class RobustXMLParser(BeautifulStoneSoup): - pass - - -class RobustHTMLParser(BeautifulSoup): - pass - - -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass - - -class RobustInsanelyWackAssHTMLParser(MinimalSoup): - pass - - -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -###################################################### -# -# Bonus library: Unicode, Dammit -# -# This class forces XML data into a standard format (usually to UTF-8 -# or Unicode). It is heavily based on code from Mark Pilgrim's -# Universal Feed Parser. It does not rewrite the XML or HTML to -# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi -# (XML) and BeautifulSoup.start_meta (HTML). - - -# Autodetects character encodings. -# Download from http://chardet.feedparser.org/ -try: - import chardet -# import chardet.constants -# chardet.constants._debug = 1 -except ImportError: - chardet = None - -# cjkcodecs and iconv_codec make Python know about more character encodings. -# Both are available from http://cjkpython.i18n.org/ -# They're built in if you use Python 2.4. 
-try: - import cjkcodecs.aliases -except ImportError: - pass -try: - import iconv_codec -except ImportError: - pass - - -class UnicodeDammit: - """A class for detecting the encoding of a *ML document and - converting it to a Unicode string. If the source encoding is - windows-1252, can replace MS smart quotes with their HTML or XML - equivalents.""" - - # This dictionary maps commonly seen values for "charset" in HTML - # meta tags to the corresponding Python codec names. It only covers - # values that aren't in Python's aliases and can't be determined - # by the heuristics in find_codec. - CHARSET_ALIASES = {"macintosh": "mac-roman", - "x-sjis": "shift-jis"} - - def __init__(self, markup, overrideEncodings=[], - smartQuotesTo='xml', isHTML=False): - self.declaredHTMLEncoding = None - self.markup, documentEncoding, sniffedEncoding = \ - self._detectEncoding(markup, isHTML) - self.smartQuotesTo = smartQuotesTo - self.triedEncodings = [] - if markup == '' or isinstance(markup, unicode): - self.originalEncoding = None - self.unicode = unicode(markup) - return - - u = None - for proposedEncoding in overrideEncodings: - u = self._convertFrom(proposedEncoding) - if u: - break - if not u: - for proposedEncoding in (documentEncoding, sniffedEncoding): - u = self._convertFrom(proposedEncoding) - if u: - break - - # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): - u = self._convertFrom(chardet.detect(self.markup)['encoding']) - - # As a last resort, try utf-8 and windows-1252: - if not u: - for proposed_encoding in ("utf-8", "windows-1252"): - u = self._convertFrom(proposed_encoding) - if u: - break - - self.unicode = u - if not u: - self.originalEncoding = None - - def _subMSChar(self, orig): - """Changes a MS smart quote character to an XML or HTML - entity.""" - sub = self.MS_CHARS.get(orig) - if isinstance(sub, tuple): - if self.smartQuotesTo == 'xml': - sub = '&#x%s;' % sub[1] - else: - sub = 
'&%s;' % sub[0] - return sub - - def _convertFrom(self, proposed): - proposed = self.find_codec(proposed) - if not proposed or proposed in self.triedEncodings: - return None - self.triedEncodings.append(proposed) - markup = self.markup - - # Convert smart quotes to HTML if coming from an encoding - # that might have them. - if self.smartQuotesTo and proposed.lower() in("windows-1252", - "iso-8859-1", - "iso-8859-2"): - markup = re.compile("([\x80-\x9f])").sub(lambda(x): self._subMSChar(x.group(1)), - markup) - - try: - # print "Trying to convert document to %s" % proposed - u = self._toUnicode(markup, proposed) - self.markup = u - self.originalEncoding = proposed - except Exception, e: - # print "That didn't work!" - # print e - return None - # print "Correct encoding: %s" % proposed - return self.markup - - def _toUnicode(self, data, encoding): - '''Given a string and its encoding, decodes the string into Unicode. - %encoding is a string recognized by encodings.aliases''' - - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == '\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding) - return newdata - - def _detectEncoding(self, xml_data, isHTML=False): - """Given a document, tries to detect its XML encoding.""" - xml_encoding = sniffed_xml_encoding = None - try: - if xml_data[:4] == '\x4c\x6f\xa7\x94': - # EBCDIC - xml_data = self._ebcdic_to_ascii(xml_data) - elif xml_data[:4] == '\x00\x3c\x00\x3f': - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif 
(len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ - and (xml_data[2:4] != '\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x3f\x00': - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ - (xml_data[2:4] != '\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\x00\x3c': - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x00\x00': - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\xfe\xff': - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\xff\xfe\x00\x00': - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == '\xef\xbb\xbf': - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - sniffed_xml_encoding = 'ascii' - pass - except: - xml_encoding_match = None - xml_encoding_match = re.compile( - '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) - if not xml_encoding_match and isHTML: - regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) - xml_encoding_match = regexp.search(xml_data) - if xml_encoding_match is not None: - xml_encoding = xml_encoding_match.groups()[0].lower() - if isHTML: - self.declaredHTMLEncoding = xml_encoding - if sniffed_xml_encoding and \ - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', - 'iso-10646-ucs-4', 'ucs-4', 'csucs4', - 'utf-16', 
'utf-32', 'utf_16', 'utf_32', - 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - return xml_data, xml_encoding, sniffed_xml_encoding - - def find_codec(self, charset): - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ - or (charset and self._codec(charset.replace("-", ""))) \ - or (charset and self._codec(charset.replace("-", "_"))) \ - or charset - - def _codec(self, charset): - if not charset: - return charset - codec = None - try: - codecs.lookup(charset) - codec = charset - except (LookupError, ValueError): - pass - return codec - - EBCDIC_TO_ASCII_MAP = None - - def _ebcdic_to_ascii(self, s): - c = self.__class__ - if not c.EBCDIC_TO_ASCII_MAP: - emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, - 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, - 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, - 32, 160, 161, 162, 163, 164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33, - 38, 169, 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59, 94, - 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44, 37, 95, 62, 63, - 186, 187, 188, 189, 190, 191, 192, 193, 194, 96, 58, 35, 64, 39, 61, 34, - 195, 97, 98, 99, 100, 101, 102, 103, 104, 105, 196, 197, 198, 199, 200, - 201, 202, 106, 107, 108, 109, 110, 111, 112, 113, 114, 203, 204, 205, - 206, 207, 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210, - 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, - 225, 226, 227, 228, 229, 230, 231, 123, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 232, 233, 234, 235, 236, 237, 125, 74, 75, 76, 77, 78, 79, 80, 81, - 82, 238, 239, 240, 241, 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, - 90, 244, 245, 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - 250, 251, 252, 253, 254, 255) - import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( - ''.join(map(chr, range(256))), 
''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) - - MS_CHARS = {'\x80': ('euro', '20AC'), - '\x81': ' ', - '\x82': ('sbquo', '201A'), - '\x83': ('fnof', '192'), - '\x84': ('bdquo', '201E'), - '\x85': ('hellip', '2026'), - '\x86': ('dagger', '2020'), - '\x87': ('Dagger', '2021'), - '\x88': ('circ', '2C6'), - '\x89': ('permil', '2030'), - '\x8A': ('Scaron', '160'), - '\x8B': ('lsaquo', '2039'), - '\x8C': ('OElig', '152'), - '\x8D': '?', - '\x8E': ('#x17D', '17D'), - '\x8F': '?', - '\x90': '?', - '\x91': ('lsquo', '2018'), - '\x92': ('rsquo', '2019'), - '\x93': ('ldquo', '201C'), - '\x94': ('rdquo', '201D'), - '\x95': ('bull', '2022'), - '\x96': ('ndash', '2013'), - '\x97': ('mdash', '2014'), - '\x98': ('tilde', '2DC'), - '\x99': ('trade', '2122'), - '\x9a': ('scaron', '161'), - '\x9b': ('rsaquo', '203A'), - '\x9c': ('oelig', '153'), - '\x9d': '?', - '\x9e': ('#x17E', '17E'), - '\x9f': ('Yuml', ''), } - -####################################################################### - - -# By default, act as an HTML pretty-printer. 
-if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) - print soup.prettify() @@ -1,6 +1,5 @@ # coding=utf-8 import json -import _mysql import time from framework import * @@ -50,10 +49,10 @@ def api_process(self, path_split): 'SELECT dir, name, board_type, allow_images, allow_image_replies, maxsize FROM `boards` WHERE `secret`=0 ORDER BY `sort` ASC') values['boards'] = boards for board in values['boards']: - board['board_type'] = int(board['board_type']) - board['allow_images'] = int(board['allow_images']) - board['allow_image_replies'] = int(board['allow_image_replies']) - board['maxsize'] = int(board['maxsize']) + board['board_type'] = board['board_type'] + board['allow_images'] = board['allow_images'] + board['allow_image_replies'] = board['allow_image_replies'] + board['maxsize'] = board['maxsize'] elif method == 'last': data_limit = formdata.get('limit') @@ -82,15 +81,15 @@ def api_process(self, path_split): values['posts'] = FetchAll(sql) for post in values['posts']: - post['id'] = int(post['id']) - post['timestamp'] = int(post['timestamp']) - post['parentid'] = int(post['parentid']) - post['file_size'] = int(post['file_size']) - post['image_width'] = int(post['image_width']) - post['image_height'] = int(post['image_height']) - post['thumb_width'] = int(post['thumb_width']) - post['thumb_height'] = int(post['thumb_height']) - post['message'] = post['message'].decode('utf-8', 'replace') + post['id'] = post['id'] + post['timestamp'] = post['timestamp'] + post['parentid'] = post['parentid'] + post['file_size'] = post['file_size'] + post['image_width'] = post['image_width'] + post['image_height'] = post['image_height'] + post['thumb_width'] = post['thumb_width'] + post['thumb_height'] = post['thumb_height'] + post['message'] = post['message'] elif method == 'lastage': data_limit = formdata.get('limit') data_time = formdata.get('time', 0) @@ -154,28 +153,28 @@ def api_process(self, path_split): if numreplies: for thread in threads: - 
lastreplies = FetchAll("SELECT id, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_height, image_width, thumb, thumb_width, thumb_height, IS_DELETED FROM `posts` WHERE parentid = %s AND boardid = %s ORDER BY `timestamp` DESC LIMIT %d" % ( - thread['id'], board['id'], numreplies)) + lastreplies = FetchAll("SELECT id, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_height, image_width, thumb, thumb_width, thumb_height, IS_DELETED FROM `posts` WHERE parentid = %s AND boardid = %s ORDER BY `timestamp` DESC LIMIT %s", + (thread['id'], board['id'], numreplies)) lastreplies = lastreplies[::-1] - thread['id'] = int(thread['id']) - thread['timestamp'] = int(thread['timestamp']) - thread['bumped'] = int(thread['bumped']) - thread['expires'] = int(thread['expires']) - thread['total_replies'] = int(thread['total_replies']) - thread['total_files'] = int(thread['total_files']) - thread['file_size'] = int(thread['file_size']) - thread['image_width'] = int(thread['image_width']) - thread['image_height'] = int(thread['image_height']) - thread['thumb_width'] = int(thread['thumb_width']) - thread['thumb_height'] = int(thread['thumb_height']) - thread['locked'] = int(thread['locked']) + thread['id'] = thread['id'] + thread['timestamp'] = thread['timestamp'] + thread['bumped'] = thread['bumped'] + thread['expires'] = thread['expires'] + thread['total_replies'] = thread['total_replies'] + thread['total_files'] = thread['total_files'] + thread['file_size'] = thread['file_size'] + thread['image_width'] = thread['image_width'] + thread['image_height'] = thread['image_height'] + thread['thumb_width'] = thread['thumb_width'] + thread['thumb_height'] = thread['thumb_height'] + thread['locked'] = thread['locked'] thread['replies'] = [] for post in lastreplies: - post['IS_DELETED'] = int(post['IS_DELETED']) - post['id'] = int(post['id']) - post['timestamp'] = int(post['timestamp']) + 
post['IS_DELETED'] = post['IS_DELETED'] + post['id'] = post['id'] + post['timestamp'] = post['timestamp'] if post['IS_DELETED']: empty_post = {'id': post['id'], @@ -184,13 +183,12 @@ def api_process(self, path_split): } thread['replies'].append(empty_post) else: - post['file_size'] = int(post['file_size']) - post['image_width'] = int(post['image_width']) - post['image_height'] = int(post['image_height']) - post['thumb_width'] = int(post['thumb_width']) - post['thumb_height'] = int(post['thumb_height']) - post['message'] = post['message'].decode( - 'utf-8', 'replace') + post['file_size'] = post['file_size'] + post['image_width'] = post['image_width'] + post['image_height'] = post['image_height'] + post['thumb_width'] = post['thumb_width'] + post['thumb_height'] = post['thumb_height'] + post['message'] = post['message'] thread['replies'].append(post) @@ -249,19 +247,19 @@ def api_process(self, path_split): if not op_post: raise APIError("Not a thread") - values['id'] = int(op_post['id']) - values['timestamp'] = int(op_post['timestamp']) + values['id'] = op_post['id'] + values['timestamp'] = op_post['timestamp'] values['subject'] = op_post['subject'] - values['locked'] = int(op_post['locked']) + values['locked'] = op_post['locked'] - total_replies = int(FetchOne("SELECT COUNT(1) FROM posts WHERE boardid = '%s' AND parentid = '%d'" % ( - board["id"], values['id']), 0)[0]) + total_replies = FetchOne("SELECT COUNT(1) AS count FROM posts WHERE boardid = %s AND parentid = %s", + (board["id"], values['id']))["count"] values['total_replies'] = total_replies - sql = "SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE boardid = %s AND (parentid = %s OR id = %s) ORDER BY id ASC LIMIT %d OFFSET %d" % ( - _mysql.escape_string(board['id']), values['id'], values['id'], limit, offset) - posts = FetchAll(sql) + sql = "SELECT id, 
parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE boardid = %s AND (parentid = %s OR id = %s) ORDER BY id ASC LIMIT %s OFFSET %s" + sqlv = (board['id'], values['id'], values['id'], limit, offset) + posts = FetchAll(sql, sqlv) values['posts'] = [] @@ -279,12 +277,12 @@ def api_process(self, path_split): } values['posts'].append(empty_post) else: - post['file_size'] = int(post['file_size']) - post['image_width'] = int(post['image_width']) - post['image_height'] = int(post['image_height']) - post['thumb_width'] = int(post['thumb_width']) - post['thumb_height'] = int(post['thumb_height']) - post['message'] = post['message'].decode('utf-8', 'replace') + post['file_size'] = post['file_size'] + post['image_width'] = post['image_width'] + post['image_height'] = post['image_height'] + post['thumb_width'] = post['thumb_width'] + post['thumb_height'] = post['thumb_height'] + post['message'] = post['message'] if striphtml: post['message'] = post['message'].replace("<br />", " ") post['message'] = re.compile( @@ -310,18 +308,18 @@ def api_process(self, path_split): except ValueError: raise APIError("Post ID must be numeric") - post = FetchOne("SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE `id`='%d' AND boardid='%s'" % ( - postid, board["id"])) + post = FetchOne("SELECT id, parentid, timestamp, timestamp_formatted, name, tripcode, email, subject, message, file, file_size, image_width, image_height, thumb, thumb_width, thumb_height, IS_DELETED FROM posts WHERE `id` = %s AND boardid = %s" + (postid, board["id"])) if not post: raise APIError("Post ID cannot be found") values['posts'] = [] - post['IS_DELETED'] = int(post['IS_DELETED']) - post['id'] = int(post['id']) - post['parentid'] = 
int(post['parentid']) - post['timestamp'] = int(post['timestamp']) + post['IS_DELETED'] = post['IS_DELETED'] + post['id'] = post['id'] + post['parentid'] = post['parentid'] + post['timestamp'] = post['timestamp'] if post['IS_DELETED']: empty_post = {'id': post['id'], @@ -331,12 +329,12 @@ def api_process(self, path_split): } values['posts'].append(empty_post) else: - post['file_size'] = int(post['file_size']) - post['image_width'] = int(post['image_width']) - post['image_height'] = int(post['image_height']) - post['thumb_width'] = int(post['thumb_width']) - post['thumb_height'] = int(post['thumb_height']) - post['message'] = post['message'].decode('utf-8', 'replace') + post['file_size'] = post['file_size'] + post['image_width'] = post['image_width'] + post['image_height'] = post['image_height'] + post['thumb_width'] = post['thumb_width'] + post['thumb_height'] = post['thumb_height'] + post['message'] = post['message'] values['posts'].append(post) elif method == 'delete': data_board = formdata.get('dir') @@ -402,22 +400,21 @@ def api_process(self, path_split): threads = getNewThreads(limit) values['threads'] = threads elif method == "blotter": - latest_news = FetchAll( - "SELECT `timestamp`, `message`, `timestamp_formatted` FROM `news` WHERE `type` = '2' ORDER BY `timestamp` DESC LIMIT " + str(Settings.HOME_NEWS)) + latest_news = FetchAll("SELECT `timestamp`, `message`, `timestamp_formatted` FROM `news` WHERE `type` = '2' ORDER BY `timestamp` DESC LIMIT %s", (Settings.HOME_NEWS,)) values["news"] = latest_news elif method == 'boardsExtra': boards = FetchAll('SELECT dir, name, longname, subname, postarea_desc, postarea_extra, anonymous, subject, message, disable_name, disable_subject, allow_spoilers, allow_oekaki, numthreads, board_type, allow_images, allow_image_replies, maxsize FROM `boards` WHERE `secret`=0 ORDER BY `sort` ASC') values['boards'] = boards for board in values['boards']: - board['board_type'] = int(board['board_type']) - board['allow_images'] = 
int(board['allow_images']) - board['allow_image_replies'] = int(board['allow_image_replies']) - board['disable_name'] = int(board['disable_name']) - board['disable_subject'] = int(board['disable_subject']) - board['allow_spoilers'] = int(board['allow_spoilers']) - board['allow_oekaki'] = int(board['allow_oekaki']) - board['numthreads'] = int(board['numthreads']) - board['maxsize'] = int(board['maxsize']) + board['board_type'] = board['board_type'] + board['allow_images'] = board['allow_images'] + board['allow_image_replies'] = board['allow_image_replies'] + board['disable_name'] = board['disable_name'] + board['disable_subject'] = board['disable_subject'] + board['allow_spoilers'] = board['allow_spoilers'] + board['allow_oekaki'] = board['allow_oekaki'] + board['numthreads'] = board['numthreads'] + board['maxsize'] = board['maxsize'] else: raise APIError("Invalid method") diff --git a/cgi/fcgi.py b/cgi/fcgi.py deleted file mode 100644 index 08af980..0000000 --- a/cgi/fcgi.py +++ /dev/null @@ -1,1363 +0,0 @@ -# Copyright (c) 2002, 2003, 2005, 2006 Allan Saddi <allan@saddi.com> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# $Id$ - -""" -fcgi - a FastCGI/WSGI gateway. - -For more information about FastCGI, see <http://www.fastcgi.com/>. - -For more information about the Web Server Gateway Interface, see -<http://www.python.org/peps/pep-0333.html>. - -Example usage: - - #!/usr/bin/env python - from myapplication import app # Assume app is your WSGI application object - from fcgi import WSGIServer - WSGIServer(app).run() - -See the documentation for WSGIServer/Server for more information. - -On most platforms, fcgi will fallback to regular CGI behavior if run in a -non-FastCGI context. If you want to force CGI behavior, set the environment -variable FCGI_FORCE_CGI to "Y" or "y". -""" - -__author__ = 'Allan Saddi <allan@saddi.com>' -__version__ = '$Revision$' - -import sys -import os -import signal -import struct -import cStringIO as StringIO -import select -import socket -import errno -import traceback - -try: - import thread - import threading - thread_available = True -except ImportError: - import dummy_thread as thread - import dummy_threading as threading - thread_available = False - -# Apparently 2.3 doesn't define SHUT_WR? Assume it is 1 in this case. -if not hasattr(socket, 'SHUT_WR'): - socket.SHUT_WR = 1 - -__all__ = ['WSGIServer'] - -# Constants from the spec. 
-FCGI_LISTENSOCK_FILENO = 0 - -FCGI_HEADER_LEN = 8 - -FCGI_VERSION_1 = 1 - -FCGI_BEGIN_REQUEST = 1 -FCGI_ABORT_REQUEST = 2 -FCGI_END_REQUEST = 3 -FCGI_PARAMS = 4 -FCGI_STDIN = 5 -FCGI_STDOUT = 6 -FCGI_STDERR = 7 -FCGI_DATA = 8 -FCGI_GET_VALUES = 9 -FCGI_GET_VALUES_RESULT = 10 -FCGI_UNKNOWN_TYPE = 11 -FCGI_MAXTYPE = FCGI_UNKNOWN_TYPE - -FCGI_NULL_REQUEST_ID = 0 - -FCGI_KEEP_CONN = 1 - -FCGI_RESPONDER = 1 -FCGI_AUTHORIZER = 2 -FCGI_FILTER = 3 - -FCGI_REQUEST_COMPLETE = 0 -FCGI_CANT_MPX_CONN = 1 -FCGI_OVERLOADED = 2 -FCGI_UNKNOWN_ROLE = 3 - -FCGI_MAX_CONNS = 'FCGI_MAX_CONNS' -FCGI_MAX_REQS = 'FCGI_MAX_REQS' -FCGI_MPXS_CONNS = 'FCGI_MPXS_CONNS' - -FCGI_Header = '!BBHHBx' -FCGI_BeginRequestBody = '!HB5x' -FCGI_EndRequestBody = '!LB3x' -FCGI_UnknownTypeBody = '!B7x' - -FCGI_EndRequestBody_LEN = struct.calcsize(FCGI_EndRequestBody) -FCGI_UnknownTypeBody_LEN = struct.calcsize(FCGI_UnknownTypeBody) - -if __debug__: - import time - - # Set non-zero to write debug output to a file. - DEBUG = 0 - DEBUGLOG = '/tmp/fcgi.log' - - def _debug(level, msg): - if DEBUG < level: - return - - try: - f = open(DEBUGLOG, 'a') - f.write('%sfcgi: %s\n' % (time.ctime()[4:-4], msg)) - f.close() - except: - pass - - -class InputStream(object): - """ - File-like object representing FastCGI input streams (FCGI_STDIN and - FCGI_DATA). Supports the minimum methods required by WSGI spec. - """ - - def __init__(self, conn): - self._conn = conn - - # See Server. - self._shrinkThreshold = conn.server.inputStreamShrinkThreshold - - self._buf = '' - self._bufList = [] - self._pos = 0 # Current read position. - self._avail = 0 # Number of bytes currently available. - - self._eof = False # True when server has sent EOF notification. 
- - def _shrinkBuffer(self): - """Gets rid of already read data (since we can't rewind).""" - if self._pos >= self._shrinkThreshold: - self._buf = self._buf[self._pos:] - self._avail -= self._pos - self._pos = 0 - - assert self._avail >= 0 - - def _waitForData(self): - """Waits for more data to become available.""" - self._conn.process_input() - - def read(self, n=-1): - if self._pos == self._avail and self._eof: - return '' - while True: - if n < 0 or (self._avail - self._pos) < n: - # Not enough data available. - if self._eof: - # And there's no more coming. - newPos = self._avail - break - else: - # Wait for more data. - self._waitForData() - continue - else: - newPos = self._pos + n - break - # Merge buffer list, if necessary. - if self._bufList: - self._buf += ''.join(self._bufList) - self._bufList = [] - r = self._buf[self._pos:newPos] - self._pos = newPos - self._shrinkBuffer() - return r - - def readline(self, length=None): - if self._pos == self._avail and self._eof: - return '' - while True: - # Unfortunately, we need to merge the buffer list early. - if self._bufList: - self._buf += ''.join(self._bufList) - self._bufList = [] - # Find newline. - i = self._buf.find('\n', self._pos) - if i < 0: - # Not found? - if self._eof: - # No more data coming. - newPos = self._avail - break - else: - # Wait for more to come. 
- self._waitForData() - continue - else: - newPos = i + 1 - break - if length is not None: - if self._pos + length < newPos: - newPos = self._pos + length - r = self._buf[self._pos:newPos] - self._pos = newPos - self._shrinkBuffer() - return r - - def readlines(self, sizehint=0): - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def __iter__(self): - return self - - def next(self): - r = self.readline() - if not r: - raise StopIteration - return r - - def add_data(self, data): - if not data: - self._eof = True - else: - self._bufList.append(data) - self._avail += len(data) - - -class MultiplexedInputStream(InputStream): - """ - A version of InputStream meant to be used with MultiplexedConnections. - Assumes the MultiplexedConnection (the producer) and the Request - (the consumer) are running in different threads. - """ - - def __init__(self, conn): - super(MultiplexedInputStream, self).__init__(conn) - - # Arbitrates access to this InputStream (it's used simultaneously - # by a Request and its owning Connection object). - lock = threading.RLock() - - # Notifies Request thread that there is new data available. - self._lock = threading.Condition(lock) - - def _waitForData(self): - # Wait for notification from add_data(). - self._lock.wait() - - def read(self, n=-1): - self._lock.acquire() - try: - return super(MultiplexedInputStream, self).read(n) - finally: - self._lock.release() - - def readline(self, length=None): - self._lock.acquire() - try: - return super(MultiplexedInputStream, self).readline(length) - finally: - self._lock.release() - - def add_data(self, data): - self._lock.acquire() - try: - super(MultiplexedInputStream, self).add_data(data) - self._lock.notify() - finally: - self._lock.release() - - -class OutputStream(object): - """ - FastCGI output stream (FCGI_STDOUT/FCGI_STDERR). 
By default, calls to - write() or writelines() immediately result in Records being sent back - to the server. Buffering should be done in a higher level! - """ - - def __init__(self, conn, req, type, buffered=False): - self._conn = conn - self._req = req - self._type = type - self._buffered = buffered - self._bufList = [] # Used if buffered is True - self.dataWritten = False - self.closed = False - - def _write(self, data): - length = len(data) - while length: - toWrite = min(length, self._req.server.maxwrite - FCGI_HEADER_LEN) - - rec = Record(self._type, self._req.requestId) - rec.contentLength = toWrite - rec.contentData = data[:toWrite] - self._conn.writeRecord(rec) - - data = data[toWrite:] - length -= toWrite - - def write(self, data): - assert not self.closed - - if not data: - return - - self.dataWritten = True - - if self._buffered: - self._bufList.append(data) - else: - self._write(data) - - def writelines(self, lines): - assert not self.closed - - for line in lines: - self.write(line) - - def flush(self): - # Only need to flush if this OutputStream is actually buffered. - if self._buffered: - data = ''.join(self._bufList) - self._bufList = [] - self._write(data) - - # Though available, the following should NOT be called by WSGI apps. - def close(self): - """Sends end-of-stream notification, if necessary.""" - if not self.closed and self.dataWritten: - self.flush() - rec = Record(self._type, self._req.requestId) - self._conn.writeRecord(rec) - self.closed = True - - -class TeeOutputStream(object): - """ - Simple wrapper around two or more output file-like objects that copies - written data to all streams. 
- """ - - def __init__(self, streamList): - self._streamList = streamList - - def write(self, data): - for f in self._streamList: - f.write(data) - - def writelines(self, lines): - for line in lines: - self.write(line) - - def flush(self): - for f in self._streamList: - f.flush() - - -class StdoutWrapper(object): - """ - Wrapper for sys.stdout so we know if data has actually been written. - """ - - def __init__(self, stdout): - self._file = stdout - self.dataWritten = False - - def write(self, data): - if data: - self.dataWritten = True - self._file.write(data) - - def writelines(self, lines): - for line in lines: - self.write(line) - - def __getattr__(self, name): - return getattr(self._file, name) - - -def decode_pair(s, pos=0): - """ - Decodes a name/value pair. - - The number of bytes decoded as well as the name/value pair - are returned. - """ - nameLength = ord(s[pos]) - if nameLength & 128: - nameLength = struct.unpack('!L', s[pos:pos+4])[0] & 0x7fffffff - pos += 4 - else: - pos += 1 - - valueLength = ord(s[pos]) - if valueLength & 128: - valueLength = struct.unpack('!L', s[pos:pos+4])[0] & 0x7fffffff - pos += 4 - else: - pos += 1 - - name = s[pos:pos+nameLength] - pos += nameLength - value = s[pos:pos+valueLength] - pos += valueLength - - return (pos, (name, value)) - - -def encode_pair(name, value): - """ - Encodes a name/value pair. - - The encoded string is returned. - """ - nameLength = len(name) - if nameLength < 128: - s = chr(nameLength) - else: - s = struct.pack('!L', nameLength | 0x80000000L) - - valueLength = len(value) - if valueLength < 128: - s += chr(valueLength) - else: - s += struct.pack('!L', valueLength | 0x80000000L) - - return s + name + value - - -class Record(object): - """ - A FastCGI Record. - - Used for encoding/decoding records. 
- """ - - def __init__(self, type=FCGI_UNKNOWN_TYPE, requestId=FCGI_NULL_REQUEST_ID): - self.version = FCGI_VERSION_1 - self.type = type - self.requestId = requestId - self.contentLength = 0 - self.paddingLength = 0 - self.contentData = '' - - def _recvall(sock, length): - """ - Attempts to receive length bytes from a socket, blocking if necessary. - (Socket may be blocking or non-blocking.) - """ - dataList = [] - recvLen = 0 - while length: - try: - data = sock.recv(length) - except socket.error, e: - if e[0] == errno.EAGAIN: - select.select([sock], [], []) - continue - else: - raise - if not data: # EOF - break - dataList.append(data) - dataLen = len(data) - recvLen += dataLen - length -= dataLen - return ''.join(dataList), recvLen - _recvall = staticmethod(_recvall) - - def read(self, sock): - """Read and decode a Record from a socket.""" - try: - header, length = self._recvall(sock, FCGI_HEADER_LEN) - except: - raise EOFError - - if length < FCGI_HEADER_LEN: - raise EOFError - - self.version, self.type, self.requestId, self.contentLength, \ - self.paddingLength = struct.unpack(FCGI_Header, header) - - if __debug__: - _debug(9, 'read: fd = %d, type = %d, requestId = %d, ' - 'contentLength = %d' % - (sock.fileno(), self.type, self.requestId, - self.contentLength)) - - if self.contentLength: - try: - self.contentData, length = self._recvall(sock, - self.contentLength) - except: - raise EOFError - - if length < self.contentLength: - raise EOFError - - if self.paddingLength: - try: - self._recvall(sock, self.paddingLength) - except: - raise EOFError - - def _sendall(sock, data): - """ - Writes data to a socket and does not return until all the data is sent. 
- """ - length = len(data) - while length: - try: - sent = sock.send(data) - except socket.error, e: - if e[0] == errno.EAGAIN: - select.select([], [sock], []) - continue - else: - raise - data = data[sent:] - length -= sent - _sendall = staticmethod(_sendall) - - def write(self, sock): - """Encode and write a Record to a socket.""" - self.paddingLength = -self.contentLength & 7 - - if __debug__: - _debug(9, 'write: fd = %d, type = %d, requestId = %d, ' - 'contentLength = %d' % - (sock.fileno(), self.type, self.requestId, - self.contentLength)) - - header = struct.pack(FCGI_Header, self.version, self.type, - self.requestId, self.contentLength, - self.paddingLength) - self._sendall(sock, header) - if self.contentLength: - self._sendall(sock, self.contentData) - if self.paddingLength: - self._sendall(sock, '\x00'*self.paddingLength) - - -class Request(object): - """ - Represents a single FastCGI request. - - These objects are passed to your handler and is the main interface - between your handler and the fcgi module. The methods should not - be called by your handler. However, server, params, stdin, stdout, - stderr, and data are free for your handler's use. 
- """ - - def __init__(self, conn, inputStreamClass): - self._conn = conn - - self.server = conn.server - self.params = {} - self.stdin = inputStreamClass(conn) - self.stdout = OutputStream(conn, self, FCGI_STDOUT) - self.stderr = OutputStream(conn, self, FCGI_STDERR, buffered=True) - self.data = inputStreamClass(conn) - - def run(self): - """Runs the handler, flushes the streams, and ends the request.""" - try: - protocolStatus, appStatus = self.server.handler(self) - except: - traceback.print_exc(file=self.stderr) - self.stderr.flush() - if not self.stdout.dataWritten: - self.server.error(self) - - protocolStatus, appStatus = FCGI_REQUEST_COMPLETE, 0 - - if __debug__: - _debug(1, 'protocolStatus = %d, appStatus = %d' % - (protocolStatus, appStatus)) - - self._flush() - self._end(appStatus, protocolStatus) - - def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE): - self._conn.end_request(self, appStatus, protocolStatus) - - def _flush(self): - self.stdout.close() - self.stderr.close() - - -class CGIRequest(Request): - """A normal CGI request disguised as a FastCGI request.""" - - def __init__(self, server): - # These are normally filled in by Connection. - self.requestId = 1 - self.role = FCGI_RESPONDER - self.flags = 0 - self.aborted = False - - self.server = server - self.params = dict(os.environ) - self.stdin = sys.stdin - self.stdout = StdoutWrapper(sys.stdout) # Oh, the humanity! - self.stderr = sys.stderr - self.data = StringIO.StringIO() - - def _end(self, appStatus=0L, protocolStatus=FCGI_REQUEST_COMPLETE): - sys.exit(appStatus) - - def _flush(self): - # Not buffered, do nothing. - pass - - -class Connection(object): - """ - A Connection with the web server. - - Each Connection is associated with a single socket (which is - connected to the web server) and is responsible for handling all - the FastCGI message processing for that socket. 
- """ - _multiplexed = False - _inputStreamClass = InputStream - - def __init__(self, sock, addr, server): - self._sock = sock - self._addr = addr - self.server = server - - # Active Requests for this Connection, mapped by request ID. - self._requests = {} - - def _cleanupSocket(self): - """Close the Connection's socket.""" - try: - self._sock.shutdown(socket.SHUT_WR) - except: - return - try: - while True: - r, w, e = select.select([self._sock], [], []) - if not r or not self._sock.recv(1024): - break - except: - pass - self._sock.close() - - def run(self): - """Begin processing data from the socket.""" - self._keepGoing = True - while self._keepGoing: - try: - self.process_input() - except EOFError: - break - except (select.error, socket.error), e: - if e[0] == errno.EBADF: # Socket was closed by Request. - break - raise - - self._cleanupSocket() - - def process_input(self): - """Attempt to read a single Record from the socket and process it.""" - # Currently, any children Request threads notify this Connection - # that it is no longer needed by closing the Connection's socket. - # We need to put a timeout on select, otherwise we might get - # stuck in it indefinitely... (I don't like this solution.) - while self._keepGoing: - try: - r, w, e = select.select([self._sock], [], [], 1.0) - except ValueError: - # Sigh. ValueError gets thrown sometimes when passing select - # a closed socket. - raise EOFError - if r: - break - if not self._keepGoing: - return - rec = Record() - rec.read(self._sock) - - if rec.type == FCGI_GET_VALUES: - self._do_get_values(rec) - elif rec.type == FCGI_BEGIN_REQUEST: - self._do_begin_request(rec) - elif rec.type == FCGI_ABORT_REQUEST: - self._do_abort_request(rec) - elif rec.type == FCGI_PARAMS: - self._do_params(rec) - elif rec.type == FCGI_STDIN: - self._do_stdin(rec) - elif rec.type == FCGI_DATA: - self._do_data(rec) - elif rec.requestId == FCGI_NULL_REQUEST_ID: - self._do_unknown_type(rec) - else: - # Need to complain about this. 
- pass - - def writeRecord(self, rec): - """ - Write a Record to the socket. - """ - rec.write(self._sock) - - def end_request(self, req, appStatus=0L, - protocolStatus=FCGI_REQUEST_COMPLETE, remove=True): - """ - End a Request. - - Called by Request objects. An FCGI_END_REQUEST Record is - sent to the web server. If the web server no longer requires - the connection, the socket is closed, thereby ending this - Connection (run() returns). - """ - rec = Record(FCGI_END_REQUEST, req.requestId) - rec.contentData = struct.pack(FCGI_EndRequestBody, appStatus, - protocolStatus) - rec.contentLength = FCGI_EndRequestBody_LEN - self.writeRecord(rec) - - if remove: - del self._requests[req.requestId] - - if __debug__: - _debug(2, 'end_request: flags = %d' % req.flags) - - if not (req.flags & FCGI_KEEP_CONN) and not self._requests: - self._cleanupSocket() - self._keepGoing = False - - def _do_get_values(self, inrec): - """Handle an FCGI_GET_VALUES request from the web server.""" - outrec = Record(FCGI_GET_VALUES_RESULT) - - pos = 0 - while pos < inrec.contentLength: - pos, (name, value) = decode_pair(inrec.contentData, pos) - cap = self.server.capability.get(name) - if cap is not None: - outrec.contentData += encode_pair(name, str(cap)) - - outrec.contentLength = len(outrec.contentData) - self.writeRecord(outrec) - - def _do_begin_request(self, inrec): - """Handle an FCGI_BEGIN_REQUEST from the web server.""" - role, flags = struct.unpack(FCGI_BeginRequestBody, inrec.contentData) - - req = self.server.request_class(self, self._inputStreamClass) - req.requestId, req.role, req.flags = inrec.requestId, role, flags - req.aborted = False - - if not self._multiplexed and self._requests: - # Can't multiplex requests. - self.end_request(req, 0L, FCGI_CANT_MPX_CONN, remove=False) - else: - self._requests[inrec.requestId] = req - - def _do_abort_request(self, inrec): - """ - Handle an FCGI_ABORT_REQUEST from the web server. - - We just mark a flag in the associated Request. 
- """ - req = self._requests.get(inrec.requestId) - if req is not None: - req.aborted = True - - def _start_request(self, req): - """Run the request.""" - # Not multiplexed, so run it inline. - req.run() - - def _do_params(self, inrec): - """ - Handle an FCGI_PARAMS Record. - - If the last FCGI_PARAMS Record is received, start the request. - """ - req = self._requests.get(inrec.requestId) - if req is not None: - if inrec.contentLength: - pos = 0 - while pos < inrec.contentLength: - pos, (name, value) = decode_pair(inrec.contentData, pos) - req.params[name] = value - else: - self._start_request(req) - - def _do_stdin(self, inrec): - """Handle the FCGI_STDIN stream.""" - req = self._requests.get(inrec.requestId) - if req is not None: - req.stdin.add_data(inrec.contentData) - - def _do_data(self, inrec): - """Handle the FCGI_DATA stream.""" - req = self._requests.get(inrec.requestId) - if req is not None: - req.data.add_data(inrec.contentData) - - def _do_unknown_type(self, inrec): - """Handle an unknown request type. Respond accordingly.""" - outrec = Record(FCGI_UNKNOWN_TYPE) - outrec.contentData = struct.pack(FCGI_UnknownTypeBody, inrec.type) - outrec.contentLength = FCGI_UnknownTypeBody_LEN - self.writeRecord(rec) - - -class MultiplexedConnection(Connection): - """ - A version of Connection capable of handling multiple requests - simultaneously. - """ - _multiplexed = True - _inputStreamClass = MultiplexedInputStream - - def __init__(self, sock, addr, server): - super(MultiplexedConnection, self).__init__(sock, addr, server) - - # Used to arbitrate access to self._requests. - lock = threading.RLock() - - # Notification is posted everytime a request completes, allowing us - # to quit cleanly. - self._lock = threading.Condition(lock) - - def _cleanupSocket(self): - # Wait for any outstanding requests before closing the socket. 
- self._lock.acquire() - while self._requests: - self._lock.wait() - self._lock.release() - - super(MultiplexedConnection, self)._cleanupSocket() - - def writeRecord(self, rec): - # Must use locking to prevent intermingling of Records from different - # threads. - self._lock.acquire() - try: - # Probably faster than calling super. ;) - rec.write(self._sock) - finally: - self._lock.release() - - def end_request(self, req, appStatus=0L, - protocolStatus=FCGI_REQUEST_COMPLETE, remove=True): - self._lock.acquire() - try: - super(MultiplexedConnection, self).end_request(req, appStatus, - protocolStatus, - remove) - self._lock.notify() - finally: - self._lock.release() - - def _do_begin_request(self, inrec): - self._lock.acquire() - try: - super(MultiplexedConnection, self)._do_begin_request(inrec) - finally: - self._lock.release() - - def _do_abort_request(self, inrec): - self._lock.acquire() - try: - super(MultiplexedConnection, self)._do_abort_request(inrec) - finally: - self._lock.release() - - def _start_request(self, req): - thread.start_new_thread(req.run, ()) - - def _do_params(self, inrec): - self._lock.acquire() - try: - super(MultiplexedConnection, self)._do_params(inrec) - finally: - self._lock.release() - - def _do_stdin(self, inrec): - self._lock.acquire() - try: - super(MultiplexedConnection, self)._do_stdin(inrec) - finally: - self._lock.release() - - def _do_data(self, inrec): - self._lock.acquire() - try: - super(MultiplexedConnection, self)._do_data(inrec) - finally: - self._lock.release() - - -class Server(object): - """ - The FastCGI server. - - Waits for connections from the web server, processing each - request. - - If run in a normal CGI context, it will instead instantiate a - CGIRequest and run the handler through there. - """ - request_class = Request - cgirequest_class = CGIRequest - - # Limits the size of the InputStream's string buffer to this size + the - # server's maximum Record size. 
Since the InputStream is not seekable, - # we throw away already-read data once this certain amount has been read. - inputStreamShrinkThreshold = 102400 - 8192 - - def __init__(self, handler=None, maxwrite=8192, bindAddress=None, - umask=None, multiplexed=False): - """ - handler, if present, must reference a function or method that - takes one argument: a Request object. If handler is not - specified at creation time, Server *must* be subclassed. - (The handler method below is abstract.) - - maxwrite is the maximum number of bytes (per Record) to write - to the server. I've noticed mod_fastcgi has a relatively small - receive buffer (8K or so). - - bindAddress, if present, must either be a string or a 2-tuple. If - present, run() will open its own listening socket. You would use - this if you wanted to run your application as an 'external' FastCGI - app. (i.e. the webserver would no longer be responsible for starting - your app) If a string, it will be interpreted as a filename and a UNIX - socket will be opened. If a tuple, the first element, a string, - is the interface name/IP to bind to, and the second element (an int) - is the port number. - - Set multiplexed to True if you want to handle multiple requests - per connection. Some FastCGI backends (namely mod_fastcgi) don't - multiplex requests at all, so by default this is off (which saves - on thread creation/locking overhead). If threads aren't available, - this keyword is ignored; it's not possible to multiplex requests - at all. - """ - if handler is not None: - self.handler = handler - self.maxwrite = maxwrite - if thread_available: - try: - import resource - # Attempt to glean the maximum number of connections - # from the OS. - maxConns = resource.getrlimit(resource.RLIMIT_NOFILE)[0] - except ImportError: - maxConns = 100 # Just some made up number. - maxReqs = maxConns - if multiplexed: - self._connectionClass = MultiplexedConnection - maxReqs *= 5 # Another made up number. 
- else: - self._connectionClass = Connection - self.capability = { - FCGI_MAX_CONNS: maxConns, - FCGI_MAX_REQS: maxReqs, - FCGI_MPXS_CONNS: multiplexed and 1 or 0 - } - else: - self._connectionClass = Connection - self.capability = { - # If threads aren't available, these are pretty much correct. - FCGI_MAX_CONNS: 1, - FCGI_MAX_REQS: 1, - FCGI_MPXS_CONNS: 0 - } - self._bindAddress = bindAddress - self._umask = umask - - def _setupSocket(self): - if self._bindAddress is None: # Run as a normal FastCGI? - isFCGI = True - - if isFCGI: - try: - sock = socket.fromfd(FCGI_LISTENSOCK_FILENO, socket.AF_INET, - socket.SOCK_STREAM) - sock.getpeername() - except AttributeError: - isFCGI = False - except socket.error, e: - if e[0] == errno.ENOTSOCK: - # Not a socket, assume CGI context. - isFCGI = False - elif e[0] != errno.ENOTCONN: - raise - - # FastCGI/CGI discrimination is broken on Mac OS X. - # Set the environment variable FCGI_FORCE_CGI to "Y" or "y" - # if you want to run your app as a simple CGI. (You can do - # this with Apache's mod_env [not loaded by default in OS X - # client, ha ha] and the SetEnv directive.) 
- if not isFCGI or \ - os.environ.get('FCGI_FORCE_CGI', 'N').upper().startswith('Y'): - req = self.cgirequest_class(self) - req.run() - sys.exit(0) - else: - # Run as a server - oldUmask = None - if type(self._bindAddress) is str: - # Unix socket - sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - try: - os.unlink(self._bindAddress) - except OSError: - pass - if self._umask is not None: - oldUmask = os.umask(self._umask) - else: - # INET socket - assert type(self._bindAddress) is tuple - assert len(self._bindAddress) == 2 - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - - sock.bind(self._bindAddress) - sock.listen(socket.SOMAXCONN) - - if oldUmask is not None: - os.umask(oldUmask) - - return sock - - def _cleanupSocket(self, sock): - """Closes the main socket.""" - sock.close() - - def _installSignalHandlers(self): - self._oldSIGs = [(x, signal.getsignal(x)) for x in - (signal.SIGHUP, signal.SIGINT, signal.SIGTERM)] - signal.signal(signal.SIGHUP, self._hupHandler) - signal.signal(signal.SIGINT, self._intHandler) - signal.signal(signal.SIGTERM, self._intHandler) - - def _restoreSignalHandlers(self): - for signum, handler in self._oldSIGs: - signal.signal(signum, handler) - - def _hupHandler(self, signum, frame): - self._hupReceived = True - self._keepGoing = False - - def _intHandler(self, signum, frame): - self._keepGoing = False - - def run(self, timeout=1.0): - """ - The main loop. Exits on SIGHUP, SIGINT, SIGTERM. Returns True if - SIGHUP was received, False otherwise. - """ - web_server_addrs = os.environ.get('FCGI_WEB_SERVER_ADDRS') - if web_server_addrs is not None: - web_server_addrs = map(lambda x: x.strip(), - web_server_addrs.split(',')) - - sock = self._setupSocket() - - self._keepGoing = True - self._hupReceived = False - - # Install signal handlers. 
- self._installSignalHandlers() - - while self._keepGoing: - try: - r, w, e = select.select([sock], [], [], timeout) - except select.error, e: - if e[0] == errno.EINTR: - continue - raise - - if r: - try: - clientSock, addr = sock.accept() - except socket.error, e: - if e[0] in (errno.EINTR, errno.EAGAIN): - continue - raise - - if web_server_addrs and \ - (len(addr) != 2 or addr[0] not in web_server_addrs): - clientSock.close() - continue - - # Instantiate a new Connection and begin processing FastCGI - # messages (either in a new thread or this thread). - conn = self._connectionClass(clientSock, addr, self) - thread.start_new_thread(conn.run, ()) - - self._mainloopPeriodic() - - # Restore signal handlers. - self._restoreSignalHandlers() - - self._cleanupSocket(sock) - - return self._hupReceived - - def _mainloopPeriodic(self): - """ - Called with just about each iteration of the main loop. Meant to - be overridden. - """ - pass - - def _exit(self, reload=False): - """ - Protected convenience method for subclasses to force an exit. Not - really thread-safe, which is why it isn't public. - """ - if self._keepGoing: - self._keepGoing = False - self._hupReceived = reload - - def handler(self, req): - """ - Default handler, which just raises an exception. Unless a handler - is passed at initialization time, this must be implemented by - a subclass. - """ - raise NotImplementedError, self.__class__.__name__ + '.handler' - - def error(self, req): - """ - Called by Request if an exception occurs within the handler. May and - should be overridden. - """ - import cgitb - req.stdout.write('Content-Type: text/html\r\n\r\n' + - cgitb.html(sys.exc_info())) - - -class WSGIServer(Server): - """ - FastCGI server that supports the Web Server Gateway Interface. See - <http://www.python.org/peps/pep-0333.html>. - """ - - def __init__(self, application, environ=None, umask=None, - multithreaded=True, **kw): - """ - environ, if present, must be a dictionary-like object. 
Its - contents will be copied into application's environ. Useful - for passing application-specific variables. - - Set multithreaded to False if your application is not MT-safe. - """ - if kw.has_key('handler'): - del kw['handler'] # Doesn't make sense to let this through - super(WSGIServer, self).__init__(**kw) - - if environ is None: - environ = {} - - self.application = application - self.environ = environ - self.multithreaded = multithreaded - - # Used to force single-threadedness - self._app_lock = thread.allocate_lock() - - def handler(self, req): - """Special handler for WSGI.""" - if req.role != FCGI_RESPONDER: - return FCGI_UNKNOWN_ROLE, 0 - - # Mostly taken from example CGI gateway. - environ = req.params - environ.update(self.environ) - - environ['wsgi.version'] = (1, 0) - environ['wsgi.input'] = req.stdin - if self._bindAddress is None: - stderr = req.stderr - else: - stderr = TeeOutputStream((sys.stderr, req.stderr)) - environ['wsgi.errors'] = stderr - environ['wsgi.multithread'] = not isinstance(req, CGIRequest) and \ - thread_available and self.multithreaded - # Rationale for the following: If started by the web server - # (self._bindAddress is None) in either FastCGI or CGI mode, the - # possibility of being spawned multiple times simultaneously is quite - # real. And, if started as an external server, multiple copies may be - # spawned for load-balancing/redundancy. (Though I don't think - # mod_fastcgi supports this?) 
- environ['wsgi.multiprocess'] = True - environ['wsgi.run_once'] = isinstance(req, CGIRequest) - - if environ.get('HTTPS', 'off') in ('on', '1'): - environ['wsgi.url_scheme'] = 'https' - else: - environ['wsgi.url_scheme'] = 'http' - - self._sanitizeEnv(environ) - - headers_set = [] - headers_sent = [] - result = None - - def write(data): - assert type(data) is str, 'write() argument must be string' - assert headers_set, 'write() before start_response()' - - if not headers_sent: - status, responseHeaders = headers_sent[:] = headers_set - found = False - for header, value in responseHeaders: - if header.lower() == 'content-length': - found = True - break - if not found and result is not None: - try: - if len(result) == 1: - responseHeaders.append(('Content-Length', - str(len(data)))) - except: - pass - s = 'Status: %s\r\n' % status - for header in responseHeaders: - s += '%s: %s\r\n' % header - s += '\r\n' - req.stdout.write(s) - - req.stdout.write(data) - req.stdout.flush() - - def start_response(status, response_headers, exc_info=None): - if exc_info: - try: - if headers_sent: - # Re-raise if too late - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None # avoid dangling circular ref - else: - assert not headers_set, 'Headers already set!' 
- - assert type(status) is str, 'Status must be a string' - assert len(status) >= 4, 'Status must be at least 4 characters' - assert int(status[:3]), 'Status must begin with 3-digit code' - assert status[3] == ' ', 'Status must have a space after code' - assert type(response_headers) is list, 'Headers must be a list' - if __debug__: - for name, val in response_headers: - assert type(name) is str, 'Header names must be strings' - assert type(val) is str, 'Header values must be strings' - - headers_set[:] = [status, response_headers] - return write - - if not self.multithreaded: - self._app_lock.acquire() - try: - try: - result = self.application(environ, start_response) - try: - for data in result: - if data: - write(data) - if not headers_sent: - write('') # in case body was empty - finally: - if hasattr(result, 'close'): - result.close() - except socket.error, e: - if e[0] != errno.EPIPE: - raise # Don't let EPIPE propagate beyond server - finally: - if not self.multithreaded: - self._app_lock.release() - - return FCGI_REQUEST_COMPLETE, 0 - - def _sanitizeEnv(self, environ): - """Ensure certain values are present, if required by WSGI.""" - if not environ.has_key('SCRIPT_NAME'): - environ['SCRIPT_NAME'] = '' - if not environ.has_key('PATH_INFO'): - environ['PATH_INFO'] = '' - - # If any of these are missing, it probably signifies a broken - # server... 
- for name, default in [('REQUEST_METHOD', 'GET'), - ('SERVER_NAME', 'localhost'), - ('SERVER_PORT', '80'), - ('SERVER_PROTOCOL', 'HTTP/1.0')]: - if not environ.has_key(name): - environ['wsgi.errors'].write('%s: missing FastCGI param %s ' - 'required by WSGI!\n' % - (self.__class__.__name__, name)) - environ[name] = default - - -if __name__ == '__main__': - def test_app(environ, start_response): - """Probably not the most efficient example.""" - import cgi - start_response('200 OK', [('Content-Type', 'text/html')]) - yield '<html><head><title>Hello World!</title></head>\n' \ - '<body>\n' \ - '<p>Hello World!</p>\n' \ - '<table border="1">' - names = environ.keys() - names.sort() - for name in names: - yield '<tr><td>%s</td><td>%s</td></tr>\n' % ( - name, cgi.escape(`environ[name]`)) - - form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ, - keep_blank_values=1) - if form.list: - yield '<tr><th colspan="2">Form data</th></tr>' - - for field in form.list: - yield '<tr><td>%s</td><td>%s</td></tr>\n' % ( - field.name, field.value) - - yield '</table>\n' \ - '</body></html>\n' - - WSGIServer(test_app).run() diff --git a/cgi/formatting.py b/cgi/formatting.py index 96bb73b..3da64da 100644 --- a/cgi/formatting.py +++ b/cgi/formatting.py @@ -3,7 +3,6 @@ import string import html import os import re -import pickle import time from database import * @@ -111,7 +110,7 @@ def iphash(ip, post, t, useid, mobile, agent, cap_id, hide_end, has_countrycode) if cap_id: id = cap_id - elif post['email'] and useid: + elif post['email'] and useid == 1: id = '???' elif ip == "127.0.0.1": id = '???' 
@@ -262,9 +261,9 @@ def checkRefLinks(message, parentid, parent_timestamp): """ board = Settings._.BOARD - if board["board_type"] == '1': + if board["board_type"] == 1: # Textboard - if parentid != '0': + if parentid: message = re.compile(r'>>(\d+(,\d+|-(?=[ \d\n])|\d+)*n?)').sub( '<a href="' + Settings.BOARDS_URL + board['dir'] + '/read/' + str(parent_timestamp) + r'/\1">>>\1</a>', message) else: @@ -272,14 +271,14 @@ def checkRefLinks(message, parentid, parent_timestamp): quotes_id_array = re.findall(r">>([0-9]+)", message) for quotes in quotes_id_array: try: - post = FetchOne('SELECT * FROM `posts` WHERE `id` = ' + - quotes + ' AND `boardid` = ' + board['id'] + ' LIMIT 1') - if post['parentid'] != '0': + post = FetchOne('SELECT * FROM `posts` WHERE `id` = %s AND `boardid` = %s LIMIT 1', + (quotes, board['id'])) + if post['parentid']: message = re.compile(">>" + quotes).sub('<a href="' + Settings.BOARDS_URL + - board['dir'] + '/res/' + post['parentid'] + '.html#' + quotes + '">>>' + quotes + '</a>', message) + board['dir'] + '/res/' + str(post['parentid']) + '.html#' + quotes + '">>>' + quotes + '</a>', message) else: message = re.compile(">>" + quotes).sub('<a href="' + Settings.BOARDS_URL + - board['dir'] + '/res/' + post['id'] + '.html#' + quotes + '">>>' + quotes + '</a>', message) + board['dir'] + '/res/' + str(post['id']) + '.html#' + quotes + '">>>' + quotes + '</a>', message) except: message = re.compile( ">>" + quotes).sub(r'<span class="q">>>'+quotes+'</span>', message) @@ -371,9 +370,9 @@ def checkWordfilters(message, ip, board): wordfilters = FetchAll( "SELECT * FROM `filters` WHERE `type` = '0' ORDER BY `id` ASC") for wordfilter in wordfilters: - if wordfilter["boards"] != "": - boards = pickle.loads(wordfilter["boards"].encode("utf-8")) - if wordfilter["boards"] == "" or board in boards: + if wordfilter["boards"]: + boards = str2boards(wordfilter["boards"]) + if not wordfilter["boards"] or board in boards: if wordfilter['action'] == '0': if not 
re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: raise UserError(wordfilter['reason']) @@ -404,9 +403,9 @@ def checkNamefilters(name, tripcode, ip, board): namefilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '1'") for namefilter in namefilters: - if namefilter["boards"] != "": - boards = pickle.loads(namefilter["boards"]) - if namefilter["boards"] == "" or board in boards: + if namefilter["boards"]: + boards = str2boards(namefilter["boards"]) + if not namefilter["boards"] or board in boards: # check if this filter applies match = False diff --git a/cgi/framework.py b/cgi/framework.py index 5277df0..e2af143 100644 --- a/cgi/framework.py +++ b/cgi/framework.py @@ -4,7 +4,6 @@ import cgi import datetime import time import hashlib -import pickle import socket import urllib.request, urllib.parse, urllib.error import re @@ -38,6 +37,14 @@ def setBoard(dir): return board +def str2boards(sstr): + return sstr.split(',') + + +def boards2str(boards): + return ','.join(boards) + + def cleanDir(path, ext=None): if ext: filelist = [f for f in os.listdir(path) if f.endswith("." 
+ ext)] @@ -49,14 +56,14 @@ def cleanDir(path, ext=None): def addressIsBanned(ip, board, blind_only=False): - query = "SELECT * FROM `bans` WHERE INET6_ATON('"+str(ip)+"') BETWEEN `ipstart` AND `ipend`" + query = "SELECT * FROM `bans` WHERE INET6_ATON(%s) BETWEEN `ipstart` AND `ipend`" if blind_only: query += " AND `blind` = '1'" - bans = FetchAll(query) + bans = FetchAll(query, (ip,)) for ban in bans: - if ban["boards"] != "": - boards = pickle.loads(ban["boards"]) - if ban["boards"] == "" or board in boards: + if ban["boards"]: + boards = str2boards(ban["boards"]) + if not ban["boards"] or board in boards: if board not in Settings.EXCLUDE_GLOBAL_BANS: return True return False @@ -140,15 +147,22 @@ def updateBoardSettings(): Pickle the board's settings and store it in the configuration field """ board = Settings._.BOARD - #UpdateDb("UPDATE `boards` SET `configuration` = '%s' WHERE `id` = %s LIMIT 1" % (_mysql.escape_string(configuration), board["id"])) - + del board["filetypes"] del board["filetypes_ext"] - post_values = ["`" + _mysql.escape_string(str(key)) + "` = '" + _mysql.escape_string( - str(value)) + "'" for key, value in board.items()] - - UpdateDb("UPDATE `boards` SET %s WHERE `id` = '%s' LIMIT 1" % - (", ".join(post_values), board["id"])) + + sql = "UPDATE `boards` SET " + keys = [] + values = [] + for k, v in board.items(): + keys.append("`" + k + "` = %s") + values.append(v) + + sql += ", ".join(keys) + sql += " WHERE `id` = %s LIMIT 1" + values.append(board["id"]) + + UpdateDb(sql, values) def timestamp(t=None): diff --git a/cgi/manage.py b/cgi/manage.py index 40be3b2..0053f54 100644 --- a/cgi/manage.py +++ b/cgi/manage.py @@ -35,7 +35,7 @@ def manage(self, path_split): UpdateDb("DELETE FROM `logs` WHERE `timestamp` < %s", (timestamp() - Settings.MANAGE_LOG_TIME,)) else: page += _('Incorrect username/password.') - logAction('', 'Failed log-in. U:'+_mysql.escape_string(self.formdata['username'])+' IP logged.') + logAction('', 'Failed log-in. 
U:'+self.formdata['username']+' IP logged.') logging.warn("Failed log-in. U:{} IP:{}".format(self.formdata['username'], self.environ["REMOTE_ADDR"])) else: # Validate existing session @@ -687,10 +687,9 @@ def manage(self, path_split): return if self.formdata['seconds'] != '0': - until = str( - timestamp() + int(self.formdata['seconds'])) + until = timestamp() + int(self.formdata['seconds']) else: - until = '0' + until = 0 where = '' if 'board_all' not in self.formdata: where = [] @@ -701,7 +700,7 @@ def manage(self, path_split): if self.formdata[keyname] == "1": where.append(board['dir']) if len(where) > 0: - where = pickle.dumps(where) + where = boards2str(where) else: self.error( _("You must select where the ban shall be placed")) @@ -719,14 +718,14 @@ def manage(self, path_split): return""" # Blind mode - blind = self.formdata.get('blind', '0') + blind = self.formdata.get('blind', 0) #raise UserError, "{} {} {}".format(ipstart, ipend, ipstr) # Banear sin mensaje - InsertDb("INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (INET6_ATON('" + - ipstart + "'), INET6_ATON('" + ipend + "'), '" + ipstr + "', '" + - _mysql.escape_string(where) + "', " + str(timestamp()) + ", " + until + ", '" + _mysql.escape_string(staff_account['username']) + "', '" + _mysql.escape_string(self.formdata['reason']) + "', '" + _mysql.escape_string(self.formdata['note']) + "', '"+blind+"')") + InsertDb("INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES " + "(INET6_ATON(%s), INET6_ATON(%s), %s, %s, %s, %s, %s, %s, %s, %s)", + (ipstart, ipend, ipstr, where, timestamp(), until, staff_account['username'], self.formdata['reason'], self.formdata['note'], blind)) regenerateAccess() if 'edit' in self.formdata: @@ -747,18 +746,18 @@ def manage(self, path_split): 'reason': '', 'note': '', 'message': '(GET OUT)', - 'seconds': '0', - 'blind': '1'} + 'seconds': 0, 
+ 'blind': 1} edit_id = 0 if 'edit' in self.formdata: edit_id = self.formdata['edit'] - ban = FetchOne("SELECT `id`, INET6_NTOA(`ip`) AS 'ip', CASE WHEN `netmask` IS NULL THEN '255.255.255.255' ELSE INET_NTOA(`netmask`) END AS 'netmask', boards, added, until, staff, reason, note, blind FROM `bans` WHERE `id` = '" + - _mysql.escape_string(edit_id) + "' ORDER BY `added` DESC") + ban = FetchOne("SELECT `id`, INET6_NTOA(`ip`) AS 'ip', CASE WHEN `netmask` IS NULL THEN '255.255.255.255' ELSE INET_NTOA(`netmask`) END AS 'netmask', boards, added, until, staff, reason, note, blind FROM `bans` WHERE `id` = %s ORDER BY `added` DESC", + (edit_id,)) if ban: if ban['boards'] == '': where = '' else: - where = pickle.loads(ban['boards']) + where = boards2str(ban['boards']) if ban['until'] == '0': until = 0 else: @@ -785,12 +784,12 @@ def manage(self, path_split): action_taken = False if len(path_split) > 4: if path_split[3] == 'delete': - ip = FetchOne("SELECT ipstr FROM `bans` WHERE `id` = '" + - _mysql.escape_string(path_split[4]) + "' LIMIT 1", 0)[0] - if ip != '': + ip = FetchOne("SELECT ipstr FROM `bans` WHERE `id` = %s LIMIT 1", + (path_split[4],)) + if ip: # Delete ban - UpdateDb('DELETE FROM `bans` WHERE `id` = ' + - _mysql.escape_string(path_split[4]) + ' LIMIT 1') + UpdateDb('DELETE FROM `bans` WHERE `id` = %s LIMIT 1', + (path_split[4],)) regenerateAccess() message = _('Ban successfully deleted.') template_filename = "message.html" @@ -809,18 +808,18 @@ def manage(self, path_split): if ban['boards'] == '': ban['boards'] = _('All boards') else: - where = pickle.loads(ban['boards'].encode('utf-8')) + where = str2boards(ban['boards']) if len(where) > 1: ban['boards'] = '/' + \ '/, /'.join(where) + '/' else: ban['boards'] = '/' + where[0] + '/' ban['added'] = formatTimestamp(ban['added']) - if ban['until'] == '0': + if ban['until'] == 0: ban['until'] = _('Does not expire') else: ban['until'] = formatTimestamp(ban['until']) - if ban['blind'] == '1': + if ban['blind']: 
ban['blind'] = 'SĆ' else: ban['blind'] = 'No' @@ -876,50 +875,50 @@ def manage(self, path_split): board['slip'] = self.formdata['slip'] board['countrycode'] = self.formdata['countrycode'] if 'recyclebin' in self.formdata: - board['recyclebin'] = '1' + board['recyclebin'] = 1 else: - board['recyclebin'] = '0' + board['recyclebin'] = 0 if 'disable_name' in self.formdata: - board['disable_name'] = '1' + board['disable_name'] = 1 else: - board['disable_name'] = '0' + board['disable_name'] = 0 if 'disable_subject' in self.formdata: - board['disable_subject'] = '1' + board['disable_subject'] = 1 else: - board['disable_subject'] = '0' + board['disable_subject'] = 0 if 'secret' in self.formdata: - board['secret'] = '1' + board['secret'] = 1 else: - board['secret'] = '0' + board['secret'] = 0 if 'locked' in self.formdata: - board['locked'] = '1' + board['locked'] = 1 else: - board['locked'] = '0' + board['locked'] = 0 board['postarea_desc'] = self.formdata['postarea_desc'] if 'allow_noimage' in self.formdata: - board['allow_noimage'] = '1' + board['allow_noimage'] = 1 else: - board['allow_noimage'] = '0' + board['allow_noimage'] = 0 if 'allow_images' in self.formdata: - board['allow_images'] = '1' + board['allow_images'] = 1 else: - board['allow_images'] = '0' + board['allow_images'] = 0 if 'allow_image_replies' in self.formdata: - board['allow_image_replies'] = '1' + board['allow_image_replies'] = 1 else: - board['allow_image_replies'] = '0' + board['allow_image_replies'] = 0 if 'allow_spoilers' in self.formdata: - board['allow_spoilers'] = '1' + board['allow_spoilers'] = 1 else: - board['allow_spoilers'] = '0' + board['allow_spoilers'] = 0 if 'allow_oekaki' in self.formdata: - board['allow_oekaki'] = '1' + board['allow_oekaki'] = 1 else: - board['allow_oekaki'] = '0' + board['allow_oekaki'] = 0 if 'archive' in self.formdata: - board['archive'] = '1' + board['archive'] = 1 else: - board['archive'] = '0' + board['archive'] = 0 board['postarea_extra'] = 
self.formdata['postarea_extra'] board['force_css'] = self.formdata['force_css'] @@ -932,8 +931,7 @@ def manage(self, path_split): board['id'], filetype['id'])) try: - board['numthreads'] = int( - self.formdata['numthreads']) + board['numthreads'] = int(self.formdata['numthreads']) except: raise UserError(_("Max threads shown must be numeric.")) @@ -963,14 +961,12 @@ def manage(self, path_split): raise UserError(_("Max age must be numeric.")) try: - board['maxinactive'] = int( - self.formdata['maxinactive']) + board['maxinactive'] = int(self.formdata['maxinactive']) except: raise UserError(_("Max inactivity must be numeric.")) try: - board['threadsecs'] = int( - self.formdata['threadsecs']) + board['threadsecs'] = int(self.formdata['threadsecs']) except: raise UserError(_("Time between new threads must be numeric.")) @@ -1306,7 +1302,7 @@ def manage(self, path_split): 'SELECT * FROM archive WHERE boardid = %s ORDER BY timestamp DESC' % board['id']) for item in threads: t = time.time() - self.output += item['timestamp'] + '<br />' + self.output += str(item['timestamp']) + '<br />' fname = Settings.ROOT_DIR + \ board["dir"] + "/kako/" + \ str(item["timestamp"]) + ".json" @@ -1367,10 +1363,10 @@ def manage(self, path_split): new_timestamp_formatted = formatTimestamp( post['timestamp']) tim = 0 - if board["useid"] != '0': + if board["useid"] != 0: new_timestamp_formatted += ' ID:' + \ - iphash(post['ip'], '', tim, '1', - False, False, False, '0') + iphash(post['ip'], '', tim, 1, + False, False, False, 0) self.output += "%s - %s <br />" % ( post['id'], new_timestamp_formatted) query = "UPDATE `posts` SET timestamp_formatted = '%s' WHERE boardid = '%s' AND id = '%s'" % ( @@ -1442,7 +1438,6 @@ def manage(self, path_split): filter_from = '' filter_tripcode = '' - # I don't like pickles... oh well. 
where = '' if 'board_all' not in self.formdata: where = [] @@ -1453,8 +1448,7 @@ def manage(self, path_split): if self.formdata[keyname] == "1": where.append(board['dir']) if len(where) > 0: - where = _mysql.escape_string( - pickle.dumps(where)) + where = boards2str(where) else: self.error( _("You must select what board the filter will affect")) @@ -1560,10 +1554,10 @@ def manage(self, path_split): edit_id = int(self.formdata['edit']) filt = FetchOne( "SELECT * FROM `filters` WHERE `id` = %s LIMIT 1" % str(edit_id)) - if filt['boards'] == '': + if not filt['boards']: where = '' else: - where = pickle.loads(filt['boards']) + where = str2boards(filt['boards']) startvalues = {'type': filt['type'], 'trip': filt['from_trip'], 'where': where, @@ -1615,18 +1609,13 @@ def manage(self, path_split): action_taken = True if not action_taken: - filters = FetchAll( - "SELECT * FROM `filters` ORDER BY `added` DESC") + filters = FetchAll("SELECT * FROM `filters` ORDER BY `added` DESC") for filter in filters: if not filter['boards']: filter['boards'] = _('All boards') else: - where = pickle.loads(filter['boards'].encode('utf-8')) - if len(where) > 1: - filter['boards'] = '/' + \ - '/, /'.join(where) + '/' - else: - filter['boards'] = '/' + where[0] + '/' + where = str2boards(filter['boards']) + filter['boards'] = '/' + '/, /'.join(where) + '/' if filter['type'] == 0: filter['type_formatted'] = _('Word:') + ' <b>' + html.escape(filter['from']) + '</b>' elif filter['type'] == 1: @@ -1834,10 +1823,10 @@ def manage(self, path_split): message = None import math - pagesize = float(Settings.REPORTS_PER_PAGE) + pagesize = Settings.REPORTS_PER_PAGE totals = FetchOne("SELECT COUNT(id) FROM `reports`") total = int(totals['COUNT(id)']) - pages = int(math.ceil(total / pagesize)) + pages = int(math.ceil(total // pagesize)) try: currentpage = int(path_split[3]) @@ -1847,24 +1836,23 @@ def manage(self, path_split): if len(path_split) > 4: if path_split[4] == 'ignore': # Delete report - 
UpdateDb("DELETE FROM `reports` WHERE `id` = '" + - _mysql.escape_string(path_split[5])+"'") + UpdateDb("DELETE FROM `reports` WHERE `id` = %s", (path_split[5],)) message = _('Report %s ignored.') % path_split[5] if 'ignore' in self.formdata: ignored = 0 if 'board' in self.formdata and self.formdata['board'] != 'all': - reports = FetchAll("SELECT `id` FROM `reports` WHERE `board` = '%s' ORDER BY `timestamp` DESC LIMIT %d, %d" % ( - _mysql.escape_string(self.formdata['board']), currentpage*pagesize, pagesize)) + reports = FetchAll("SELECT `id` FROM `reports` WHERE `board` = %s ORDER BY `timestamp` DESC LIMIT %s, %s", + (self.formdata['board'], currentpage*pagesize, pagesize)) else: - reports = FetchAll("SELECT `id` FROM `reports` ORDER BY `timestamp` DESC LIMIT %d, %d" % ( - currentpage*pagesize, pagesize)) + reports = FetchAll("SELECT `id` FROM `reports` ORDER BY `timestamp` DESC LIMIT %s, %s", + (currentpage*pagesize, pagesize)) for report in reports: keyname = 'i' + report['id'] if keyname in self.formdata: # Ignore here - UpdateDb("DELETE FROM `reports` WHERE `id` = '" + - _mysql.escape_string(report['id'])+"'") + UpdateDb("DELETE FROM `reports` WHERE `id` = %s", + (report['id'],)) ignored += 1 message = _('Ignored %s report(s).') % str(ignored) @@ -1880,11 +1868,11 @@ def manage(self, path_split): # Tabla if 'board' in self.formdata and self.formdata['board'] != 'all': - reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` WHERE `board` = '%s' ORDER BY `timestamp` DESC LIMIT %d, %d" % ( - _mysql.escape_string(self.formdata['board']), currentpage*pagesize, pagesize)) + reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` WHERE `board` = %s ORDER BY `timestamp` DESC LIMIT %s, %s", + (self.formdata['board'], currentpage*pagesize, pagesize)) 
else: - reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` ORDER BY `timestamp` DESC LIMIT %d, %d" % ( - currentpage*pagesize, pagesize)) + reports = FetchAll("SELECT id, timestamp, timestamp_formatted, postid, parentid, link, board, INET6_NTOA(ip) AS ip, reason, INET6_NTOA(repip) AS repip FROM `reports` ORDER BY `timestamp` DESC LIMIT %s, %s", + (currentpage*pagesize, pagesize)) if 'board' in self.formdata: curboard = self.formdata['board'] diff --git a/cgi/markdown.py b/cgi/markdown.py deleted file mode 100644 index 846c192..0000000 --- a/cgi/markdown.py +++ /dev/null @@ -1,2093 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2007-2008 ActiveState Corp. -# License: MIT (http://www.opensource.org/licenses/mit-license.php) - -r"""A fast and complete Python implementation of Markdown. - -[from http://daringfireball.net/projects/markdown/] -> Markdown is a text-to-HTML filter; it translates an easy-to-read / -> easy-to-write structured text format into HTML. Markdown's text -> format is most similar to that of plain text email, and supports -> features such as headers, *emphasis*, code blocks, blockquotes, and -> links. -> -> Markdown's syntax is designed not as a generic markup language, but -> specifically to serve as a front-end to (X)HTML. You can use span-level -> HTML tags anywhere in a Markdown document, and you can use block level -> HTML tags (like <div> and <table> as well). 
- -Module usage: - - >>> import markdown2 - >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` - u'<p><em>boo!</em></p>\n' - - >>> markdowner = Markdown() - >>> markdowner.convert("*boo!*") - u'<p><em>boo!</em></p>\n' - >>> markdowner.convert("**boom!**") - u'<p><strong>boom!</strong></p>\n' - -This implementation of Markdown implements the full "core" syntax plus a -number of extras (e.g., code syntax coloring, footnotes) as described on -<http://code.google.com/p/python-markdown2/wiki/Extras>. -""" - -from urllib import quote -import codecs -from random import random, randint -import optparse -import logging -import re -from pprint import pprint -import sys -import os -cmdln_desc = """A fast and complete Python implementation of Markdown, a -text-to-HTML conversion tool for web writers. - -Supported extras (see -x|--extras option below): -* code-friendly: Disable _ and __ for em and strong. -* code-color: Pygments-based syntax coloring of <code> sections. -* cuddled-lists: Allow lists to be cuddled to the preceding paragraph. -* footnotes: Support footnotes as in use on daringfireball.net and - implemented in other Markdown processors (tho not in Markdown.pl v1.0.1). -* html-classes: Takes a dict mapping html tag names (lowercase) to a - string to use for a "class" tag attribute. Currently only supports - "pre" and "code" tags. Add an issue if you require this for other tags. -* pyshell: Treats unindented Python interactive shell sessions as <code> - blocks. -* link-patterns: Auto-link given regex patterns in text (e.g. bug number - references, revision number references). -* xml: Passes one-liner processing instructions and namespaced XML tags. -""" - -# Dev Notes: -# - There is already a Python markdown processor -# (http://www.freewisdom.org/projects/python-markdown/). -# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm -# not yet sure if there implications with this. Compare 'pydoc sre' -# and 'perldoc perlre'. 
- -__version_info__ = (1, 0, 1, 17) # first three nums match Markdown.pl -__version__ = '1.0.1.17' -__author__ = "Trent Mick" - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - - -# ---- Python version compat - -if sys.version_info[:2] < (2, 4): - from sets import Set as set - - def reversed(sequence): - for i in sequence[::-1]: - yield i - - def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): - return unicode(s, encoding, errors) -else: - def _unicode_decode(s, encoding, errors='strict'): - return s.decode(encoding, errors) - - -#---- globals - -DEBUG = False -log = logging.getLogger("markdown") - -DEFAULT_TAB_WIDTH = 4 - - -try: - import uuid -except ImportError: - SECRET_SALT = str(randint(0, 1000000)) -else: - SECRET_SALT = str(uuid.uuid4()) - - -def _hash_ascii(s): - # return md5(s).hexdigest() # Markdown.pl effectively does this. - return 'md5-' + md5(SECRET_SALT + s).hexdigest() - - -def _hash_text(s): - return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest() - - -# Table of hash values for escaped characters: -g_escape_table = dict([(ch, _hash_ascii(ch)) - for ch in '\\`*_{}[]()>#+-.!']) - - -#---- exceptions - -class MarkdownError(Exception): - pass - - -# ---- public api - -def markdown_path(path, encoding="utf-8", - html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - use_file_vars=False): - fp = codecs.open(path, 'r', encoding) - text = fp.read() - fp.close() - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - use_file_vars=use_file_vars).convert(text) - - -def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, - safe_mode=None, extras=None, link_patterns=None, - use_file_vars=False): - return Markdown(html4tags=html4tags, tab_width=tab_width, - safe_mode=safe_mode, extras=extras, - link_patterns=link_patterns, - use_file_vars=use_file_vars).convert(text) - - -class 
Markdown(object): - # The dict of "extras" to enable in processing -- a mapping of - # extra name to argument for the extra. Most extras do not have an - # argument, in which case the value is None. - # - # This can be set via (a) subclassing and (b) the constructor - # "extras" argument. - extras = None - - urls = None - titles = None - html_blocks = None - html_spans = None - html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py - - # Used to track when we're inside an ordered or unordered list - # (see _ProcessListItems() for details): - list_level = 0 - - _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) - - def __init__(self, html4tags=False, tab_width=4, safe_mode=None, - extras=None, link_patterns=None, use_file_vars=False): - if html4tags: - self.empty_element_suffix = ">" - else: - self.empty_element_suffix = " />" - self.tab_width = tab_width - - # For compatibility with earlier markdown2.py and with - # markdown.py's safe_mode being a boolean, - # safe_mode == True -> "replace" - if safe_mode is True: - self.safe_mode = "replace" - else: - self.safe_mode = safe_mode - - if self.extras is None: - self.extras = {} - elif not isinstance(self.extras, dict): - self.extras = dict([(e, None) for e in self.extras]) - if extras: - if not isinstance(extras, dict): - extras = dict([(e, None) for e in extras]) - self.extras.update(extras) - assert isinstance(self.extras, dict) - if "toc" in self.extras and not "header-ids" in self.extras: - self.extras["header-ids"] = None # "toc" implies "header-ids" - self._instance_extras = self.extras.copy() - self.link_patterns = link_patterns - self.use_file_vars = use_file_vars - self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) - - def reset(self): - self.urls = {} - self.titles = {} - self.html_blocks = {} - self.html_spans = {} - self.list_level = 0 - self.extras = self._instance_extras.copy() - if "footnotes" in self.extras: - self.footnotes = {} - self.footnote_ids = [] - if "header-ids" in 
self.extras: - self._count_from_header_id = {} # no `defaultdict` in Python 2.4 - - def convert(self, text): - """Convert the given text.""" - # Main function. The order in which other subs are called here is - # essential. Link and image substitutions need to happen before - # _EscapeSpecialChars(), so that any *'s or _'s in the <a> - # and <img> tags get encoded. - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - self.reset() - - if not isinstance(text, unicode): - # TODO: perhaps shouldn't presume UTF-8 for string input? - text = unicode(text, 'utf-8') - - if self.use_file_vars: - # Look for emacs-style file variable hints. - emacs_vars = self._get_emacs_vars(text) - if "markdown-extras" in emacs_vars: - splitter = re.compile("[ ,]+") - for e in splitter.split(emacs_vars["markdown-extras"]): - if '=' in e: - ename, earg = e.split('=', 1) - try: - earg = int(earg) - except ValueError: - pass - else: - ename, earg = e, None - self.extras[ename] = earg - - # Standardize line endings: - text = re.sub("\r\n|\r", "\n", text) - - # Make sure $text ends with a couple of newlines: - text += "\n\n" - - # Convert all tabs to spaces. - text = self._detab(text) - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - text = self._ws_only_line_re.sub("", text) - - if self.safe_mode: - text = self._hash_html_spans(text) - - # Turn block-level HTML blocks into hash entries - text = self._hash_html_blocks(text, raw=True) - - # Strip link definitions, store in hashes. 
- if "footnotes" in self.extras: - # Must do footnotes first because an unlucky footnote defn - # looks like a link defn: - # [^4]: this "looks like a link defn" - text = self._strip_footnote_definitions(text) - text = self._strip_link_definitions(text) - - text = self._run_block_gamut(text) - - if "footnotes" in self.extras: - text = self._add_footnotes(text) - - text = self._unescape_special_chars(text) - - if self.safe_mode: - text = self._unhash_html_spans(text) - - #text += "\n" - - rv = UnicodeWithAttrs(text) - if "toc" in self.extras: - rv._toc = self._toc - return rv - - _emacs_oneliner_vars_pat = re.compile( - r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE) - # This regular expression is intended to match blocks like this: - # PREFIX Local Variables: SUFFIX - # PREFIX mode: Tcl SUFFIX - # PREFIX End: SUFFIX - # Some notes: - # - "[ \t]" is used instead of "\s" to specifically exclude newlines - # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does - # not like anything other than Unix-style line terminators. - _emacs_local_vars_pat = re.compile(r"""^ - (?P<prefix>(?:[^\r\n|\n|\r])*?) - [\ \t]*Local\ Variables:[\ \t]* - (?P<suffix>.*?)(?:\r\n|\n|\r) - (?P<content>.*?\1End:) - """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE) - - def _get_emacs_vars(self, text): - """Return a dictionary of emacs-style local variables. - - Parsing is done loosely according to this spec (and according to - some in-practice deviations from this): - http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables - """ - emacs_vars = {} - SIZE = pow(2, 13) # 8kB - - # Search near the start for a '-*-'-style one-liner of variables. 
- head = text[:SIZE] - if "-*-" in head: - match = self._emacs_oneliner_vars_pat.search(head) - if match: - emacs_vars_str = match.group(1) - assert '\n' not in emacs_vars_str - emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';') - if s.strip()] - if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]: - # While not in the spec, this form is allowed by emacs: - # -*- Tcl -*- - # where the implied "variable" is "mode". This form - # is only allowed if there are no other variables. - emacs_vars["mode"] = emacs_var_strs[0].strip() - else: - for emacs_var_str in emacs_var_strs: - try: - variable, value = emacs_var_str.strip().split(':', 1) - except ValueError: - log.debug("emacs variables error: malformed -*- " - "line: %r", emacs_var_str) - continue - # Lowercase the variable name because Emacs allows "Mode" - # or "mode" or "MoDe", etc. - emacs_vars[variable.lower()] = value.strip() - - tail = text[-SIZE:] - if "Local Variables" in tail: - match = self._emacs_local_vars_pat.search(tail) - if match: - prefix = match.group("prefix") - suffix = match.group("suffix") - lines = match.group("content").splitlines(0) - #print "prefix=%r, suffix=%r, content=%r, lines: %s"\ - # % (prefix, suffix, match.group("content"), lines) - - # Validate the Local Variables block: proper prefix and suffix - # usage. - for i, line in enumerate(lines): - if not line.startswith(prefix): - log.debug("emacs variables error: line '%s' " - "does not use proper prefix '%s'" - % (line, prefix)) - return {} - # Don't validate suffix on last line. Emacs doesn't care, - # neither should we. - if i != len(lines)-1 and not line.endswith(suffix): - log.debug("emacs variables error: line '%s' " - "does not use proper suffix '%s'" - % (line, suffix)) - return {} - - # Parse out one emacs var per line. 
- continued_for = None - # no var on the last line ("PREFIX End:") - for line in lines[:-1]: - if prefix: - line = line[len(prefix):] # strip prefix - if suffix: - line = line[:-len(suffix)] # strip suffix - line = line.strip() - if continued_for: - variable = continued_for - if line.endswith('\\'): - line = line[:-1].rstrip() - else: - continued_for = None - emacs_vars[variable] += ' ' + line - else: - try: - variable, value = line.split(':', 1) - except ValueError: - log.debug("local variables error: missing colon " - "in local variables entry: '%s'" % line) - continue - # Do NOT lowercase the variable name, because Emacs only - # allows "mode" (and not "Mode", "MoDe", etc.) in this block. - value = value.strip() - if value.endswith('\\'): - value = value[:-1].rstrip() - continued_for = variable - else: - continued_for = None - emacs_vars[variable] = value - - # Unquote values. - for var, val in emacs_vars.items(): - if len(val) > 1 and (val.startswith('"') and val.endswith('"') - or val.startswith('"') and val.endswith('"')): - emacs_vars[var] = val[1:-1] - - return emacs_vars - - # Cribbed from a post by Bart Lateur: - # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> - _detab_re = re.compile(r'(.*?)\t', re.M) - - def _detab_sub(self, match): - g1 = match.group(1) - return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) - - def _detab(self, text): - r"""Remove (leading?) tabs from a file. 
- - >>> m = Markdown() - >>> m._detab("\tfoo") - ' foo' - >>> m._detab(" \tfoo") - ' foo' - >>> m._detab("\t foo") - ' foo' - >>> m._detab(" foo") - ' foo' - >>> m._detab(" foo\n\tbar\tblam") - ' foo\n bar blam' - """ - if '\t' not in text: - return text - return self._detab_re.subn(self._detab_sub, text)[0] - - _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' - _strict_tag_block_re = re.compile(r""" - ( # save in \1 - ^ # start of line (with re.M) - <(%s) # start tag = \2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - </\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - """ % _block_tags_a, - re.X | re.M) - - _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' - _liberal_tag_block_re = re.compile(r""" - ( # save in \1 - ^ # start of line (with re.M) - <(%s) # start tag = \2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .*</\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - """ % _block_tags_b, - re.X | re.M) - - def _hash_html_block_sub(self, match, raw=False): - html = match.group(1) - if raw and self.safe_mode: - html = self._sanitize_html(html) - key = _hash_text(html) - self.html_blocks[key] = html - return "\n\n" + key + "\n\n" - - def _hash_html_blocks(self, text, raw=False): - """Hashify HTML blocks - - We only want to do this for block-level HTML tags, such as headers, - lists, and tables. That's because we still want to wrap <p>s around - "paragraphs" that are wrapped in non-block-level tags, such as anchors, - phrase emphasis, and spans. The list of tags we're looking for is - hard-coded. - - @param raw {boolean} indicates if these are raw HTML blocks in - the original source. It makes a difference in "safe" mode. 
- """ - if '<' not in text: - return text - - # Pass `raw` value into our calls to self._hash_html_block_sub. - hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) - - # First, look for nested blocks, e.g.: - # <div> - # <div> - # tags for inner block must be indented. - # </div> - # </div> - # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `<div>` and stop at the first `</div>`. - text = self._strict_tag_block_re.sub(hash_html_block_sub, text) - - # Now match more liberally, simply from `\n<tag>` to `</tag>\n` - text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) - - # Special case just for <hr />. It was easier to make a special - # case than to make the other regex more complicated. - if "<hr" in text: - _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width) - text = _hr_tag_re.sub(hash_html_block_sub, text) - - # Special case for standalone HTML comments: - if "<!--" in text: - start = 0 - while True: - # Delimiters for next comment block. - try: - start_idx = text.index("<!--", start) - except ValueError, ex: - break - try: - end_idx = text.index("-->", start_idx) + 3 - except ValueError, ex: - break - - # Start position for next comment block search. - start = end_idx - - # Validate whitespace before comment. - if start_idx: - # - Up to `tab_width - 1` spaces before start_idx. - for i in range(self.tab_width - 1): - if text[start_idx - 1] != ' ': - break - start_idx -= 1 - if start_idx == 0: - break - # - Must be preceded by 2 newlines or hit the start of - # the document. - if start_idx == 0: - pass - elif start_idx == 1 and text[0] == '\n': - start_idx = 0 # to match minute detail of Markdown.pl regex - elif text[start_idx-2:start_idx] == '\n\n': - pass - else: - break - - # Validate whitespace after comment. - # - Any number of spaces and tabs. 
- while end_idx < len(text): - if text[end_idx] not in ' \t': - break - end_idx += 1 - # - Must be following by 2 newlines or hit end of text. - if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): - continue - - # Escape and hash (must match `_hash_html_block_sub`). - html = text[start_idx:end_idx] - if raw and self.safe_mode: - html = self._sanitize_html(html) - key = _hash_text(html) - self.html_blocks[key] = html - text = text[:start_idx] + "\n\n" + \ - key + "\n\n" + text[end_idx:] - - if "xml" in self.extras: - # Treat XML processing instructions and namespaced one-liner - # tags as if they were block HTML tags. E.g., if standalone - # (i.e. are their own paragraph), the following do not get - # wrapped in a <p> tag: - # <?foo bar?> - # - # <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/> - _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) - text = _xml_oneliner_re.sub(hash_html_block_sub, text) - - return text - - def _strip_link_definitions(self, text): - # Strips link definitions from text, stores the URLs and titles in - # hash references. - less_than_tab = self.tab_width - 1 - - # Link defs are in the form: - # [id]: url "optional title" - _link_def_re = re.compile(r""" - ^[ ]{0,%d}\[(.+)\]: # id = \1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - <?(.+?)>? # url = \2 - [ \t]* - (?: - \n? # maybe one newline - [ \t]* - (?<=\s) # lookbehind for whitespace - ['"(] - ([^\n]*) # title = \3 - ['")] - [ \t]* - )? 
# title is optional - (?:\n+|\Z) - """ % less_than_tab, re.X | re.M | re.U) - return _link_def_re.sub(self._extract_link_def_sub, text) - - def _extract_link_def_sub(self, match): - id, url, title = match.groups() - key = id.lower() # Link IDs are case-insensitive - self.urls[key] = self._encode_amps_and_angles(url) - if title: - self.titles[key] = title.replace('"', '"') - return "" - - def _extract_footnote_def_sub(self, match): - id, text = match.groups() - text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() - normed_id = re.sub(r'\W', '-', id) - # Ensure footnote text ends with a couple newlines (for some - # block gamut matches). - self.footnotes[normed_id] = text + "\n\n" - return "" - - def _strip_footnote_definitions(self, text): - """A footnote definition looks like this: - - [^note-id]: Text of the note. - - May include one or more indented paragraphs. - - Where, - - The 'note-id' can be pretty much anything, though typically it - is the number of the footnote. - - The first paragraph may start on the next line, like so: - - [^note-id]: - Text of the note. - """ - less_than_tab = self.tab_width - 1 - footnote_def_re = re.compile(r''' - ^[ ]{0,%d}\[\^(.+)\]: # id = \1 - [ \t]* - ( # footnote text = \2 - # First line need not start with the spaces. - (?:\s*.*\n+) - (?: - (?:[ ]{%d} | \t) # Subsequent lines must be indented. - .*\n+ - )* - ) - # Lookahead for non-space at line-start, or end of doc. - (?:(?=^[ ]{0,%d}\S)|\Z) - ''' % (less_than_tab, self.tab_width, self.tab_width), - re.X | re.M) - return footnote_def_re.sub(self._extract_footnote_def_sub, text) - - _hr_res = [ - re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), - re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), - ] - - def _run_block_gamut(self, text): - # These are all the transformations that form block-level - # tags like paragraphs, headers, and list items. 
- - #text = self._do_headers(text) - - # Do Horizontal Rules: - #hr = "\n<hr"+self.empty_element_suffix+"\n" - # for hr_re in self._hr_res: - # text = hr_re.sub(hr, text) - - text = self._do_lists(text) - - if "pyshell" in self.extras: - text = self._prepare_pyshell_blocks(text) - - text = self._do_code_blocks(text) - - text = self._do_block_quotes(text) - - # We already ran _HashHTMLBlocks() before, in Markdown(), but that - # was to escape raw HTML in the original Markdown source. This time, - # we're escaping the markup we've just created, so that we don't wrap - # <p> tags around block-level tags. - text = self._hash_html_blocks(text) - - text = self._form_paragraphs(text) - - return text - - def _pyshell_block_sub(self, match): - lines = match.group(0).splitlines(0) - _dedentlines(lines) - indent = ' ' * self.tab_width - s = ('\n' # separate from possible cuddled paragraph - + indent + ('\n'+indent).join(lines) - + '\n\n') - return s - - def _prepare_pyshell_blocks(self, text): - """Ensure that Python interactive shell sessions are put in - code blocks -- even if not properly indented. - """ - if ">>>" not in text: - return text - - less_than_tab = self.tab_width - 1 - _pyshell_block_re = re.compile(r""" - ^([ ]{0,%d})>>>[ ].*\n # first line - ^(\1.*\S+.*\n)* # any number of subsequent lines - ^\n # ends with a blank line - """ % less_than_tab, re.M | re.X) - - return _pyshell_block_re.sub(self._pyshell_block_sub, text) - - def _run_span_gamut(self, text): - # These are all the transformations that occur *within* block-level - # tags like paragraphs, headers, and list items. - - # text = self._do_code_spans(text) - El AA ! - - text = self._escape_special_chars(text) - - # Process anchor and image tags. - text = self._do_links(text) - - # Make links out of things like `<http://example.com/>` - # Must come after _do_links(), because you can use < and > - # delimiters in inline links like [this](<url>). 
- #text = self._do_auto_links(text) - - if "link-patterns" in self.extras: - text = self._do_link_patterns(text) - - text = self._encode_amps_and_angles(text) - - text = self._do_italics_and_bold(text) - - # Do hard breaks: - text = re.sub(r"\n", "<br%s" % self.empty_element_suffix, text) - - return text - - # "Sorta" because auto-links are identified as "tag" tokens. - _sorta_html_tokenize_re = re.compile(r""" - ( - # tag - </? - (?:\w+) # tag name - (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes - \s*/?> - | - # auto-link (e.g., <http://www.activestate.com/>) - <\w+[^>]*> - | - <!--.*?--> # comment - | - <\?.*?\?> # processing instruction - ) - """, re.X) - - def _escape_special_chars(self, text): - # Python markdown note: the HTML tokenization here differs from - # that in Markdown.pl, hence the behaviour for subtle cases can - # differ (I believe the tokenizer here does a better job because - # it isn't susceptible to unmatched '<' and '>' in HTML tags). - # Note, however, that '>' is not allowed in an auto-link URL - # here. - escaped = [] - is_html_markup = False - for token in self._sorta_html_tokenize_re.split(text): - if is_html_markup: - # Within tags/HTML-comments/auto-links, encode * and _ - # so they don't conflict with their use in Markdown for - # italics and strong. We're replacing each such - # character with its corresponding MD5 checksum value; - # this is likely overkill, but it should prevent us from - # colliding with the escape values by accident. - escaped.append(token.replace('*', g_escape_table['*']) - .replace('_', g_escape_table['_'])) - else: - escaped.append(self._encode_backslash_escapes(token)) - is_html_markup = not is_html_markup - return ''.join(escaped) - - def _hash_html_spans(self, text): - # Used for safe_mode. 
- - def _is_auto_link(s): - if ':' in s and self._auto_link_re.match(s): - return True - elif '@' in s and self._auto_email_link_re.match(s): - return True - return False - - tokens = [] - is_html_markup = False - for token in self._sorta_html_tokenize_re.split(text): - if is_html_markup and not _is_auto_link(token): - sanitized = self._sanitize_html(token) - key = _hash_text(sanitized) - self.html_spans[key] = sanitized - tokens.append(key) - else: - tokens.append(token) - is_html_markup = not is_html_markup - return ''.join(tokens) - - def _unhash_html_spans(self, text): - for key, sanitized in self.html_spans.items(): - text = text.replace(key, sanitized) - return text - - def _sanitize_html(self, s): - if self.safe_mode == "replace": - return self.html_removed_text - elif self.safe_mode == "escape": - replacements = [ - ('&', '&'), - ('<', '<'), - ('>', '>'), - ] - for before, after in replacements: - s = s.replace(before, after) - return s - else: - raise MarkdownError("invalid value for 'safe_mode': %r (must be " - "'escape' or 'replace')" % self.safe_mode) - - _tail_of_inline_link_re = re.compile(r''' - # Match tail of: [text](/url/) or [text](/url/ "title") - \( # literal paren - [ \t]* - (?P<url> # \1 - <.*?> - | - .*? - ) - [ \t]* - ( # \2 - (['"]) # quote char = \3 - (?P<title>.*?) - \3 # matching quote - )? # title is optional - \) - ''', re.X | re.S) - _tail_of_reference_link_re = re.compile(r''' - # Match tail of: [text][id] - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - \[ - (?P<id>.*?) - \] - ''', re.X | re.S) - - def _do_links(self, text): - """Turn Markdown link shortcuts into XHTML <a> and <img> tags. - - This is a combination of Markdown.pl's _DoAnchors() and - _DoImages(). They are done together because that simplified the - approach. It was necessary to use a different approach than - Markdown.pl because of the lack of atomic matching support in - Python's regex engine used in $g_nested_brackets. 
- """ - MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24 - - # `anchor_allowed_pos` is used to support img links inside - # anchors, but not anchors inside anchors. An anchor's start - # pos must be `>= anchor_allowed_pos`. - anchor_allowed_pos = 0 - - curr_pos = 0 - while True: # Handle the next link. - # The next '[' is the start of: - # - an inline anchor: [text](url "title") - # - a reference anchor: [text][id] - # - an inline img: ![text](url "title") - # - a reference img: ![text][id] - # - a footnote ref: [^id] - # (Only if 'footnotes' extra enabled) - # - a footnote defn: [^id]: ... - # (Only if 'footnotes' extra enabled) These have already - # been stripped in _strip_footnote_definitions() so no - # need to watch for them. - # - a link definition: [id]: url "title" - # These have already been stripped in - # _strip_link_definitions() so no need to watch for them. - # - not markup: [...anything else... - try: - start_idx = text.index('[', curr_pos) - except ValueError: - break - text_length = len(text) - - # Find the matching closing ']'. - # Markdown.pl allows *matching* brackets in link text so we - # will here too. Markdown.pl *doesn't* currently allow - # matching brackets in img alt text -- we'll differ in that - # regard. - bracket_depth = 0 - for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, - text_length)): - ch = text[p] - if ch == ']': - bracket_depth -= 1 - if bracket_depth < 0: - break - elif ch == '[': - bracket_depth += 1 - else: - # Closing bracket not found within sentinel length. - # This isn't markup. - curr_pos = start_idx + 1 - continue - link_text = text[start_idx+1:p] - - # Possibly a footnote ref? 
- if "footnotes" in self.extras and link_text.startswith("^"): - normed_id = re.sub(r'\W', '-', link_text[1:]) - if normed_id in self.footnotes: - self.footnote_ids.append(normed_id) - result = '<sup class="footnote-ref" id="fnref-%s">' \ - '<a href="#fn-%s">%s</a></sup>' \ - % (normed_id, normed_id, len(self.footnote_ids)) - text = text[:start_idx] + result + text[p+1:] - else: - # This id isn't defined, leave the markup alone. - curr_pos = p+1 - continue - - # Now determine what this is by the remainder. - p += 1 - if p == text_length: - return text - - # Inline anchor or img? - if text[p] == '(': # attempt at perf improvement - match = self._tail_of_inline_link_re.match(text, p) - if match: - # Handle an inline anchor or img. - #is_img = start_idx > 0 and text[start_idx-1] == "!" - # if is_img: - # start_idx -= 1 - is_img = False - - url, title = match.group("url"), match.group("title") - if url and url[0] == '<': - url = url[1:-1] # '<url>' -> 'url' - # We've got to encode these to avoid conflicting - # with italics/bold. - url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - if title: - title_str = ' title="%s"' \ - % title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) \ - .replace('"', '"') - else: - title_str = '' - if is_img: - result = '<img src="%s" alt="%s"%s%s' \ - % (url.replace('"', '"'), - link_text.replace('"', '"'), - title_str, self.empty_element_suffix) - curr_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - elif start_idx >= anchor_allowed_pos: - result_head = '<a href="%s"%s>' % (url, title_str) - result = '%s%s</a>' % (result_head, link_text) - # <img> allowed from curr_pos on, <a> from - # anchor_allowed_pos on. - curr_pos = start_idx + len(result_head) - anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + result + text[match.end():] - else: - # Anchor not allowed here. 
- curr_pos = start_idx + 1 - continue - - # Reference anchor or img? - else: - match = self._tail_of_reference_link_re.match(text, p) - if match: - # Handle a reference-style anchor or img. - #is_img = start_idx > 0 and text[start_idx-1] == "!" - # if is_img: - # start_idx -= 1 - is_img = False - - link_id = match.group("id").lower() - if not link_id: - link_id = link_text.lower() # for links like [this][] - if link_id in self.urls: - url = self.urls[link_id] - # We've got to encode these to avoid conflicting - # with italics/bold. - url = url.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - title = self.titles.get(link_id) - if title: - title = title.replace('*', g_escape_table['*']) \ - .replace('_', g_escape_table['_']) - title_str = ' title="%s"' % title - else: - title_str = '' - if is_img: - result = '<img src="%s" alt="%s"%s%s' \ - % (url.replace('"', '"'), - link_text.replace('"', '"'), - title_str, self.empty_element_suffix) - curr_pos = start_idx + len(result) - text = text[:start_idx] + \ - result + text[match.end():] - elif start_idx >= anchor_allowed_pos: - result = '<a href="%s"%s>%s</a>' \ - % (url, title_str, link_text) - result_head = '<a href="%s"%s>' % (url, title_str) - result = '%s%s</a>' % (result_head, link_text) - # <img> allowed from curr_pos on, <a> from - # anchor_allowed_pos on. - curr_pos = start_idx + len(result_head) - anchor_allowed_pos = start_idx + len(result) - text = text[:start_idx] + \ - result + text[match.end():] - else: - # Anchor not allowed here. - curr_pos = start_idx + 1 - else: - # This id isn't defined, leave the markup alone. - curr_pos = match.end() - continue - - # Otherwise, it isn't markup. - curr_pos = start_idx + 1 - - return text - - def header_id_from_text(self, text, prefix): - """Generate a header id attribute value from the given header - HTML content. - - This is only called if the "header-ids" extra is enabled. - Subclasses may override this for different header ids. 
- """ - header_id = _slugify(text) - if prefix: - header_id = prefix + '-' + header_id - if header_id in self._count_from_header_id: - self._count_from_header_id[header_id] += 1 - header_id += '-%s' % self._count_from_header_id[header_id] - else: - self._count_from_header_id[header_id] = 1 - return header_id - - _toc = None - - def _toc_add_entry(self, level, id, name): - if self._toc is None: - self._toc = [] - self._toc.append((level, id, name)) - - _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) - - def _setext_h_sub(self, match): - n = {"=": 1, "-": 2}[match.group(2)[0]] - demote_headers = self.extras.get("demote-headers") - if demote_headers: - n = min(n + demote_headers, 6) - header_id_attr = "" - if "header-ids" in self.extras: - header_id = self.header_id_from_text(match.group(1), - prefix=self.extras["header-ids"]) - header_id_attr = ' id="%s"' % header_id - html = self._run_span_gamut(match.group(1)) - if "toc" in self.extras: - self._toc_add_entry(n, header_id, html) - return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n) - - _atx_h_re = re.compile(r''' - ^(\#{1,6}) # \1 = string of #'s - [ \t]* - (.+?) 
# \2 = Header text - [ \t]* - (?<!\\) # ensure not an escaped trailing '#' - \#* # optional closing #'s (not counted) - \n+ - ''', re.X | re.M) - - def _atx_h_sub(self, match): - n = len(match.group(1)) - demote_headers = self.extras.get("demote-headers") - if demote_headers: - n = min(n + demote_headers, 6) - header_id_attr = "" - if "header-ids" in self.extras: - header_id = self.header_id_from_text(match.group(2), - prefix=self.extras["header-ids"]) - header_id_attr = ' id="%s"' % header_id - html = self._run_span_gamut(match.group(2)) - if "toc" in self.extras: - self._toc_add_entry(n, header_id, html) - return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n) - - def _do_headers(self, text): - # Setext-style headers: - # Header 1 - # ======== - # - # Header 2 - # -------- - text = self._setext_h_re.sub(self._setext_h_sub, text) - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - text = self._atx_h_re.sub(self._atx_h_sub, text) - - return text - - _marker_ul_chars = '*+-' - _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars - _marker_ul = '(?:[%s])' % _marker_ul_chars - _marker_ol = r'(?:\d+\.)' - - def _list_sub(self, match): - lst = match.group(1) - lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" - result = self._process_list_items(lst) - if self.list_level: - return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) - else: - return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) - - def _do_lists(self, text): - # Form HTML ordered (numbered) and unordered (bulleted) lists. - - for marker_pat in (self._marker_ul, self._marker_ol): - # Re-usable pattern to match any entire ul or ol list: - less_than_tab = self.tab_width - 1 - whole_list = r''' - ( # \1 = whole list - ( # \2 - [ ]{0,%d} - (%s) # \3 = first list item marker - [ \t]+ - ) - (?:.+?) - ( # \4 - \Z - | - \n{2,} - (?=\S) - (?! 
# Negative lookahead for another list item marker - [ \t]* - %s[ \t]+ - ) - ) - ) - ''' % (less_than_tab, marker_pat, marker_pat) - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _process_list_items(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if self.list_level: - sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) - text = sub_list_re.sub(self._list_sub, text) - else: - list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, - re.X | re.M | re.S) - text = list_re.sub(self._list_sub, text) - - return text - - _list_item_re = re.compile(r''' - (\n)? # leading line = \1 - (^[ \t]*) # leading whitespace = \2 - (?P<marker>%s) [ \t]+ # list marker = \3 - ((?:.+?) 
# list item text = \4 - (\n{1,2})) # eols = \5 - (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+)) - ''' % (_marker_any, _marker_any), - re.M | re.X | re.S) - - _last_li_endswith_two_eols = False - - def _list_item_sub(self, match): - item = match.group(4) - leading_line = match.group(1) - leading_space = match.group(2) - if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: - item = self._run_block_gamut(self._outdent(item)) - else: - # Recursion for sub-lists: - item = self._do_lists(self._outdent(item)) - if item.endswith('\n'): - item = item[:-1] - item = self._run_span_gamut(item) - self._last_li_endswith_two_eols = (len(match.group(5)) == 2) - return "<li>%s</li>\n" % item - - def _process_list_items(self, list_str): - # Process the contents of a single ordered or unordered list, - # splitting it into individual list items. - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". 
- self.list_level += 1 - self._last_li_endswith_two_eols = False - list_str = list_str.rstrip('\n') + '\n' - list_str = self._list_item_re.sub(self._list_item_sub, list_str) - self.list_level -= 1 - return list_str - - def _get_pygments_lexer(self, lexer_name): - try: - from pygments import lexers, util - except ImportError: - return None - try: - return lexers.get_lexer_by_name(lexer_name) - except util.ClassNotFound: - return None - - def _color_with_pygments(self, codeblock, lexer, **formatter_opts): - import pygments - import pygments.formatters - - class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): - def _wrap_code(self, inner): - """A function for use in a Pygments Formatter which - wraps in <code> tags. - """ - yield 0, "<code>" - for tup in inner: - yield tup - yield 0, "</code>" - - def wrap(self, source, outfile): - """Return the source with a code, pre, and div.""" - return self._wrap_div(self._wrap_pre(self._wrap_code(source))) - - formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) - return pygments.highlight(codeblock, lexer, formatter) - - def _code_block_sub(self, match): - codeblock = match.group(1) - codeblock = self._outdent(codeblock) - codeblock = self._detab(codeblock) - codeblock = codeblock.lstrip('\n') # trim leading newlines - codeblock = codeblock.rstrip() # trim trailing whitespace - - if "code-color" in self.extras and codeblock.startswith(":::"): - lexer_name, rest = codeblock.split('\n', 1) - lexer_name = lexer_name[3:].strip() - lexer = self._get_pygments_lexer(lexer_name) - codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
- if lexer: - formatter_opts = self.extras['code-color'] or {} - colored = self._color_with_pygments(codeblock, lexer, - **formatter_opts) - return "\n\n%s\n\n" % colored - - codeblock = self._encode_code(codeblock) - pre_class_str = self._html_class_str_from_tag("pre") - code_class_str = self._html_class_str_from_tag("code") - return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % ( - pre_class_str, code_class_str, codeblock) - - def _html_class_str_from_tag(self, tag): - """Get the appropriate ' class="..."' string (note the leading - space), if any, for the given tag. - """ - if "html-classes" not in self.extras: - return "" - try: - html_classes_from_tag = self.extras["html-classes"] - except TypeError: - return "" - else: - if tag in html_classes_from_tag: - return ' class="%s"' % html_classes_from_tag[tag] - return "" - - def _do_code_blocks(self, text): - """Process Markdown `<pre><code>` blocks.""" - code_block_re = re.compile(r''' - (?:\n\n|\A) - ( # $1 = the code block -- one or more lines, starting with a space/tab - (?: - (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces - .*\n+ - )+ - ) - ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - ''' % (self.tab_width, self.tab_width), - re.M | re.X) - - return code_block_re.sub(self._code_block_sub, text) - - # Rules for a code span: - # - backslash escapes are not interpreted in a code span - # - to include one or or a run of more backticks the delimiters must - # be a longer run of backticks - # - cannot start or end a code span with a backtick; pad with a - # space and that space will be removed in the emitted HTML - # See `test/tm-cases/escapes.text` for a number of edge-case - # examples. - _code_span_re = re.compile(r''' - (?<!\\) - (`+) # \1 = Opening run of ` - (?!`) # See Note A test/tm-cases/escapes.text - (.+?) 
# \2 = The code block - (?<!`) - \1 # Matching closer - (?!`) - ''', re.X | re.S) - - def _code_span_sub(self, match): - c = match.group(2).strip(" \t") - c = self._encode_code(c) - return "<code>%s</code>" % c - - def _do_code_spans(self, text): - # * Backtick quotes are used for <code></code> spans. - # - # * You can use multiple backticks as the delimiters if you want to - # include literal backticks in the code span. So, this input: - # - # Just type ``foo `bar` baz`` at the prompt. - # - # Will translate to: - # - # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> - # - # There's no arbitrary limit to the number of backticks you - # can use as delimters. If you need three consecutive backticks - # in your code, use four for delimiters, etc. - # - # * You can use spaces to get literal backticks at the edges: - # - # ... type `` `bar` `` ... - # - # Turns to: - # - # ... type <code>`bar`</code> ... - return self._code_span_re.sub(self._code_span_sub, text) - - def _encode_code(self, text): - """Encode/escape certain characters inside Markdown code runs. - The point is that in code, these characters are literals, - and lose their special Markdown meanings. - """ - replacements = [ - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. 
- ('&', '&'), - # Do the angle bracket song and dance: - ('<', '<'), - ('>', '>'), - # Now, escape characters that are magic in Markdown: - ('*', g_escape_table['*']), - ('_', g_escape_table['_']), - ('{', g_escape_table['{']), - ('}', g_escape_table['}']), - ('[', g_escape_table['[']), - (']', g_escape_table[']']), - ('\\', g_escape_table['\\']), - ] - for before, after in replacements: - text = text.replace(before, after) - return text - - _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) - _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) - # _spoiler_re = re.compile(r"###(?=\S)(.+?[*_]*)(?<=\S)###", re.S) - - _code_friendly_strong_re = re.compile( - r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) - _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) - - def _do_italics_and_bold(self, text): - # <strong> must go first: - if "code-friendly" in self.extras: - text = self._code_friendly_strong_re.sub( - r"<strong>\1</strong>", text) - text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) - else: - text = self._strong_re.sub(r"<strong>\2</strong>", text) - text = self._em_re.sub(r"<em>\2</em>", text) - - #text = self._spoiler_re.sub("<del>\\1</del>", text) - return text - - _block_quote_re = re.compile(r''' - ( # Wrap whole match in \1 - ( - ^[ \t]*>[^>] # '>' at the start of a line - .+\n # rest of the first line - \n* # blanks - )+ - ) - ''', re.M | re.X) - _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M) - - _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) - - def _dedent_two_spaces_sub(self, match): - return re.sub(r'(?m)^ ', '', match.group(1)) - - def _block_quote_sub(self, match): - bq = match.group(1) - # bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting - bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines - bq = bq.strip('\n') - bq = self._run_span_gamut(bq) - # bq = self._run_block_gamut(bq) # recurse - - bq = re.sub('(?m)^', ' ', bq) - # These leading spaces screw 
with <pre> content, so we need to fix that: - bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) - - return "<blockquote>\n%s\n</blockquote>\n\n" % bq - - def _do_block_quotes(self, text): - if '>' not in text: - return text - return self._block_quote_re.sub(self._block_quote_sub, text) - - def _form_paragraphs(self, text): - # Strip leading and trailing lines: - text = text.strip('\n') - - # Wrap <p> tags. - grafs = [] - for i, graf in enumerate(re.split(r"\n{2,}", text)): - if graf in self.html_blocks: - # Unhashify HTML blocks - grafs.append(self.html_blocks[graf]) - else: - cuddled_list = None - if "cuddled-lists" in self.extras: - # Need to put back trailing '\n' for `_list_item_re` - # match at the end of the paragraph. - li = self._list_item_re.search(graf + '\n') - # Two of the same list marker in this paragraph: a likely - # candidate for a list cuddled to preceding paragraph - # text (issue 33). Note the `[-1]` is a quick way to - # consider numeric bullets (e.g. "1." and "2.") to be - # equal. - if (li and len(li.group(2)) <= 3 and li.group("next_marker") - and li.group("marker")[-1] == li.group("next_marker")[-1]): - start = li.start() - cuddled_list = self._do_lists( - graf[start:]).rstrip("\n") - assert cuddled_list.startswith( - "<ul>") or cuddled_list.startswith("<ol>") - graf = graf[:start] - - # Wrap <p> tags. 
- graf = self._run_span_gamut(graf) - grafs.append("<p>" + graf.lstrip(" \t") + "</p>") - - if cuddled_list: - grafs.append(cuddled_list) - - return "\n\n".join(grafs) - - def _add_footnotes(self, text): - if self.footnotes: - footer = [ - '<div class="footnotes">', - '<hr' + self.empty_element_suffix, - '<ol>', - ] - for i, id in enumerate(self.footnote_ids): - if i != 0: - footer.append('') - footer.append('<li id="fn-%s">' % id) - footer.append(self._run_block_gamut(self.footnotes[id])) - backlink = ('<a href="#fnref-%s" ' - 'class="footnoteBackLink" ' - 'title="Jump back to footnote %d in the text.">' - '↩</a>' % (id, i+1)) - if footer[-1].endswith("</p>"): - footer[-1] = footer[-1][:-len("</p>")] \ - + ' ' + backlink + "</p>" - else: - footer.append("\n<p>%s</p>" % backlink) - footer.append('</li>') - footer.append('</ol>') - footer.append('</div>') - return text + '\n\n' + '\n'.join(footer) - else: - return text - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') - _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) - _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) - - def _encode_amps_and_angles(self, text): - # Smart processing for ampersands and angle brackets that need - # to be encoded. - text = self._ampersand_re.sub('&', text) - - # Encode naked <'s - text = self._naked_lt_re.sub('<', text) - - # Encode naked >'s - # Note: Other markdown implementations (e.g. Markdown.pl, PHP - # Markdown) don't do this. 
- text = self._naked_gt_re.sub('>', text) - return text - - def _encode_backslash_escapes(self, text): - for ch, escape in g_escape_table.items(): - text = text.replace("\\"+ch, escape) - return text - - _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) - - def _auto_link_sub(self, match): - g1 = match.group(1) - return '<a href="%s">%s</a>' % (g1, g1) - - _auto_email_link_re = re.compile(r""" - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-\w]+(\.[-\w]+)*\.[a-z]+ - ) - > - """, re.I | re.X | re.U) - - def _auto_email_link_sub(self, match): - return self._encode_email_address( - self._unescape_special_chars(match.group(1))) - - def _do_auto_links(self, text): - text = self._auto_link_re.sub(self._auto_link_sub, text) - text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) - return text - - def _encode_email_address(self, addr): - # Input: an email address, e.g. "foo@example.com" - # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: - # - # <a href="mailto:foo@e - # xample.com">foo - # @example.com</a> - # - # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk - # mailing list: <http://tinyurl.com/yu7ue> - chars = [_xml_encode_email_char_at_random(ch) - for ch in "mailto:" + addr] - # Strip the mailto: from the visible part. - addr = '<a href="%s">%s</a>' \ - % (''.join(chars), ''.join(chars[7:])) - return addr - - def _do_link_patterns(self, text): - """Caveat emptor: there isn't much guarding against link - patterns being formed inside other standard Markdown links, e.g. - inside a [link def][like this]. - - Dev Notes: *Could* consider prefixing regexes with a negative - lookbehind assertion to attempt to guard against this. 
- """ - link_from_hash = {} - for regex, repl in self.link_patterns: - replacements = [] - for match in regex.finditer(text): - if hasattr(repl, "__call__"): - href = repl(match) - else: - href = match.expand(repl) - replacements.append((match.span(), href)) - for (start, end), href in reversed(replacements): - escaped_href = ( - href.replace('"', '"') # b/c of attr quote - # To avoid markdown <em> and <strong>: - .replace('*', g_escape_table['*']) - .replace('_', g_escape_table['_'])) - link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) - hash = _hash_text(link) - link_from_hash[hash] = link - text = text[:start] + hash + text[end:] - for hash, link in link_from_hash.items(): - text = text.replace(hash, link) - return text - - def _unescape_special_chars(self, text): - # Swap back in all the special characters we've hidden. - for ch, hash in g_escape_table.items(): - text = text.replace(hash, ch) - return text - - def _outdent(self, text): - # Remove one level of line-leading tabs or spaces - return self._outdent_re.sub('', text) - - -class MarkdownWithExtras(Markdown): - """A markdowner class that enables most extras: - - - footnotes - - code-color (only has effect if 'pygments' Python module on path) - - These are not included: - - pyshell (specific to Python-related documenting) - - code-friendly (because it *disables* part of the syntax) - - link-patterns (because you need to specify some actual - link-patterns anyway) - """ - extras = ["footnotes", "code-color"] - - -# ---- internal support functions - -class UnicodeWithAttrs(unicode): - """A subclass of unicode used for the return value of conversion to - possibly attach some attributes. E.g. the "toc_html" attribute when - the "toc" extra is used. - """ - _toc = None - @property - def toc_html(self): - """Return the HTML for the current TOC. - - This expects the `_toc` attribute to have been set on this instance. 
- """ - if self._toc is None: - return None - - def indent(): - return ' ' * (len(h_stack) - 1) - lines = [] - h_stack = [0] # stack of header-level numbers - for level, id, name in self._toc: - if level > h_stack[-1]: - lines.append("%s<ul>" % indent()) - h_stack.append(level) - elif level == h_stack[-1]: - lines[-1] += "</li>" - else: - while level < h_stack[-1]: - h_stack.pop() - if not lines[-1].endswith("</li>"): - lines[-1] += "</li>" - lines.append("%s</ul></li>" % indent()) - lines.append(u'%s<li><a href="#%s">%s</a>' % ( - indent(), id, name)) - while len(h_stack) > 1: - h_stack.pop() - if not lines[-1].endswith("</li>"): - lines[-1] += "</li>" - lines.append("%s</ul>" % indent()) - return '\n'.join(lines) + '\n' - - -_slugify_strip_re = re.compile(r'[^\w\s-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') - - -def _slugify(value): - """ - Normalizes string, converts to lowercase, removes non-alpha characters, - and converts spaces to hyphens. - - From Django's "django/template/defaultfilters.py". 
- """ - import unicodedata - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = unicode(_slugify_strip_re.sub('', value).strip().lower()) - return _slugify_hyphenate_re.sub('-', value) - -# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 - - -def _curry(*args, **kwargs): - function, args = args[0], args[1:] - - def result(*rest, **kwrest): - combined = kwargs.copy() - combined.update(kwrest) - return function(*args + rest, **combined) - return result - -# Recipe: regex_from_encoded_pattern (1.0) - - -def _regex_from_encoded_pattern(s): - """'foo' -> re.compile(re.escape('foo')) - '/foo/' -> re.compile('foo') - '/foo/i' -> re.compile('foo', re.I) - """ - if s.startswith('/') and s.rfind('/') != 0: - # Parse it: /PATTERN/FLAGS - idx = s.rfind('/') - pattern, flags_str = s[1:idx], s[idx+1:] - flag_from_char = { - "i": re.IGNORECASE, - "l": re.LOCALE, - "s": re.DOTALL, - "m": re.MULTILINE, - "u": re.UNICODE, - } - flags = 0 - for char in flags_str: - try: - flags |= flag_from_char[char] - except KeyError: - raise ValueError("unsupported regex flag: '%s' in '%s' " - "(must be one of '%s')" - % (char, s, ''.join(flag_from_char.keys()))) - return re.compile(s[1:idx], flags) - else: # not an encoded regex - return re.compile(re.escape(s)) - -# Recipe: dedent (0.1.2) - - -def _dedentlines(lines, tabsize=8, skip_first_line=False): - """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines - - "lines" is a list of lines to dedent. - "tabsize" is the tab width to use for indent width calculations. - "skip_first_line" is a boolean indicating if the first line should - be skipped for calculating the indent width and for dedenting. - This is sometimes useful for docstrings and similar. - - Same as dedent() except operates on a sequence of lines. Note: the - lines list is modified **in-place**. 
- """ - DEBUG = False - if DEBUG: - print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ - % (tabsize, skip_first_line) - indents = [] - margin = None - for i, line in enumerate(lines): - if i == 0 and skip_first_line: - continue - indent = 0 - for ch in line: - if ch == ' ': - indent += 1 - elif ch == '\t': - indent += tabsize - (indent % tabsize) - elif ch in '\r\n': - continue # skip all-whitespace lines - else: - break - else: - continue # skip all-whitespace lines - if DEBUG: - print "dedent: indent=%d: %r" % (indent, line) - if margin is None: - margin = indent - else: - margin = min(margin, indent) - if DEBUG: - print "dedent: margin=%r" % margin - - if margin is not None and margin > 0: - for i, line in enumerate(lines): - if i == 0 and skip_first_line: - continue - removed = 0 - for j, ch in enumerate(line): - if ch == ' ': - removed += 1 - elif ch == '\t': - removed += tabsize - (removed % tabsize) - elif ch in '\r\n': - if DEBUG: - print "dedent: %r: EOL -> strip up to EOL" % line - lines[i] = lines[i][j:] - break - else: - raise ValueError("unexpected non-whitespace char %r in " - "line %r while removing %d-space margin" - % (ch, line, margin)) - if DEBUG: - print "dedent: %r: %r -> removed %d/%d"\ - % (line, ch, removed, margin) - if removed == margin: - lines[i] = lines[i][j+1:] - break - elif removed > margin: - lines[i] = ' '*(removed-margin) + lines[i][j+1:] - break - else: - if removed: - lines[i] = lines[i][removed:] - return lines - - -def _dedent(text, tabsize=8, skip_first_line=False): - """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text - - "text" is the text to dedent. - "tabsize" is the tab width to use for indent width calculations. - "skip_first_line" is a boolean indicating if the first line should - be skipped for calculating the indent width and for dedenting. - This is sometimes useful for docstrings and similar. 
- - textwrap.dedent(s), but don't expand tabs to spaces - """ - lines = text.splitlines(1) - _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) - return ''.join(lines) - - -class _memoized(object): - """Decorator that caches a function's return value each time it is called. - If called later with the same arguments, the cached value is returned, and - not re-evaluated. - - http://wiki.python.org/moin/PythonDecoratorLibrary - """ - - def __init__(self, func): - self.func = func - self.cache = {} - - def __call__(self, *args): - try: - return self.cache[args] - except KeyError: - self.cache[args] = value = self.func(*args) - return value - except TypeError: - # uncachable -- for instance, passing a list as an argument. - # Better to not cache than to blow up entirely. - return self.func(*args) - - def __repr__(self): - """Return the function's docstring.""" - return self.func.__doc__ - - -def _xml_oneliner_re_from_tab_width(tab_width): - """Standalone XML processing instruction regex.""" - return re.compile(r""" - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,%d} - (?: - <\?\w+\b\s+.*?\?> # XML processing instruction - | - <\w+:\w+\b\s+.*?/> # namespaced single tag - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - """ % (tab_width - 1), re.X) - - -_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) - - -def _hr_tag_re_from_tab_width(tab_width): - return re.compile(r""" - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in \1 - [ ]{0,%d} - <(hr) # start tag = \2 - \b # word break - ([^<>])*? 
# - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - """ % (tab_width - 1), re.X) - - -_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) - - -def _xml_encode_email_char_at_random(ch): - r = random() - # Roughly 10% raw, 45% hex, 45% dec. - # '@' *must* be encoded. I [John Gruber] insist. - # Issue 26: '_' must be encoded. - if r > 0.9 and ch not in "@_": - return ch - elif r < 0.45: - # The [1:] is to drop leading '0': 0x63 -> x63 - return '&#%s;' % hex(ord(ch))[1:] - else: - return '&#%s;' % ord(ch) - - -#---- mainline - -class _NoReflowFormatter(optparse.IndentedHelpFormatter): - """An optparse formatter that does NOT reflow the description.""" - - def format_description(self, description): - return description or "" - - -def _test(): - import doctest - doctest.testmod() - - -def main(argv=None): - if argv is None: - argv = sys.argv - if not logging.root.handlers: - logging.basicConfig() - - usage = "usage: %prog [PATHS...]" - version = "%prog "+__version__ - parser = optparse.OptionParser(prog="markdown2", usage=usage, - version=version, description=cmdln_desc, - formatter=_NoReflowFormatter()) - parser.add_option("-v", "--verbose", dest="log_level", - action="store_const", const=logging.DEBUG, - help="more verbose output") - parser.add_option("--encoding", - help="specify encoding of text content") - parser.add_option("--html4tags", action="store_true", default=False, - help="use HTML 4 style for empty element tags") - parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode", - help="sanitize literal HTML: 'escape' escapes " - "HTML meta chars, 'replace' replaces with an " - "[HTML_REMOVED] note") - parser.add_option("-x", "--extras", action="append", - help="Turn on specific extra features (not part of " - "the core Markdown spec). See above.") - parser.add_option("--use-file-vars", - help="Look for and use Emacs-style 'markdown-extras' " - "file var to turn on extras. 
See " - "<http://code.google.com/p/python-markdown2/wiki/Extras>.") - parser.add_option("--link-patterns-file", - help="path to a link pattern file") - parser.add_option("--self-test", action="store_true", - help="run internal self-tests (some doctests)") - parser.add_option("--compare", action="store_true", - help="run against Markdown.pl as well (for testing)") - parser.set_defaults(log_level=logging.INFO, compare=False, - encoding="utf-8", safe_mode=None, use_file_vars=False) - opts, paths = parser.parse_args() - log.setLevel(opts.log_level) - - if opts.self_test: - return _test() - - if opts.extras: - extras = {} - for s in opts.extras: - splitter = re.compile("[,;: ]+") - for e in splitter.split(s): - if '=' in e: - ename, earg = e.split('=', 1) - try: - earg = int(earg) - except ValueError: - pass - else: - ename, earg = e, None - extras[ename] = earg - else: - extras = None - - if opts.link_patterns_file: - link_patterns = [] - f = open(opts.link_patterns_file) - try: - for i, line in enumerate(f.readlines()): - if not line.strip(): - continue - if line.lstrip().startswith("#"): - continue - try: - pat, href = line.rstrip().rsplit(None, 1) - except ValueError: - raise MarkdownError("%s:%d: invalid link pattern line: %r" - % (opts.link_patterns_file, i+1, line)) - link_patterns.append( - (_regex_from_encoded_pattern(pat), href)) - finally: - f.close() - else: - link_patterns = None - - from os.path import join, dirname, abspath, exists - markdown_pl = join(dirname(dirname(abspath(__file__))), "test", - "Markdown.pl") - for path in paths: - if opts.compare: - print "==== Markdown.pl ====" - perl_cmd = 'perl %s "%s"' % (markdown_pl, path) - o = os.popen(perl_cmd) - perl_html = o.read() - o.close() - sys.stdout.write(perl_html) - print "==== markdown2.py ====" - html = markdown_path(path, encoding=opts.encoding, - html4tags=opts.html4tags, - safe_mode=opts.safe_mode, - extras=extras, link_patterns=link_patterns, - use_file_vars=opts.use_file_vars) - 
sys.stdout.write( - html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) - if extras and "toc" in extras: - log.debug("toc_html: " + - html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')) - if opts.compare: - test_dir = join(dirname(dirname(abspath(__file__))), "test") - if exists(join(test_dir, "test_markdown2.py")): - sys.path.insert(0, test_dir) - from test_markdown2 import norm_html_from_html - norm_html = norm_html_from_html(html) - norm_perl_html = norm_html_from_html(perl_html) - else: - norm_html = html - norm_perl_html = perl_html - print "==== match? %r ====" % (norm_perl_html == norm_html) - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgi/post.py b/cgi/post.py index 6f7ff03..22d8197 100644 --- a/cgi/post.py +++ b/cgi/post.py @@ -149,10 +149,7 @@ def getThread(postid=0, mobile=False, timestamp=0): thread["message"] = op_post["message"] thread["locked"] = op_post["locked"] thread["size"] = "%d KB" % int(total_bytes / 1000) - - #threads = [thread] else: - raise Exception(postid) return None finally: database_lock.release() @@ -416,7 +413,7 @@ def threadList(mode=0): thread['message'] = thread['message'].replace('<br />', ' ') thread['message'] = thread['message'].split("<hr />")[0] thread['message'] = re.compile(r"<[^>]*?>", re.DOTALL | re.IGNORECASE).sub('', thread['message']) - thread['message'] = thread['message'].decode('utf-8')[:cutFactor].encode('utf-8') + thread['message'] = thread['message'][:cutFactor] thread['message'] = re.compile(r"&(.(?!;))*$", re.DOTALL | re.IGNORECASE).sub('', thread['message']) # Removes incomplete HTML entities thread['timestamp_formatted'] = re.compile(r"\(.{1,3}\)", re.DOTALL | re.IGNORECASE).sub(" ", thread['timestamp_formatted']) @@ -432,7 +429,7 @@ def threadList(mode=0): lastreply['message'] = lastreply['message'].replace('<br />', ' ') lastreply['message'] = lastreply['message'].split("<hr />")[0] lastreply['message'] = re.compile(r"<[^>]*?>", re.DOTALL | 
re.IGNORECASE).sub('', lastreply['message']) - lastreply['message'] = lastreply['message'].decode('utf-8')[:60].encode('utf-8') + lastreply['message'] = lastreply['message'][:60] lastreply['message'] = re.compile(r"&(.(?!;))*$", re.DOTALL | re.IGNORECASE).sub('', lastreply['message']) # Removes incomplete HTML entities lastreply['timestamp_formatted'] = re.compile(r"\(.{1,3}\)", re.DOTALL | re.IGNORECASE).sub(" ", lastreply['timestamp_formatted']) thread["lastreply"] = lastreply @@ -540,6 +537,7 @@ def dynamicRead(parentid, ranges, mobile=False): if not thread: # Try the archive + import json fname = Settings.ROOT_DIR + board["dir"] + "/kako/" + str(parentid) + ".json" if os.path.isfile(fname): import json @@ -809,6 +807,7 @@ def trimThreads(): logging.debug("Trimming threads") board = Settings._.BOARD archived = False + trimmed = [] # Use limit of the board type if board['board_type'] == 1: @@ -817,21 +816,22 @@ def trimThreads(): limit = Settings.MAX_THREADS # trim expiring threads first - if board['maxage'] != '0': + if board['maxage'] > 0: t = time.time() - alert_time = int(round(int(board['maxage']) * Settings.MAX_AGE_ALERT)) + alert_time = int(round(board['maxage'] * Settings.MAX_AGE_ALERT)) time_limit = t + (alert_time * 86400) old_ops = FetchAll("SELECT `id`, `timestamp`, `expires`, `expires_alert`, `length` FROM `posts` WHERE `boardid` = %s AND `parentid` = 0 AND IS_DELETED = 0 AND `expires` > 0 AND `expires` < %s LIMIT 50", (board['id'], time_limit)) for op in old_ops: - if t >= int(op['expires']): + if t >= op['expires']: # Trim old threads if board['archive'] and op["length"] >= Settings.ARCHIVE_MIN_LENGTH: archiveThread(op["id"]) archived = True deletePost(op["id"], None) + trimmed.append(op["id"]) else: # Add alert to threads approaching deletion UpdateDb("UPDATE `posts` SET expires_alert = 1 WHERE `boardid` = %s AND `id` = %s", (board['id'], op['id'])) @@ -840,7 +840,7 @@ def trimThreads(): if board['maxinactive'] > 0: t = time.time() - oldest_last = 
t - (int(board['maxinactive']) * 86400) + oldest_last = t - (board['maxinactive'] * 86400) old_ops = FetchAll("SELECT `id`, `length` FROM `posts` WHERE `boardid` = %s AND `parentid` = 0 AND IS_DELETED = 0 AND `last` < %s LIMIT 50", (board['id'], oldest_last)) for op in old_ops: @@ -849,6 +849,7 @@ def trimThreads(): archived = True deletePost(op["id"], None) + trimmed.append(op["id"]) # select trim type by board if board['board_type'] == 1: @@ -874,10 +875,12 @@ def trimThreads(): archived = True deletePost(post["id"], None) - pass + trimmed.append(op["id"]) if archived: regenerateKako() + + return trimmed def autoclose_thread(parentid, t, replies): """ @@ -1099,7 +1102,6 @@ def regenerateAccess(): if not Settings.HTACCESS_GEN: return False - boards = FetchAll('SELECT `dir` FROM `boards`') global_boards = [board['dir'] for board in boards if board['dir'] not in Settings.EXCLUDE_GLOBAL_BANS] @@ -1108,11 +1110,11 @@ def regenerateAccess(): board_bans = {} if Settings.ENABLE_BANS: - bans = FetchAll("SELECT `ipstr`, `boards` FROM `bans` WHERE `blind` = '1' ORDER BY `ipstart` ASC") + bans = FetchAll("SELECT `ipstr`, `boards` FROM `bans` WHERE `blind` = 1 ORDER BY `ipstart` ASC") for ban in bans: if ban["boards"]: - boards = pickle.loads(ban["boards"]) + boards = str2boards(ban["boards"]) for board in boards: board_bans.setdefault(board, []).append(ban["ipstr"]) else: @@ -1233,7 +1235,8 @@ def archiveThread(postid): except: raise UserError("Can't archive: %s" % thread['timestamp']) - UpdateDb("REPLACE INTO archive (id, boardid, timestamp, subject, length) VALUES ('%s', '%s', '%s', '%s', '%s')" % (thread['id'], board['id'], thread['timestamp'], _mysql.escape_string(thread['subject']), thread['length'])) + UpdateDb("REPLACE INTO archive (oldid, boardid, timestamp, subject, length) VALUES (%s, %s, %s, %s, %s)", + (thread['id'], board['id'], thread['timestamp'], thread['subject'], thread['length'])) def throw_dice(dice): qty = int(dice[0][1:]) diff --git 
a/cgi/templates/bans_geo b/cgi/templates/bans_geo index 64687e8..ba7959a 100644 --- a/cgi/templates/bans_geo +++ b/cgi/templates/bans_geo @@ -4,7 +4,7 @@ geo $bans_global { #{ip} 1; <?py #endfor ?> } -<?py for board, bans in board_bans.iteritems(): ?> +<?py for board, bans in board_bans.items(): ?> geo $bans_#{board} { default 0; <?py for ip in bans: ?> diff --git a/cgi/templates/bans_locations b/cgi/templates/bans_locations index a514ccf..87ac1b7 100644 --- a/cgi/templates/bans_locations +++ b/cgi/templates/bans_locations @@ -4,7 +4,7 @@ location /#{board}/ { if ($bans_global) { rewrite ^ /cgi/banned; } <?py #endif ?> <?py if board in board_bans: ?> - if ($bans_#{board}) { rewrite ^ /cgi/banned; } + if ($bans_#{board}) { rewrite ^ /cgi/banned/#{board}; } <?py #endif ?> } <?py #endfor ?> diff --git a/cgi/templates/manage/boardoptions.html b/cgi/templates/manage/boardoptions.html index fcd3bb8..4e33e5b 100644 --- a/cgi/templates/manage/boardoptions.html +++ b/cgi/templates/manage/boardoptions.html @@ -174,7 +174,7 @@ </tr> <tr> <td class="postblock">Archivar hilos</td> -<td><input type="checkbox" name="archive" id="arch" value="1"#{checked(boardopts['archive'] == '1')} /><label for="arch"></label></td> +<td><input type="checkbox" name="archive" id="arch" value="1"#{checked(boardopts['archive'])} /><label for="arch"></label></td> </tr> <tr> <td class="postblock">Espera para crear nuevo hilo</td> diff --git a/cgi/templates/mobile/txt_thread.html b/cgi/templates/mobile/txt_thread.html index c9b58a8..3df16fc 100644 --- a/cgi/templates/mobile/txt_thread.html +++ b/cgi/templates/mobile/txt_thread.html @@ -11,7 +11,7 @@ <?py if thread['length'] > 50: ?> <a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/-50" rel="nofollow">Primeros 50</a> <?py #endif ?> -<?py r = range(thread['length'] / 50) ?> +<?py r = range(thread['length'] // 50) ?> <?py for i in r[:-1]: ?> <a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/#{(i+1)*50+1}-#{(i+2)*50}" 
rel="nofollow">#{(i+1)*50+1}-#{(i+2)*50}</a> <?py #endfor ?> @@ -29,9 +29,9 @@ <div id="thread"> <h1>#{thread['subject']} <span>(#{thread['length']})</span></h1> <?py for post in thread['posts']: ?> -<?py if post['IS_DELETED'] == '1': ?> +<?py if post['IS_DELETED'] == 1: ?> <div class="pst"><h3 class="del"><a href="#" class="num">#{str(post['num']).zfill(4)}</a> Eliminado por el usuario.</h3></div> -<?py elif post['IS_DELETED'] == '2': ?> +<?py elif post['IS_DELETED'] == 2: ?> <div class="pst"><h3 class="del"><a href="#" class="num">#{str(post['num']).zfill(4)}</a> Eliminado por miembro del staff.</h3></div> <?py else: ?> <div id="p#{post['id']}" class="pst"> @@ -42,7 +42,7 @@ </div> <?py #endif ?> <?py #endfor ?> -<?py if thread['locked'] != '1': ?> +<?py if not thread['locked']: ?> <a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/#{thread['length']}-n" id="n">Ver nuevos posts</a><span id="n2"></span> <?py #endif ?> <div class="nav"> @@ -51,7 +51,7 @@ <div><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}">Hilo completo</a><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/-50">Primeros 50</a><a href="#{cgi_url}mobileread/#{board}/#{thread['timestamp']}/l10">Ćltimos 25</a></div> <?py #endif ?> </div> -<?py if thread['locked'] != '1': ?> +<?py if not thread['locked']: ?> <form name="postform" id="postform" action="/cgi/post" method="post" enctype="multipart/form-data"> <input type="hidden" name="board" value="#{board}" /><input type="hidden" name="parent" value="#{thread['id']}" /><input type="hidden" name="mobile" value="true" /><input type="hidden" name="password" value="" /> <div style="display:none"><input type="text" name="name" /><input type="text" name="email" /></div> diff --git a/cgi/templates/revision.html b/cgi/templates/revision.html index 78bc1ab..9028ec6 100644 --- a/cgi/templates/revision.html +++ b/cgi/templates/revision.html @@ -1 +1 @@ -0.10.0 +0.10.5 diff --git a/cgi/templates/txt_archive.html 
b/cgi/templates/txt_archive.html index 88b3196..1407fcf 100644 --- a/cgi/templates/txt_archive.html +++ b/cgi/templates/txt_archive.html @@ -43,9 +43,9 @@ <div class="thread" data-length="#{thread['length']}"> <h3>#{thread['subject']} <span>(${(str(thread['length'])+" respuestas") if thread['length'] > 1 else "Una respuesta"})</span></h3> <?py for post in thread['posts']: ?> - <?py if post['IS_DELETED'] == '1': ?> + <?py if post['IS_DELETED'] == 1: ?> <div class="reply deleted" data-n="#{post['num']}"><h4>#{post['num']} : Mensaje eliminado por usuario.</h4></div> - <?py elif post['IS_DELETED'] == '2': ?> + <?py elif post['IS_DELETED'] == 2: ?> <div class="reply deleted" data-n="#{post['num']}"><h4>#{post['num']} : Mensaje eliminado por staff.</h4></div> <?py else: ?> <div class="reply#{' first' if post['num'] == 1 else ''}" data-n="#{post['num']}"> diff --git a/cgi/templates/txt_thread.en.html b/cgi/templates/txt_thread.en.html index 4b16fca..c8348a1 100644 --- a/cgi/templates/txt_thread.en.html +++ b/cgi/templates/txt_thread.en.html @@ -9,7 +9,7 @@ <?py if thread['length'] > 100: ?> <a href="#{boards_url}#{board}/read/#{thread['timestamp']}/1-100">First 100</a> <?py #endif ?> - <?py for i in range(thread['length'] / 100): ?> + <?py for i in range(thread['length'] // 100): ?> <a href="#{boards_url}#{board}/read/#{thread['timestamp']}/#{(i+1)*100+1}-#{(i+2)*100}">#{(i+1)*100+1}-</a> <?py #endfor ?> <?py if thread['length'] > 51: ?> @@ -28,9 +28,9 @@ <div class="thread" data-length="#{thread['length']}"> <h3>#{thread['subject']} <span>(${(str(thread['length'])+" replies") if thread['length']>1 else "1 reply"})</span></h3> <?py for post in thread['posts']: ?> - <?py if post['IS_DELETED'] == '1': ?> + <?py if post['IS_DELETED'] == 1: ?> <div class="reply deleted" id="p#{post['id']}" data-n="#{post['num']}"><h4>#{post['num']} : Post deleted by user.</h4></div> - <?py elif post['IS_DELETED'] == '2': ?> + <?py elif post['IS_DELETED'] == 2: ?> <div class="reply deleted" 
id="p#{post['id']}" data-n="#{post['num']}"><h4>#{post['num']} : Post deleted by staff.</h4></div> <?py else: ?> <div class="reply#{' first' if post['num'] == 1 else ''}" id="p#{post['id']}" data-n="#{post['num']}"> @@ -61,7 +61,7 @@ <div id="size">#{thread['size']}</div> </div> <hr /> -<?py if thread['locked'] != '1': ?> +<?py if not thread['locked']: ?> <div class="lastposts"><a href="#{boards_url}#{board}/read/#{thread['timestamp']}/#{thread['length']}-n" id="n">Show new posts</a></div> <hr /> <?py #endif ?> @@ -105,4 +105,4 @@ <div class="end">weabot.py ver <?py include('templates/revision.html') ?> Bienvenido a Internet BBS/IB</div> <a name="bottom"></a> </body> -</html>
\ No newline at end of file +</html> diff --git a/cgi/tenjin.py b/cgi/tenjin.py deleted file mode 100644 index ddc12bb..0000000 --- a/cgi/tenjin.py +++ /dev/null @@ -1,2293 +0,0 @@ -## -# $Release: 1.1.1 $ -# $Copyright: copyright(c) 2007-2012 kuwata-lab.com all rights reserved. $ -# $License: MIT License $ -## -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -## -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -## -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -## - -"""Very fast and light-weight template engine based embedded Python. - See User's Guide and examples for details. 
- http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html - http://www.kuwata-lab.com/tenjin/pytenjin-examples.html -""" - -__version__ = "$Release: 1.1.1 $"[10:-2] -__license__ = "$License: MIT License $"[10:-2] -__all__ = ('Template', 'Engine', ) - - -from os.path import isfile as _isfile -from os.path import getmtime as _getmtime -from time import time as _time -import sys -import os -import re -import time -import marshal -random = pickle = unquote = None # lazy import -python3 = sys.version_info[0] == 3 -python2 = sys.version_info[0] == 2 - -logger = None - - -## -# utilities -## - -def _write_binary_file(filename, content): - global random - if random is None: - from random import random - tmpfile = filename + str(random())[1:] - f = open(tmpfile, 'w+b') # on windows, 'w+b' is preffered than 'wb' - try: - f.write(content) - finally: - f.close() - if os.path.exists(tmpfile): - try: - os.rename(tmpfile, filename) - except: - # on windows, existing file should be removed before renaming - os.remove(filename) - os.rename(tmpfile, filename) - - -def _read_binary_file(filename): - f = open(filename, 'rb') - try: - return f.read() - finally: - f.close() - - -codecs = None # lazy import - - -def _read_text_file(filename, encoding=None): - global codecs - if not codecs: - import codecs - f = codecs.open(filename, encoding=(encoding or 'utf-8')) - try: - return f.read() - finally: - f.close() - - -def _read_template_file(filename, encoding=None): - s = _read_binary_file(filename) # binary(=str) - if encoding: - s = s.decode(encoding) # binary(=str) to unicode - return s - - -_basestring = basestring -_unicode = unicode -_bytes = str - - -def _ignore_not_found_error(f, default=None): - try: - return f() - except OSError, ex: - if ex.errno == 2: # error: No such file or directory - return default - raise - - -def create_module(module_name, dummy_func=None, **kwargs): - """ex. 
mod = create_module('tenjin.util')""" - try: - mod = type(sys)(module_name) - except: - # The module creation above does not work for Jython 2.5.2 - import imp - mod = imp.new_module(module_name) - - mod.__file__ = __file__ - mod.__dict__.update(kwargs) - sys.modules[module_name] = mod - if dummy_func: - exec(dummy_func.func_code, mod.__dict__) - return mod - - -def _raise(exception_class, *args): - raise exception_class(*args) - - -## -# helper method's module -## - -def _dummy(): - global unquote - unquote = None - global to_str, escape, echo, new_cycle, generate_tostrfunc - global start_capture, stop_capture, capture_as, captured_as, CaptureContext - global _p, _P, _decode_params - - def generate_tostrfunc(encode=None, decode=None): - """Generate 'to_str' function with encode or decode encoding. - ex. generate to_str() function which encodes unicode into binary(=str). - to_str = tenjin.generate_tostrfunc(encode='utf-8') - repr(to_str(u'hoge')) #=> 'hoge' (str) - ex. generate to_str() function which decodes binary(=str) into unicode. - to_str = tenjin.generate_tostrfunc(decode='utf-8') - repr(to_str('hoge')) #=> u'hoge' (unicode) - """ - if encode: - if decode: - raise ValueError( - "can't specify both encode and decode encoding.") - else: - def to_str(val, _str=str, _unicode=unicode, _isa=isinstance, _encode=encode): - """Convert val into string or return '' if None. Unicode will be encoded into binary(=str).""" - if _isa(val, _str): - return val - if val is None: - return '' - # if _isa(val, _unicode): return val.encode(_encode) # unicode to binary(=str) - if _isa(val, _unicode): - return val.encode(_encode) # unicode to binary(=str) - return _str(val) - else: - if decode: - def to_str(val, _str=str, _unicode=unicode, _isa=isinstance, _decode=decode): - """Convert val into string or return '' if None. 
Binary(=str) will be decoded into unicode.""" - # if _isa(val, _str): return val.decode(_decode) # binary(=str) to unicode - if _isa(val, _str): - return val.decode(_decode) - if val is None: - return '' - if _isa(val, _unicode): - return val - return _unicode(val) - else: - def to_str(val, _str=str, _unicode=unicode, _isa=isinstance): - """Convert val into string or return '' if None. Both binary(=str) and unicode will be retruned as-is.""" - if _isa(val, _str): - return val - if val is None: - return '' - if _isa(val, _unicode): - return val - return _str(val) - return to_str - - to_str = generate_tostrfunc(encode='utf-8') # or encode=None? - - def echo(string): - """add string value into _buf. this is equivarent to '#{string}'.""" - lvars = sys._getframe(1).f_locals # local variables - lvars['_buf'].append(string) - - def new_cycle(*values): - """Generate cycle object. - ex. - cycle = new_cycle('odd', 'even') - print(cycle()) #=> 'odd' - print(cycle()) #=> 'even' - print(cycle()) #=> 'odd' - print(cycle()) #=> 'even' - """ - def gen(values): - i, n = 0, len(values) - while True: - yield values[i] - i = (i + 1) % n - return gen(values).next - - class CaptureContext(object): - - def __init__(self, name, store_to_context=True, lvars=None): - self.name = name - self.store_to_context = store_to_context - self.lvars = lvars or sys._getframe(1).f_locals - - def __enter__(self): - lvars = self.lvars - self._buf_orig = lvars['_buf'] - lvars['_buf'] = _buf = [] - lvars['_extend'] = _buf.extend - return self - - def __exit__(self, *args): - lvars = self.lvars - _buf = lvars['_buf'] - lvars['_buf'] = self._buf_orig - lvars['_extend'] = self._buf_orig.extend - lvars[self.name] = self.captured = ''.join(_buf) - if self.store_to_context and '_context' in lvars: - lvars['_context'][self.name] = self.captured - - def __iter__(self): - self.__enter__() - yield self - self.__exit__() - - def start_capture(varname=None, _depth=1): - """(obsolete) start capturing with name.""" - 
lvars = sys._getframe(_depth).f_locals - capture_context = CaptureContext(varname, None, lvars) - lvars['_capture_context'] = capture_context - capture_context.__enter__() - - def stop_capture(store_to_context=True, _depth=1): - """(obsolete) stop capturing and return the result of capturing. - if store_to_context is True then the result is stored into _context[varname]. - """ - lvars = sys._getframe(_depth).f_locals - capture_context = lvars.pop('_capture_context', None) - if not capture_context: - raise Exception( - 'stop_capture(): start_capture() is not called before.') - capture_context.store_to_context = store_to_context - capture_context.__exit__() - return capture_context.captured - - def capture_as(name, store_to_context=True): - """capture partial of template.""" - return CaptureContext(name, store_to_context, sys._getframe(1).f_locals) - - def captured_as(name, _depth=1): - """helper method for layout template. - if captured string is found then append it to _buf and return True, - else return False. - """ - lvars = sys._getframe(_depth).f_locals # local variables - if name in lvars: - _buf = lvars['_buf'] - _buf.append(lvars[name]) - return True - return False - - def _p(arg): - """ex. '/show/'+_p("item['id']") => "/show/#{item['id']}" """ - return '<`#%s#`>' % arg # decoded into #{...} by preprocessor - - def _P(arg): - """ex. 
'<b>%s</b>' % _P("item['id']") => "<b>${item['id']}</b>" """ - return '<`$%s$`>' % arg # decoded into ${...} by preprocessor - - def _decode_params(s): - """decode <`#...#`> and <`$...$`> into #{...} and ${...}""" - global unquote - if unquote is None: - from urllib import unquote - dct = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', '#039': "'", } - - def unescape(s): - # return s.replace('<', '<').replace('>', '>').replace('"', '"').replace(''', "'").replace('&', '&') - return re.sub(r'&(lt|gt|quot|amp|#039);', lambda m: dct[m.group(1)], s) - s = to_str(s) - s = re.sub(r'%3C%60%23(.*?)%23%60%3E', - lambda m: '#{%s}' % unquote(m.group(1)), s) - s = re.sub(r'%3C%60%24(.*?)%24%60%3E', - lambda m: '${%s}' % unquote(m.group(1)), s) - s = re.sub(r'<`#(.*?)#`>', - lambda m: '#{%s}' % unescape(m.group(1)), s) - s = re.sub(r'<`\$(.*?)\$`>', - lambda m: '${%s}' % unescape(m.group(1)), s) - s = re.sub(r'<`#(.*?)#`>', r'#{\1}', s) - s = re.sub(r'<`\$(.*?)\$`>', r'${\1}', s) - return s - - -helpers = create_module('tenjin.helpers', _dummy, sys=sys, re=re) -helpers.__all__ = ['to_str', 'escape', 'echo', 'new_cycle', 'generate_tostrfunc', - 'start_capture', 'stop_capture', 'capture_as', 'captured_as', - 'not_cached', 'echo_cached', 'cache_as', - '_p', '_P', '_decode_params', - ] -generate_tostrfunc = helpers.generate_tostrfunc - - -## -# escaped module -## -def _dummy(): - global is_escaped, as_escaped, to_escaped - global Escaped, EscapedStr, EscapedUnicode - global __all__ - # 'Escaped', 'EscapedStr', - __all__ = ('is_escaped', 'as_escaped', 'to_escaped', ) - - class Escaped(object): - """marking class that object is already escaped.""" - pass - - def is_escaped(value): - """return True if value is marked as escaped, else return False.""" - return isinstance(value, Escaped) - - class EscapedStr(str, Escaped): - """string class which is marked as escaped.""" - pass - - class EscapedUnicode(unicode, Escaped): - """unicode class which is marked as escaped.""" - pass - - def 
as_escaped(s): - """mark string as escaped, without escaping.""" - if isinstance(s, str): - return EscapedStr(s) - if isinstance(s, unicode): - return EscapedUnicode(s) - raise TypeError("as_escaped(%r): expected str or unicode." % (s, )) - - def to_escaped(value): - """convert any value into string and escape it. - if value is already marked as escaped, don't escape it.""" - if hasattr(value, '__html__'): - value = value.__html__() - if is_escaped(value): - # return value # EscapedUnicode should be convered into EscapedStr - return as_escaped(_helpers.to_str(value)) - # if isinstance(value, _basestring): - # return as_escaped(_helpers.escape(value)) - return as_escaped(_helpers.escape(_helpers.to_str(value))) - - -escaped = create_module('tenjin.escaped', _dummy, _helpers=helpers) - - -## -# module for html -## -def _dummy(): - global escape_html, escape_xml, escape, tagattr, tagattrs, _normalize_attrs - global checked, selected, disabled, nl2br, text2html, nv, js_link - - # _escape_table = { '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' } - #_escape_pattern = re.compile(r'[&<>"]') - ##_escape_callable = lambda m: _escape_table[m.group(0)] - ##_escape_callable = lambda m: _escape_table.__get__(m.group(0)) - #_escape_get = _escape_table.__getitem__ - #_escape_callable = lambda m: _escape_get(m.group(0)) - #_escape_sub = _escape_pattern.sub - - # def escape_html(s): - # return s # 3.02 - - # def escape_html(s): - # return _escape_pattern.sub(_escape_callable, s) # 6.31 - - # def escape_html(s): - # return _escape_sub(_escape_callable, s) # 6.01 - - # def escape_html(s, _p=_escape_pattern, _f=_escape_callable): - # return _p.sub(_f, s) # 6.27 - - # def escape_html(s, _sub=_escape_pattern.sub, _callable=_escape_callable): - # return _sub(_callable, s) # 6.04 - - # def escape_html(s): - # s = s.replace('&', '&') - # s = s.replace('<', '<') - # s = s.replace('>', '>') - # s = s.replace('"', '"') - # return s # 5.83 - - def escape_html(s): - """Escape '&', '<', '>', 
'"' into '&', '<', '>', '"'.""" - return s.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') # 5.72 - - escape_xml = escape_html # for backward compatibility - - def tagattr(name, expr, value=None, escape=True): - """(experimental) Return ' name="value"' if expr is true value, else '' (empty string). - If value is not specified, expr is used as value instead.""" - if not expr and expr != 0: - return _escaped.as_escaped('') - if value is None: - value = expr - if escape: - value = _escaped.to_escaped(value) - return _escaped.as_escaped(' %s="%s"' % (name, value)) - - def tagattrs(**kwargs): - """(experimental) built html tag attribtes. - ex. - >>> tagattrs(klass='main', size=20) - ' class="main" size="20"' - >>> tagattrs(klass='', size=0) - '' - """ - kwargs = _normalize_attrs(kwargs) - esc = _escaped.to_escaped - s = ''.join([' %s="%s"' % (k, esc(v)) - for k, v in kwargs.iteritems() if v or v == 0]) - return _escaped.as_escaped(s) - - def _normalize_attrs(kwargs): - if 'klass' in kwargs: - kwargs['class'] = kwargs.pop('klass') - if 'checked' in kwargs: - kwargs['checked'] = kwargs.pop('checked') and 'checked' or None - if 'selected' in kwargs: - kwargs['selected'] = kwargs.pop('selected') and 'selected' or None - if 'disabled' in kwargs: - kwargs['disabled'] = kwargs.pop('disabled') and 'disabled' or None - return kwargs - - def checked(expr): - """return ' checked="checked"' if expr is true.""" - return _escaped.as_escaped(expr and ' checked="checked"' or '') - - def selected(expr): - """return ' selected="selected"' if expr is true.""" - return _escaped.as_escaped(expr and ' selected="selected"' or '') - - def disabled(expr): - """return ' disabled="disabled"' if expr is true.""" - return _escaped.as_escaped(expr and ' disabled="disabled"' or '') - - def nl2br(text): - """replace "\n" to "<br />\n" and return it.""" - if not text: - return _escaped.as_escaped('') - return _escaped.as_escaped(text.replace('\n', '<br />\n')) 
- - def text2html(text, use_nbsp=True): - """(experimental) escape xml characters, replace "\n" to "<br />\n", and return it.""" - if not text: - return _escaped.as_escaped('') - s = _escaped.to_escaped(text) - if use_nbsp: - s = s.replace(' ', ' ') - # return nl2br(s) - s = s.replace('\n', '<br />\n') - return _escaped.as_escaped(s) - - def nv(name, value, sep=None, **kwargs): - """(experimental) Build name and value attributes. - ex. - >>> nv('rank', 'A') - 'name="rank" value="A"' - >>> nv('rank', 'A', '.') - 'name="rank" value="A" id="rank.A"' - >>> nv('rank', 'A', '.', checked=True) - 'name="rank" value="A" id="rank.A" checked="checked"' - >>> nv('rank', 'A', '.', klass='error', style='color:red') - 'name="rank" value="A" id="rank.A" class="error" style="color:red"' - """ - name = _escaped.to_escaped(name) - value = _escaped.to_escaped(value) - s = sep and 'name="%s" value="%s" id="%s"' % (name, value, name+sep+value) \ - or 'name="%s" value="%s"' % (name, value) - html = kwargs and s + tagattrs(**kwargs) or s - return _escaped.as_escaped(html) - - def js_link(label, onclick, **kwargs): - s = kwargs and tagattrs(**kwargs) or '' - html = '<a href="javascript:undefined" onclick="%s;return false"%s>%s</a>' % \ - (_escaped.to_escaped(onclick), s, _escaped.to_escaped(label)) - return _escaped.as_escaped(html) - - -html = create_module('tenjin.html', _dummy, helpers=helpers, _escaped=escaped) -helpers.escape = html.escape_html -helpers.html = html # for backward compatibility -sys.modules['tenjin.helpers.html'] = html - - -## -# utility function to set default encoding of template files -## -_template_encoding = (None, 'utf-8') # encodings for decode and encode - - -def set_template_encoding(decode=None, encode=None): - """Set default encoding of template files. - This should be called before importing helper functions. - ex. - ## I like template files to be unicode-base like Django. 
- import tenjin - tenjin.set_template_encoding('utf-8') # should be called before importing helpers - from tenjin.helpers import * - """ - global _template_encoding - if _template_encoding == (decode, encode): - return - if decode and encode: - raise ValueError( - "set_template_encoding(): cannot specify both decode and encode.") - if not decode and not encode: - raise ValueError( - "set_template_encoding(): decode or encode should be specified.") - if decode: - Template.encoding = decode # unicode base template - helpers.to_str = helpers.generate_tostrfunc(decode=decode) - else: - Template.encoding = None # binary base template - helpers.to_str = helpers.generate_tostrfunc(encode=encode) - _template_encoding = (decode, encode) - - -## -# Template class -## - -class TemplateSyntaxError(SyntaxError): - - def build_error_message(self): - ex = self - if not ex.text: - return self.args[0] - return ''.join([ - "%s:%s:%s: %s\n" % (ex.filename, ex.lineno, ex.offset, ex.msg, ), - "%4d: %s\n" % (ex.lineno, ex.text.rstrip(), ), - " %s^\n" % (' ' * ex.offset, ), - ]) - - -class Template(object): - """Convert and evaluate embedded python string. - See User's Guide and examples for details. - http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html - http://www.kuwata-lab.com/tenjin/pytenjin-examples.html - """ - - # default value of attributes - filename = None - encoding = None - escapefunc = 'escape' - tostrfunc = 'to_str' - indent = 4 - preamble = None # "_buf = []; _expand = _buf.expand; _to_str = to_str; _escape = escape" - postamble = None # "print ''.join(_buf)" - smarttrim = None - args = None - timestamp = None - trace = False # if True then '<!-- begin: file -->' and '<!-- end: file -->' are printed - - def __init__(self, filename=None, encoding=None, input=None, escapefunc=None, tostrfunc=None, - indent=None, preamble=None, postamble=None, smarttrim=None, trace=None): - """Initailizer of Template class. - - filename:str (=None) - Filename to convert (optional). 
If None, no convert. - encoding:str (=None) - Encoding name. If specified, template string is converted into - unicode object internally. - Template.render() returns str object if encoding is None, - else returns unicode object if encoding name is specified. - input:str (=None) - Input string. In other words, content of template file. - Template file will not be read if this argument is specified. - escapefunc:str (='escape') - Escape function name. - tostrfunc:str (='to_str') - 'to_str' function name. - indent:int (=4) - Indent width. - preamble:str or bool (=None) - Preamble string which is inserted into python code. - If true, '_buf = []; ' is used insated. - postamble:str or bool (=None) - Postamble string which is appended to python code. - If true, 'print("".join(_buf))' is used instead. - smarttrim:bool (=None) - If True then "<div>\\n#{_context}\\n</div>" is parsed as - "<div>\\n#{_context}</div>". - """ - if encoding is not None: - self.encoding = encoding - if escapefunc is not None: - self.escapefunc = escapefunc - if tostrfunc is not None: - self.tostrfunc = tostrfunc - if indent is not None: - self.indent = indent - if preamble is not None: - self.preamble = preamble - if postamble is not None: - self.postamble = postamble - if smarttrim is not None: - self.smarttrim = smarttrim - if trace is not None: - self.trace = trace - # - if preamble is True: - self.preamble = "_buf = []" - if postamble is True: - self.postamble = "print(''.join(_buf))" - if input: - self.convert(input, filename) - # False means 'file not exist' (= Engine should not check timestamp of file) - self.timestamp = False - elif filename: - self.convert_file(filename) - else: - self._reset() - - def _reset(self, input=None, filename=None): - self.script = None - self.bytecode = None - self.input = input - self.filename = filename - if input != None: - i = input.find("\n") - if i < 0: - self.newline = "\n" # or None - elif len(input) >= 2 and input[i-1] == "\r": - self.newline = "\r\n" 
- else: - self.newline = "\n" - self._localvars_assignments_added = False - - def _localvars_assignments(self): - return "_extend=_buf.extend;_to_str=%s;_escape=%s; " % (self.tostrfunc, self.escapefunc) - - def before_convert(self, buf): - if self.preamble: - eol = self.input.startswith('<?py') and "\n" or "; " - buf.append(self.preamble + eol) - - def after_convert(self, buf): - if self.postamble: - if buf and not buf[-1].endswith("\n"): - buf.append("\n") - buf.append(self.postamble + "\n") - - def convert_file(self, filename): - """Convert file into python script and return it. - This is equivarent to convert(open(filename).read(), filename). - """ - input = _read_template_file(filename) - return self.convert(input, filename) - - def convert(self, input, filename=None): - """Convert string in which python code is embedded into python script and return it. - - input:str - Input string to convert into python code. - filename:str (=None) - Filename of input. this is optional but recommended to report errors. - """ - if self.encoding and isinstance(input, str): - input = input.decode(self.encoding) - self._reset(input, filename) - buf = [] - self.before_convert(buf) - self.parse_stmts(buf, input) - self.after_convert(buf) - script = ''.join(buf) - self.script = script - return script - - STMT_PATTERN = (r'<\?py( |\t|\r?\n)(.*?) 
?\?>([ \t]*\r?\n)?', re.S) - - def stmt_pattern(self): - pat = self.STMT_PATTERN - if isinstance(pat, tuple): - pat = self.__class__.STMT_PATTERN = re.compile(*pat) - return pat - - def parse_stmts(self, buf, input): - if not input: - return - rexp = self.stmt_pattern() - is_bol = True - index = 0 - for m in rexp.finditer(input): - mspace, code, rspace = m.groups() - #mspace, close, rspace = m.groups() - #code = input[m.start()+4+len(mspace):m.end()-len(close)-(rspace and len(rspace) or 0)] - text = input[index:m.start()] - index = m.end() - # detect spaces at beginning of line - lspace = None - if text == '': - if is_bol: - lspace = '' - elif text[-1] == '\n': - lspace = '' - else: - rindex = text.rfind('\n') - if rindex < 0: - if is_bol and text.isspace(): - lspace, text = text, '' - else: - s = text[rindex+1:] - if s.isspace(): - lspace, text = s, text[:rindex+1] - #is_bol = rspace is not None - # add text, spaces, and statement - self.parse_exprs(buf, text, is_bol) - is_bol = rspace is not None - # if mspace == "\n": - if mspace and mspace.endswith("\n"): - code = "\n" + (code or "") - # if rspace == "\n": - if rspace and rspace.endswith("\n"): - code = (code or "") + "\n" - if code: - code = self.statement_hook(code) - m = self._match_to_args_declaration(code) - if m: - self._add_args_declaration(buf, m) - else: - self.add_stmt(buf, code) - rest = input[index:] - if rest: - self.parse_exprs(buf, rest) - self._arrange_indent(buf) - - def statement_hook(self, stmt): - """expand macros and parse '#@ARGS' in a statement.""" - return stmt.replace("\r\n", "\n") # Python can't handle "\r\n" in code - - def _match_to_args_declaration(self, stmt): - if self.args is not None: - return None - args_pattern = r'^ *#@ARGS(?:[ \t]+(.*?))?$' - return re.match(args_pattern, stmt) - - def _add_args_declaration(self, buf, m): - arr = (m.group(1) or '').split(',') - args = [] - declares = [] - for s in arr: - arg = s.strip() - if not s: - continue - if not 
re.match('^[a-zA-Z_]\w*$', arg): - raise ValueError("%r: invalid template argument." % arg) - args.append(arg) - declares.append("%s = _context.get('%s'); " % (arg, arg)) - self.args = args - #nl = stmt[m.end():] - #if nl: declares.append(nl) - buf.append(''.join(declares) + "\n") - - s = '(?:\{.*?\}.*?)*' - EXPR_PATTERN = ( - r'#\{(.*?'+s+r')\}|\$\{(.*?'+s+r')\}|\{=(?:=(.*?)=|(.*?))=\}', re.S) - del s - - def expr_pattern(self): - pat = self.EXPR_PATTERN - if isinstance(pat, tuple): - self.__class__.EXPR_PATTERN = pat = re.compile(*pat) - return pat - - def get_expr_and_flags(self, match): - expr1, expr2, expr3, expr4 = match.groups() - if expr1 is not None: - return expr1, (False, True) # not escape, call to_str - if expr2 is not None: - return expr2, (True, True) # call escape, call to_str - if expr3 is not None: - return expr3, (False, True) # not escape, call to_str - if expr4 is not None: - return expr4, (True, True) # call escape, call to_str - - def parse_exprs(self, buf, input, is_bol=False): - buf2 = [] - self._parse_exprs(buf2, input, is_bol) - if buf2: - buf.append(''.join(buf2)) - - def _parse_exprs(self, buf, input, is_bol=False): - if not input: - return - self.start_text_part(buf) - rexp = self.expr_pattern() - smarttrim = self.smarttrim - nl = self.newline - nl_len = len(nl) - pos = 0 - for m in rexp.finditer(input): - start = m.start() - text = input[pos:start] - pos = m.end() - expr, flags = self.get_expr_and_flags(m) - # - if text: - self.add_text(buf, text) - self.add_expr(buf, expr, *flags) - # - if smarttrim: - flag_bol = text.endswith( - nl) or not text and (start > 0 or is_bol) - if flag_bol and not flags[0] and input[pos:pos+nl_len] == nl: - pos += nl_len - buf.append("\n") - if smarttrim: - if buf and buf[-1] == "\n": - buf.pop() - rest = input[pos:] - if rest: - self.add_text(buf, rest, True) - self.stop_text_part(buf) - if input[-1] == '\n': - buf.append("\n") - - def start_text_part(self, buf): - 
self._add_localvars_assignments_to_text(buf) - # buf.append("_buf.extend((") - buf.append("_extend((") - - def _add_localvars_assignments_to_text(self, buf): - if not self._localvars_assignments_added: - self._localvars_assignments_added = True - buf.append(self._localvars_assignments()) - - def stop_text_part(self, buf): - buf.append("));") - - def _quote_text(self, text): - text = re.sub(r"(['\\\\])", r"\\\1", text) - text = text.replace("\r\n", "\\r\n") - return text - - def add_text(self, buf, text, encode_newline=False): - if not text: - return - use_unicode = self.encoding and python2 - buf.append(use_unicode and "u'''" or "'''") - text = self._quote_text(text) - if not encode_newline: - buf.extend((text, "''', ")) - elif text.endswith("\r\n"): - buf.extend((text[0:-2], "\\r\\n''', ")) - elif text.endswith("\n"): - buf.extend((text[0:-1], "\\n''', ")) - else: - buf.extend((text, "''', ")) - - _add_text = add_text - - def add_expr(self, buf, code, *flags): - if not code or code.isspace(): - return - flag_escape, flag_tostr = flags - if not self.tostrfunc: - flag_tostr = False - if not self.escapefunc: - flag_escape = False - if flag_tostr and flag_escape: - s1, s2 = "_escape(_to_str(", ")), " - elif flag_tostr: - s1, s2 = "_to_str(", "), " - elif flag_escape: - s1, s2 = "_escape(", "), " - else: - s1, s2 = "(", "), " - buf.extend((s1, code, s2, )) - - def add_stmt(self, buf, code): - if not code: - return - lines = code.splitlines(True) # keep "\n" - if lines[-1][-1] != "\n": - lines[-1] = lines[-1] + "\n" - buf.extend(lines) - self._add_localvars_assignments_to_stmts(buf) - - def _add_localvars_assignments_to_stmts(self, buf): - if self._localvars_assignments_added: - return - for index, stmt in enumerate(buf): - if not re.match(r'^[ \t]*(?:\#|_buf ?= ?\[\]|from __future__)', stmt): - break - else: - return - self._localvars_assignments_added = True - if re.match(r'^[ \t]*(if|for|while|def|with|class)\b', stmt): - buf.insert(index, 
self._localvars_assignments() + "\n") - else: - buf[index] = self._localvars_assignments() + buf[index] - - _START_WORDS = dict.fromkeys( - ('for', 'if', 'while', 'def', 'try:', 'with', 'class'), True) - _END_WORDS = dict.fromkeys(('#end', '#endfor', '#endif', '#endwhile', - '#enddef', '#endtry', '#endwith', '#endclass'), True) - _CONT_WORDS = dict.fromkeys( - ('elif', 'else:', 'except', 'except:', 'finally:'), True) - _WORD_REXP = re.compile(r'\S+') - - depth = -1 - - ## - # ex. - # input = r""" - # if items: - ## _buf.extend(('<ul>\n', )) - ## i = 0 - # for item in items: - ## i += 1 - ## _buf.extend(('<li>', to_str(item), '</li>\n', )) - # endfor - ## _buf.extend(('</ul>\n', )) - # endif - # """[1:] - ## lines = input.splitlines(True) - ## block = self.parse_lines(lines) - # => [ "if items:\n", - # [ "_buf.extend(('<ul>\n', ))\n", - ## "i = 0\n", - ## "for item in items:\n", - # [ "i += 1\n", - ## "_buf.extend(('<li>', to_str(item), '</li>\n', ))\n", - # ], - # "#endfor\n", - ## "_buf.extend(('</ul>\n', ))\n", - # ], - # "#endif\n", - # ] - def parse_lines(self, lines): - block = [] - try: - self._parse_lines(lines.__iter__(), False, block, 0) - except StopIteration: - if self.depth > 0: - fname, linenum, colnum, linetext = self.filename, len( - lines), None, None - raise TemplateSyntaxError( - "unexpected EOF.", (fname, linenum, colnum, linetext)) - else: - pass - return block - - def _parse_lines(self, lines_iter, end_block, block, linenum): - if block is None: - block = [] - _START_WORDS = self._START_WORDS - _END_WORDS = self._END_WORDS - _CONT_WORDS = self._CONT_WORDS - _WORD_REXP = self._WORD_REXP - get_line = lines_iter.next - while True: - line = get_line() - linenum += line.count("\n") - m = _WORD_REXP.search(line) - if not m: - block.append(line) - continue - word = m.group(0) - if word in _END_WORDS: - if word != end_block and word != '#end': - if end_block is False: - msg = "'%s' found but corresponding statement is missing." 
% ( - word, ) - else: - msg = "'%s' expected but got '%s'." % (end_block, word) - colnum = m.start() + 1 - raise TemplateSyntaxError( - msg, (self.filename, linenum, colnum, line)) - return block, line, None, linenum - elif line.endswith(':\n') or line.endswith(':\r\n'): - if word in _CONT_WORDS: - return block, line, word, linenum - elif word in _START_WORDS: - block.append(line) - self.depth += 1 - cont_word = None - try: - child_block, line, cont_word, linenum = \ - self._parse_lines( - lines_iter, '#end'+word, [], linenum) - block.extend((child_block, line, )) - while cont_word: # 'elif' or 'else:' - child_block, line, cont_word, linenum = \ - self._parse_lines( - lines_iter, '#end'+word, [], linenum) - block.extend((child_block, line, )) - except StopIteration: - msg = "'%s' is not closed." % (cont_word or word) - colnum = m.start() + 1 - raise TemplateSyntaxError( - msg, (self.filename, linenum, colnum, line)) - self.depth -= 1 - else: - block.append(line) - else: - block.append(line) - assert "unreachable" - - def _join_block(self, block, buf, depth): - indent = ' ' * (self.indent * depth) - for line in block: - if isinstance(line, list): - self._join_block(line, buf, depth+1) - elif line.isspace(): - buf.append(line) - else: - buf.append(indent + line.lstrip()) - - def _arrange_indent(self, buf): - """arrange indentation of statements in buf""" - block = self.parse_lines(buf) - buf[:] = [] - self._join_block(block, buf, 0) - - def render(self, context=None, globals=None, _buf=None): - """Evaluate python code with context dictionary. - If _buf is None then return the result of evaluation as str, - else return None. - - context:dict (=None) - Context object to evaluate. If None then new dict is created. - globals:dict (=None) - Global object. If None then globals() is used. - _buf:list (=None) - If None then new list is created. 
- """ - if context is None: - locals = context = {} - elif self.args is None: - locals = context.copy() - else: - locals = {} - if '_engine' in context: - context.get('_engine').hook_context(locals) - locals['_context'] = context - if globals is None: - globals = sys._getframe(1).f_globals - bufarg = _buf - if _buf is None: - _buf = [] - locals['_buf'] = _buf - if not self.bytecode: - self.compile() - if self.trace: - _buf.append("<!-- ***** begin: %s ***** -->\n" % self.filename) - exec(self.bytecode, globals, locals) - _buf.append("<!-- ***** end: %s ***** -->\n" % self.filename) - else: - exec(self.bytecode, globals, locals) - if bufarg is not None: - return bufarg - elif not logger: - return ''.join(_buf) - else: - try: - return ''.join(_buf) - except UnicodeDecodeError, ex: - logger.error("[tenjin.Template] " + str(ex)) - logger.error("[tenjin.Template] (_buf=%r)" % (_buf, )) - raise - - def compile(self): - """compile self.script into self.bytecode""" - self.bytecode = compile( - self.script, self.filename or '(tenjin)', 'exec') - - -## -# preprocessor class -## - -class Preprocessor(Template): - """Template class for preprocessing.""" - - STMT_PATTERN = (r'<\?PY( |\t|\r?\n)(.*?) 
?\?>([ \t]*\r?\n)?', re.S) - - EXPR_PATTERN = ( - r'#\{\{(.*?)\}\}|\$\{\{(.*?)\}\}|\{#=(?:=(.*?)=|(.*?))=#\}', re.S) - - def add_expr(self, buf, code, *flags): - if not code or code.isspace(): - return - code = "_decode_params(%s)" % code - Template.add_expr(self, buf, code, *flags) - - -class TemplatePreprocessor(object): - factory = Preprocessor - - def __init__(self, factory=None): - if factory is not None: - self.factory = factory - self.globals = sys._getframe(1).f_globals - - def __call__(self, input, **kwargs): - filename = kwargs.get('filename') - context = kwargs.get('context') or {} - globals = kwargs.get('globals') or self.globals - template = self.factory() - template.convert(input, filename) - return template.render(context, globals=globals) - - -class TrimPreprocessor(object): - - _rexp = re.compile(r'^[ \t]+<', re.M) - _rexp_all = re.compile(r'^[ \t]+', re.M) - - def __init__(self, all=False): - self.all = all - - def __call__(self, input, **kwargs): - if self.all: - return self._rexp_all.sub('', input) - else: - return self._rexp.sub('<', input) - - -class PrefixedLinePreprocessor(object): - - def __init__(self, prefix='::(?=[ \t]|$)'): - self.prefix = prefix - self.regexp = re.compile(r'^([ \t]*)' + prefix + r'(.*)', re.M) - - def convert_prefixed_lines(self, text): - def fn(m): return "%s<?py%s ?>" % (m.group(1), m.group(2)) - return self.regexp.sub(fn, text) - - STMT_REXP = re.compile(r'<\?py\s.*?\?>', re.S) - - def __call__(self, input, **kwargs): - buf = [] - append = buf.append - pos = 0 - for m in self.STMT_REXP.finditer(input): - text = input[pos:m.start()] - stmt = m.group(0) - pos = m.end() - if text: - append(self.convert_prefixed_lines(text)) - append(stmt) - rest = input[pos:] - if rest: - append(self.convert_prefixed_lines(rest)) - return "".join(buf) - - -class ParseError(Exception): - pass - - -class JavaScriptPreprocessor(object): - - def __init__(self, **attrs): - self._attrs = attrs - - def __call__(self, input, **kwargs): - 
return self.parse(input, kwargs.get('filename')) - - def parse(self, input, filename=None): - buf = [] - self._parse_chunks(input, buf, filename) - return ''.join(buf) - - CHUNK_REXP = re.compile( - r'(?:^( *)<|<)!-- *#(?:JS: (\$?\w+(?:\.\w+)*\(.*?\))|/JS:?) *-->([ \t]*\r?\n)?', re.M) - - def _scan_chunks(self, input, filename): - rexp = self.CHUNK_REXP - pos = 0 - curr_funcdecl = None - for m in rexp.finditer(input): - lspace, funcdecl, rspace = m.groups() - text = input[pos:m.start()] - pos = m.end() - if funcdecl: - if curr_funcdecl: - raise ParseError("%s is nested in %s. (file: %s, line: %s)" % - (funcdecl, curr_funcdecl, filename, _linenum(input, m.start()), )) - curr_funcdecl = funcdecl - else: - if not curr_funcdecl: - raise ParseError("unexpected '<!-- #/JS -->'. (file: %s, line: %s)" % - (filename, _linenum(input, m.start()), )) - curr_funcdecl = None - yield text, lspace, funcdecl, rspace, False - if curr_funcdecl: - raise ParseError("%s is not closed by '<!-- #/JS -->'. (file: %s, line: %s)" % - (curr_funcdecl, filename, _linenum(input, m.start()), )) - rest = input[pos:] - yield rest, None, None, None, True - - def _parse_chunks(self, input, buf, filename=None): - if not input: - return - stag = '<script' - if self._attrs: - for k in self._attrs: - stag = "".join((stag, ' ', k, '="', self._attrs[k], '"')) - stag += '>' - etag = '</script>' - for text, lspace, funcdecl, rspace, end_p in self._scan_chunks(input, filename): - if end_p: - break - if funcdecl: - buf.append(text) - if re.match(r'^\$?\w+\(', funcdecl): - buf.extend((lspace or '', stag, 'function ', - funcdecl, "{var _buf='';", rspace or '')) - else: - m = re.match(r'(.+?)\((.*)\)', funcdecl) - buf.extend((lspace or '', stag, m.group( - 1), '=function(', m.group(2), "){var _buf='';", rspace or '')) - else: - self._parse_stmts(text, buf) - buf.extend( - (lspace or '', "return _buf;};", etag, rspace or '')) - # - buf.append(text) - - STMT_REXP = re.compile( - r'(?:^( *)<|<)\?js(\s.*?) 
?\?>([ \t]*\r?\n)?', re.M | re.S) - - def _scan_stmts(self, input): - rexp = self.STMT_REXP - pos = 0 - for m in rexp.finditer(input): - lspace, code, rspace = m.groups() - text = input[pos:m.start()] - pos = m.end() - yield text, lspace, code, rspace, False - rest = input[pos:] - yield rest, None, None, None, True - - def _parse_stmts(self, input, buf): - if not input: - return - for text, lspace, code, rspace, end_p in self._scan_stmts(input): - if end_p: - break - if lspace is not None and rspace is not None: - self._parse_exprs(text, buf) - buf.extend((lspace, code, rspace)) - else: - if lspace: - text += lspace - self._parse_exprs(text, buf) - buf.append(code) - if rspace: - self._parse_exprs(rspace, buf) - if text: - self._parse_exprs(text, buf) - - s = r'(?:\{[^{}]*?\}[^{}]*?)*' - EXPR_REXP = re.compile(r'\{=(.*?)=\}|([$#])\{(.*?' + s + r')\}', re.S) - del s - - def _get_expr(self, m): - code1, ch, code2 = m.groups() - if ch: - code = code2 - escape_p = ch == '$' - elif code1[0] == code1[-1] == '=': - code = code1[1:-1] - escape_p = False - else: - code = code1 - escape_p = True - return code, escape_p - - def _scan_exprs(self, input): - rexp = self.EXPR_REXP - pos = 0 - for m in rexp.finditer(input): - text = input[pos:m.start()] - pos = m.end() - code, escape_p = self._get_expr(m) - yield text, code, escape_p, False - rest = input[pos:] - yield rest, None, None, True - - def _parse_exprs(self, input, buf): - if not input: - return - buf.append("_buf+=") - extend = buf.extend - op = '' - for text, code, escape_p, end_p in self._scan_exprs(input): - if end_p: - break - if text: - extend((op, self._escape_text(text))) - op = '+' - if code: - extend((op, escape_p and '_E(' or '_S(', code, ')')) - op = '+' - rest = text - if rest: - extend((op, self._escape_text(rest))) - if input.endswith("\n"): - buf.append(";\n") - else: - buf.append(";") - - def _escape_text(self, text): - lines = text.splitlines(True) - fn = self._escape_str - s = "\\\n".join(fn(line) for 
line in lines) - return "".join(("'", s, "'")) - - def _escape_str(self, string): - return string.replace("\\", "\\\\").replace("'", "\\'").replace("\n", r"\n") - - -def _linenum(input, pos): - return input[0:pos].count("\n") + 1 - - -JS_FUNC = r""" -function _S(x){return x==null?'':x;} -function _E(x){return x==null?'':typeof(x)!=='string'?x:x.replace(/[&<>"']/g,_EF);} -var _ET={'&':"&",'<':"<",'>':">",'"':""","'":"'"}; -function _EF(c){return _ET[c];}; -"""[1:-1] -JS_FUNC = escaped.EscapedStr(JS_FUNC) - - -## -# cache storages -## - -class CacheStorage(object): - """[abstract] Template object cache class (in memory and/or file)""" - - def __init__(self): - self.items = {} # key: full path, value: template object - - def get(self, cachepath, create_template): - """get template object. if not found, load attributes from cache file and restore template object.""" - template = self.items.get(cachepath) - if not template: - dct = self._load(cachepath) - if dct: - template = create_template() - for k in dct: - setattr(template, k, dct[k]) - self.items[cachepath] = template - return template - - def set(self, cachepath, template): - """set template object and save template attributes into cache file.""" - self.items[cachepath] = template - dct = self._save_data_of(template) - return self._store(cachepath, dct) - - def _save_data_of(self, template): - return {'args': template.args, 'bytecode': template.bytecode, - 'script': template.script, 'timestamp': template.timestamp} - - def unset(self, cachepath): - """remove template object from dict and cache file.""" - self.items.pop(cachepath, None) - return self._delete(cachepath) - - def clear(self): - """remove all template objects and attributes from dict and cache file.""" - d, self.items = self.items, {} - for k in d.iterkeys(): - self._delete(k) - d.clear() - - def _load(self, cachepath): - """(abstract) load dict object which represents template object attributes from cache file.""" - raise NotImplementedError.new( - 
"%s#_load(): not implemented yet." % self.__class__.__name__) - - def _store(self, cachepath, template): - """(abstract) load dict object which represents template object attributes from cache file.""" - raise NotImplementedError.new( - "%s#_store(): not implemented yet." % self.__class__.__name__) - - def _delete(self, cachepath): - """(abstract) remove template object from cache file.""" - raise NotImplementedError.new( - "%s#_delete(): not implemented yet." % self.__class__.__name__) - - -class MemoryCacheStorage(CacheStorage): - - def _load(self, cachepath): - return None - - def _store(self, cachepath, template): - pass - - def _delete(self, cachepath): - pass - - -class FileCacheStorage(CacheStorage): - - def _load(self, cachepath): - if not _isfile(cachepath): - return None - if logger: - logger.info("[tenjin.%s] load cache (file=%r)" % - (self.__class__.__name__, cachepath)) - data = _read_binary_file(cachepath) - return self._restore(data) - - def _store(self, cachepath, dct): - if logger: - logger.info("[tenjin.%s] store cache (file=%r)" % - (self.__class__.__name__, cachepath)) - data = self._dump(dct) - _write_binary_file(cachepath, data) - - def _restore(self, data): - raise NotImplementedError( - "%s._restore(): not implemented yet." % self.__class__.__name__) - - def _dump(self, dct): - raise NotImplementedError( - "%s._dump(): not implemented yet." 
% self.__class__.__name__) - - def _delete(self, cachepath): - _ignore_not_found_error(lambda: os.unlink(cachepath)) - - -class MarshalCacheStorage(FileCacheStorage): - - def _restore(self, data): - return marshal.loads(data) - - def _dump(self, dct): - return marshal.dumps(dct) - - -class PickleCacheStorage(FileCacheStorage): - - def __init__(self, *args, **kwargs): - global pickle - if pickle is None: - import cPickle as pickle - FileCacheStorage.__init__(self, *args, **kwargs) - - def _restore(self, data): - return pickle.loads(data) - - def _dump(self, dct): - dct.pop('bytecode', None) - return pickle.dumps(dct) - - -class TextCacheStorage(FileCacheStorage): - - def _restore(self, data): - header, script = data.split("\n\n", 1) - timestamp = encoding = args = None - for line in header.split("\n"): - key, val = line.split(": ", 1) - if key == 'timestamp': - timestamp = float(val) - elif key == 'encoding': - encoding = val - elif key == 'args': - args = val.split(', ') - if encoding: - script = script.decode(encoding) # binary(=str) to unicode - return {'args': args, 'script': script, 'timestamp': timestamp} - - def _dump(self, dct): - s = dct['script'] - if dct.get('encoding') and isinstance(s, unicode): - s = s.encode(dct['encoding']) # unicode to binary(=str) - sb = [] - sb.append("timestamp: %s\n" % dct['timestamp']) - if dct.get('encoding'): - sb.append("encoding: %s\n" % dct['encoding']) - if dct.get('args') is not None: - sb.append("args: %s\n" % ', '.join(dct['args'])) - sb.append("\n") - sb.append(s) - s = ''.join(sb) - if python3: - if isinstance(s, str): - # unicode(=str) to binary - s = s.encode(dct.get('encoding') or 'utf-8') - return s - - def _save_data_of(self, template): - dct = FileCacheStorage._save_data_of(self, template) - dct['encoding'] = template.encoding - return dct - - -## -# abstract class for data cache -## -class KeyValueStore(object): - - def get(self, key, *options): - raise NotImplementedError( - "%s.get(): not implemented yet." 
% self.__class__.__name__) - - def set(self, key, value, *options): - raise NotImplementedError( - "%s.set(): not implemented yet." % self.__class__.__name__) - - def delete(self, key, *options): - raise NotImplementedError( - "%s.del(): not implemented yet." % self.__class__.__name__) - - def has(self, key, *options): - raise NotImplementedError( - "%s.has(): not implemented yet." % self.__class__.__name__) - - -## -# memory base data cache -## -class MemoryBaseStore(KeyValueStore): - - def __init__(self): - self.values = {} - - def get(self, key, original_timestamp=None): - tupl = self.values.get(key) - if not tupl: - return None - value, created_at, expires_at = tupl - if original_timestamp is not None and created_at < original_timestamp: - self.delete(key) - return None - if expires_at < _time(): - self.delete(key) - return None - return value - - def set(self, key, value, lifetime=0): - created_at = _time() - expires_at = lifetime and created_at + lifetime or 0 - self.values[key] = (value, created_at, expires_at) - return True - - def delete(self, key): - try: - del self.values[key] - return True - except KeyError: - return False - - def has(self, key): - pair = self.values.get(key) - if not pair: - return False - value, created_at, expires_at = pair - if expires_at and expires_at < _time(): - self.delete(key) - return False - return True - - -## -# file base data cache -## -class FileBaseStore(KeyValueStore): - - lifetime = 604800 # = 60*60*24*7 - - def __init__(self, root_path, encoding=None): - if not os.path.isdir(root_path): - raise ValueError("%r: directory not found." 
% (root_path, )) - self.root_path = root_path - if encoding is None and python3: - encoding = 'utf-8' - self.encoding = encoding - - _pat = re.compile(r'[^-.\/\w]') - - def filepath(self, key, _pat1=_pat): - return os.path.join(self.root_path, _pat1.sub('_', key)) - - def get(self, key, original_timestamp=None): - fpath = self.filepath(key) - # if not _isfile(fpath): return None - stat = _ignore_not_found_error(lambda: os.stat(fpath), None) - if stat is None: - return None - created_at = stat.st_ctime - expires_at = stat.st_mtime - if original_timestamp is not None and created_at < original_timestamp: - self.delete(key) - return None - if expires_at < _time(): - self.delete(key) - return None - if self.encoding: - def f(): return _read_text_file(fpath, self.encoding) - else: - def f(): return _read_binary_file(fpath) - return _ignore_not_found_error(f, None) - - def set(self, key, value, lifetime=0): - fpath = self.filepath(key) - dirname = os.path.dirname(fpath) - if not os.path.isdir(dirname): - os.makedirs(dirname) - now = _time() - if isinstance(value, _unicode): - value = value.encode(self.encoding or 'utf-8') - _write_binary_file(fpath, value) - expires_at = now + (lifetime or self.lifetime) # timestamp - os.utime(fpath, (expires_at, expires_at)) - return True - - def delete(self, key): - fpath = self.filepath(key) - ret = _ignore_not_found_error(lambda: os.unlink(fpath), False) - return ret != False - - def has(self, key): - fpath = self.filepath(key) - if not _isfile(fpath): - return False - if _getmtime(fpath) < _time(): - self.delete(key) - return False - return True - - -## -# html fragment cache helper class -## -class FragmentCacheHelper(object): - """html fragment cache helper class.""" - - lifetime = 60 # 1 minute - prefix = None - - def __init__(self, store, lifetime=None, prefix=None): - self.store = store - if lifetime is not None: - self.lifetime = lifetime - if prefix is not None: - self.prefix = prefix - - def not_cached(self, cache_key, 
lifetime=None): - """(obsolete. use cache_as() instead of this.) - html fragment cache helper. see document of FragmentCacheHelper class.""" - context = sys._getframe(1).f_locals['_context'] - context['_cache_key'] = cache_key - key = self.prefix and self.prefix + cache_key or cache_key - value = self.store.get(key) - if value: # cached - if logger: - logger.debug('[tenjin.not_cached] %r: cached.' % (cache_key, )) - context[key] = value - return False - else: # not cached - if logger: - logger.debug( - '[tenjin.not_cached]: %r: not cached.' % (cache_key, )) - if key in context: - del context[key] - if lifetime is None: - lifetime = self.lifetime - context['_cache_lifetime'] = lifetime - helpers.start_capture(cache_key, _depth=2) - return True - - def echo_cached(self): - """(obsolete. use cache_as() instead of this.) - html fragment cache helper. see document of FragmentCacheHelper class.""" - f_locals = sys._getframe(1).f_locals - context = f_locals['_context'] - cache_key = context.pop('_cache_key') - key = self.prefix and self.prefix + cache_key or cache_key - if key in context: # cached - value = context.pop(key) - else: # not cached - value = helpers.stop_capture(False, _depth=2) - lifetime = context.pop('_cache_lifetime') - self.store.set(key, value, lifetime) - f_locals['_buf'].append(value) - - def functions(self): - """(obsolete. use cache_as() instead of this.)""" - return (self.not_cached, self.echo_cached) - - def cache_as(self, cache_key, lifetime=None): - key = self.prefix and self.prefix + cache_key or cache_key - _buf = sys._getframe(1).f_locals['_buf'] - value = self.store.get(key) - if value: - if logger: - logger.debug('[tenjin.cache_as] %r: cache found.' % - (cache_key, )) - _buf.append(value) - else: - if logger: - logger.debug( - '[tenjin.cache_as] %r: expired or not cached yet.' 
% (cache_key, )) - _buf_len = len(_buf) - yield None - value = ''.join(_buf[_buf_len:]) - self.store.set(key, value, lifetime) - - -# you can change default store by 'tenjin.helpers.fragment_cache.store = ...' -helpers.fragment_cache = FragmentCacheHelper(MemoryBaseStore()) -helpers.not_cached = helpers.fragment_cache.not_cached -helpers.echo_cached = helpers.fragment_cache.echo_cached -helpers.cache_as = helpers.fragment_cache.cache_as -helpers.__all__.extend(('not_cached', 'echo_cached', 'cache_as')) - - -## -# helper class to find and read template -## -class Loader(object): - - def exists(self, filepath): - raise NotImplementedError( - "%s.exists(): not implemented yet." % self.__class__.__name__) - - def find(self, filename, dirs=None): - #: if dirs provided then search template file from it. - if dirs: - for dirname in dirs: - filepath = os.path.join(dirname, filename) - if self.exists(filepath): - return filepath - #: if dirs not provided then just return filename if file exists. - else: - if self.exists(filename): - return filename - #: if file not found then return None. - return None - - def abspath(self, filename): - raise NotImplementedError( - "%s.abspath(): not implemented yet." % self.__class__.__name__) - - def timestamp(self, filepath): - raise NotImplementedError( - "%s.timestamp(): not implemented yet." % self.__class__.__name__) - - def load(self, filepath): - raise NotImplementedError( - "%s.timestamp(): not implemented yet." % self.__class__.__name__) - - -## -# helper class to find and read files -## -class FileSystemLoader(Loader): - - def exists(self, filepath): - #: return True if filepath exists as a file. 
- return os.path.isfile(filepath) - - def abspath(self, filepath): - #: return full-path of filepath - return os.path.abspath(filepath) - - def timestamp(self, filepath): - #: return mtime of file - return _getmtime(filepath) - - def load(self, filepath): - #: if file exists, return file content and mtime - def f(): - mtime = _getmtime(filepath) - input = _read_template_file(filepath) - mtime2 = _getmtime(filepath) - if mtime != mtime2: - mtime = mtime2 - input = _read_template_file(filepath) - mtime2 = _getmtime(filepath) - if mtime != mtime2: - if logger: - logger.warn( - "[tenjin] %s.load(): timestamp is changed while reading file." % self.__class__.__name__) - return input, mtime - #: if file not exist, return None - return _ignore_not_found_error(f) - - -## -## -## -class TemplateNotFoundError(Exception): - pass - - -## -# template engine class -## - -class Engine(object): - """Template Engine class. - See User's Guide and examples for details. - http://www.kuwata-lab.com/tenjin/pytenjin-users-guide.html - http://www.kuwata-lab.com/tenjin/pytenjin-examples.html - """ - - # default value of attributes - prefix = '' - postfix = '' - layout = None - templateclass = Template - path = None - cache = TextCacheStorage() # save converted Python code into text file - lang = None - loader = FileSystemLoader() - preprocess = False - preprocessorclass = Preprocessor - timestamp_interval = 1 # seconds - - def __init__(self, prefix=None, postfix=None, layout=None, path=None, cache=True, preprocess=None, templateclass=None, preprocessorclass=None, lang=None, loader=None, pp=None, **kwargs): - """Initializer of Engine class. - - prefix:str (='') - Prefix string used to convert template short name to template filename. - postfix:str (='') - Postfix string used to convert template short name to template filename. - layout:str (=None) - Default layout template name. - path:list of str(=None) - List of directory names which contain template files. 
- cache:bool or CacheStorage instance (=True) - Cache storage object to store converted python code. - If True, default cache storage (=Engine.cache) is used (if it is None - then create MarshalCacheStorage object for each engine object). - If False, no cache storage is used nor no cache files are created. - preprocess:bool(=False) - Activate preprocessing or not. - templateclass:class (=Template) - Template class which engine creates automatically. - lang:str (=None) - Language name such as 'en', 'fr', 'ja', and so on. If you specify - this, cache file path will be 'inex.html.en.cache' for example. - pp:list (=None) - List of preprocessor object which is callable and manipulates template content. - kwargs:dict - Options for Template class constructor. - See document of Template.__init__() for details. - """ - if prefix: - self.prefix = prefix - if postfix: - self.postfix = postfix - if layout: - self.layout = layout - if templateclass: - self.templateclass = templateclass - if preprocessorclass: - self.preprocessorclass = preprocessorclass - if path is not None: - self.path = path - if lang is not None: - self.lang = lang - if loader is not None: - self.loader = loader - if preprocess is not None: - self.preprocess = preprocess - if pp is None: - pp = [] - elif isinstance(pp, list): - pass - elif isinstance(pp, tuple): - pp = list(pp) - else: - raise TypeError("'pp' expected to be a list but got %r." 
% (pp,)) - self.pp = pp - if preprocess: - self.pp.append(TemplatePreprocessor(self.preprocessorclass)) - self.kwargs = kwargs - self.encoding = kwargs.get('encoding') - self._filepaths = {} # template_name => relative path and absolute path - self._added_templates = {} # templates added by add_template() - #self.cache = cache - self._set_cache_storage(cache) - - def _set_cache_storage(self, cache): - if cache is True: - if not self.cache: - self.cache = MarshalCacheStorage() - elif cache is None: - pass - elif cache is False: - self.cache = None - elif isinstance(cache, CacheStorage): - self.cache = cache - else: - raise ValueError("%r: invalid cache object." % (cache, )) - - def cachename(self, filepath): - #: if lang is provided then add it to cache filename. - if self.lang: - return '%s.%s.cache' % (filepath, self.lang) - #: return cache file name. - else: - return filepath + '.cache' - - def to_filename(self, template_name): - """Convert template short name into filename. - ex. - >>> engine = tenjin.Engine(prefix='user_', postfix='.pyhtml') - >>> engine.to_filename(':list') - 'user_list.pyhtml' - >>> engine.to_filename('list') - 'list' - """ - #: if template_name starts with ':', add prefix and postfix to it. - if template_name[0] == ':': - return self.prefix + template_name[1:] + self.postfix - #: if template_name doesn't start with ':', just return it. - return template_name - - def _create_template(self, input=None, filepath=None, _context=None, _globals=None): - #: if input is not specified then just create empty template object. - template = self.templateclass(None, **self.kwargs) - #: if input is specified then create template object and return it. 
- if input: - template.convert(input, filepath) - return template - - def _preprocess(self, input, filepath, _context, _globals): - #if _context is None: _context = {} - #if _globals is None: _globals = sys._getframe(3).f_globals - #: preprocess template and return result - #preprocessor = self.preprocessorclass(filepath, input=input) - # return preprocessor.render(_context, globals=_globals) - #: preprocesses input with _context and returns result. - if '_engine' not in _context: - self.hook_context(_context) - for pp in self.pp: - input = pp.__call__(input, filename=filepath, - context=_context, globals=_globals) - return input - - def add_template(self, template): - self._added_templates[template.filename] = template - - def _get_template_from_cache(self, cachepath, filepath): - #: if template not found in cache, return None - template = self.cache.get(cachepath, self.templateclass) - if not template: - return None - assert template.timestamp is not None - #: if checked within a sec, skip timestamp check. - now = _time() - last_checked = getattr(template, '_last_checked_at', None) - if last_checked and now < last_checked + self.timestamp_interval: - # if logger: logger.trace('[tenjin.%s] timestamp check skipped (%f < %f + %f)' % \ - # (self.__class__.__name__, now, template._last_checked_at, self.timestamp_interval)) - return template - #: if timestamp of template objectis same as file, return it. - if template.timestamp == self.loader.timestamp(filepath): - template._last_checked_at = now - return template - #: if timestamp of template object is different from file, clear it - # cache._delete(cachepath) - if logger: - logger.info("[tenjin.%s] cache expired (filepath=%r)" % - (self.__class__.__name__, filepath)) - return None - - def get_template(self, template_name, _context=None, _globals=None): - """Return template object. - If template object has not registered, template engine creates - and registers template object automatically. 
- """ - #: accept template_name such as ':index'. - filename = self.to_filename(template_name) - #: if template object is added by add_template(), return it. - if filename in self._added_templates: - return self._added_templates[filename] - #: get filepath and fullpath of template - pair = self._filepaths.get(filename) - if pair: - filepath, fullpath = pair - else: - #: if template file is not found then raise TemplateNotFoundError. - filepath = self.loader.find(filename, self.path) - if not filepath: - raise TemplateNotFoundError( - '%s: filename not found (path=%r).' % (filename, self.path)) - # - fullpath = self.loader.abspath(filepath) - self._filepaths[filename] = (filepath, fullpath) - #: use full path as base of cache file path - cachepath = self.cachename(fullpath) - #: get template object from cache - cache = self.cache - template = cache and self._get_template_from_cache( - cachepath, filepath) or None - #: if template object is not found in cache or is expired... - if not template: - ret = self.loader.load(filepath) - if not ret: - raise TemplateNotFoundError( - "%r: template not found." % filepath) - input, timestamp = ret - if self.pp: # required for preprocessing - if _context is None: - _context = {} - if _globals is None: - _globals = sys._getframe(1).f_globals - input = self._preprocess(input, filepath, _context, _globals) - #: create template object. - template = self._create_template( - input, filepath, _context, _globals) - #: set timestamp and filename of template object. - template.timestamp = timestamp - template._last_checked_at = _time() - #: save template object into cache. - if cache: - if not template.bytecode: - #: ignores syntax error when compiling. 
- try: - template.compile() - except SyntaxError: - pass - cache.set(cachepath, template) - # else: - # template.compile() - #: - template.filename = filepath - return template - - def include(self, template_name, append_to_buf=True, **kwargs): - """Evaluate template using current local variables as context. - - template_name:str - Filename (ex. 'user_list.pyhtml') or short name (ex. ':list') of template. - append_to_buf:boolean (=True) - If True then append output into _buf and return None, - else return stirng output. - - ex. - <?py include('file.pyhtml') ?> - #{include('file.pyhtml', False)} - <?py val = include('file.pyhtml', False) ?> - """ - #: get local and global vars of caller. - frame = sys._getframe(1) - locals = frame.f_locals - globals = frame.f_globals - #: get _context from caller's local vars. - assert '_context' in locals - context = locals['_context'] - #: if kwargs specified then add them into context. - if kwargs: - context.update(kwargs) - #: get template object with context data and global vars. - # (context and globals are passed to get_template() only for preprocessing.) - template = self.get_template(template_name, context, globals) - #: if append_to_buf is true then add output to _buf. - #: if append_to_buf is false then don't add output to _buf. - if append_to_buf: - _buf = locals['_buf'] - else: - _buf = None - #: render template and return output. - s = template.render(context, globals, _buf=_buf) - #: kwargs are removed from context data. - if kwargs: - for k in kwargs: - del context[k] - return s - - def render(self, template_name, context=None, globals=None, layout=True): - """Evaluate template with layout file and return result of evaluation. - - template_name:str - Filename (ex. 'user_list.pyhtml') or short name (ex. ':list') of template. - context:dict (=None) - Context object to evaluate. If None then new dict is used. - globals:dict (=None) - Global context to evaluate. If None then globals() is used. 
- layout:str or Bool(=True) - If True, the default layout name specified in constructor is used. - If False, no layout template is used. - If str, it is regarded as layout template name. - - If temlate object related with the 'template_name' argument is not exist, - engine generates a template object and register it automatically. - """ - if context is None: - context = {} - if globals is None: - globals = sys._getframe(1).f_globals - self.hook_context(context) - while True: - # context and globals are passed to get_template() only for preprocessing - template = self.get_template(template_name, context, globals) - content = template.render(context, globals) - layout = context.pop('_layout', layout) - if layout is True or layout is None: - layout = self.layout - if not layout: - break - template_name = layout - layout = False - context['_content'] = content - context.pop('_content', None) - return content - - def hook_context(self, context): - #: add engine itself into context data. - context['_engine'] = self - #context['render'] = self.render - #: add include() method into context data. - context['include'] = self.include - - -## -# safe template and engine -## - -class SafeTemplate(Template): - """Uses 'to_escaped()' instead of 'escape()'. - '#{...}' is not allowed with this class. Use '[==...==]' instead. 
- """ - - tostrfunc = 'to_str' - escapefunc = 'to_escaped' - - def get_expr_and_flags(self, match): - return _get_expr_and_flags(match, "#{%s}: '#{}' is not allowed with SafeTemplate.") - - -class SafePreprocessor(Preprocessor): - - tostrfunc = 'to_str' - escapefunc = 'to_escaped' - - def get_expr_and_flags(self, match): - return _get_expr_and_flags(match, "#{{%s}}: '#{{}}' is not allowed with SafePreprocessor.") - - -def _get_expr_and_flags(match, errmsg): - expr1, expr2, expr3, expr4 = match.groups() - if expr1 is not None: - raise TemplateSyntaxError(errmsg % match.group(1)) - if expr2 is not None: - return expr2, (True, False) # #{...} : call escape, not to_str - if expr3 is not None: - return expr3, (False, True) # [==...==] : not escape, call to_str - if expr4 is not None: - return expr4, (True, False) # [=...=] : call escape, not to_str - - -class SafeEngine(Engine): - - templateclass = SafeTemplate - preprocessorclass = SafePreprocessor - - -## -# for Google App Engine -# (should separate into individual file or module?) 
-## - -def _dummy(): - global memcache, _tenjin - memcache = _tenjin = None # lazy import of google.appengine.api.memcache - global GaeMemcacheCacheStorage, GaeMemcacheStore, init - - class GaeMemcacheCacheStorage(CacheStorage): - - lifetime = 0 # 0 means unlimited - - def __init__(self, lifetime=None, namespace=None): - CacheStorage.__init__(self) - if lifetime is not None: - self.lifetime = lifetime - self.namespace = namespace - - def _load(self, cachepath): - key = cachepath - if _tenjin.logger: - _tenjin.logger.info( - "[tenjin.gae.GaeMemcacheCacheStorage] load cache (key=%r)" % (key, )) - return memcache.get(key, namespace=self.namespace) - - def _store(self, cachepath, dct): - dct.pop('bytecode', None) - key = cachepath - if _tenjin.logger: - _tenjin.logger.info( - "[tenjin.gae.GaeMemcacheCacheStorage] store cache (key=%r)" % (key, )) - ret = memcache.set(key, dct, self.lifetime, - namespace=self.namespace) - if not ret: - if _tenjin.logger: - _tenjin.logger.info( - "[tenjin.gae.GaeMemcacheCacheStorage] failed to store cache (key=%r)" % (key, )) - - def _delete(self, cachepath): - key = cachepath - memcache.delete(key, namespace=self.namespace) - - class GaeMemcacheStore(KeyValueStore): - - lifetime = 0 - - def __init__(self, lifetime=None, namespace=None): - if lifetime is not None: - self.lifetime = lifetime - self.namespace = namespace - - def get(self, key): - return memcache.get(key, namespace=self.namespace) - - def set(self, key, value, lifetime=None): - if lifetime is None: - lifetime = self.lifetime - if memcache.set(key, value, lifetime, namespace=self.namespace): - return True - else: - if _tenjin.logger: - _tenjin.logger.info( - "[tenjin.gae.GaeMemcacheStore] failed to set (key=%r)" % (key, )) - return False - - def delete(self, key): - return memcache.delete(key, namespace=self.namespace) - - def has(self, key): - if memcache.add(key, 'dummy', namespace=self.namespace): - memcache.delete(key, namespace=self.namespace) - return False - else: - 
return True - - def init(): - global memcache, _tenjin - if not memcache: - from google.appengine.api import memcache - if not _tenjin: - import tenjin as _tenjin - # avoid cache confliction between versions - ver = os.environ.get('CURRENT_VERSION_ID', '1.1') # .split('.')[0] - Engine.cache = GaeMemcacheCacheStorage(namespace=ver) - # set fragment cache store - helpers.fragment_cache.store = GaeMemcacheStore(namespace=ver) - helpers.fragment_cache.lifetime = 60 # 1 minute - helpers.fragment_cache.prefix = 'fragment.' - - -gae = create_module('tenjin.gae', _dummy, - os=os, helpers=helpers, Engine=Engine, - CacheStorage=CacheStorage, KeyValueStore=KeyValueStore) - - -del _dummy diff --git a/cgi/weabot.py b/cgi/weabot.py index 720916d..636eb02 100755 --- a/cgi/weabot.py +++ b/cgi/weabot.py @@ -23,7 +23,7 @@ from formatting import * from post import * from img import * -__version__ = "0.10.0" +__version__ = "0.10.5" # Set to True to disable weabot's exception routing and enable profiling _DEBUG = False @@ -35,7 +35,7 @@ class weabot(object): def __init__(self, environ, start_response): global _DEBUG - logging.basicConfig(filename='weabot.log', format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG) + logging.basicConfig(filename='weabot.log', format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO) self.environ = environ if self.environ["PATH_INFO"].startswith("/weabot.py/"): @@ -85,7 +85,7 @@ class weabot(object): def error(self, message): board = Settings._.BOARD if board: - if board['board_type'] == '1': + if board['board_type'] == 1: info = {} info['host'] = self.environ["REMOTE_ADDR"] info['name'] = self.formdata.get('fielda', '') @@ -265,7 +265,7 @@ class weabot(object): if Settings.ENABLE_BANS and addressIsBanned(self.environ['REMOTE_ADDR'], board["dir"], blind_only=True): raise UserError('<meta http-equiv="refresh" content="0; url=/cgi/banned/%s">' % board["dir"]) - if len(path_split) > 4 and path_split[4] and board['board_type'] == '1': 
+ if len(path_split) > 4 and path_split[4] and board['board_type'] == 1: self.output = dynamicRead(int(path_split[3]), path_split[4], True) elif board['board_type'] == 1: self.output = threadPage(0, True, int(path_split[3])) @@ -329,12 +329,12 @@ class weabot(object): self.output += '<html xmlns="http://www.w3.org/1999/xhtml"><meta http-equiv="refresh" content="0;url=%s" /><body><p>...</p></body></html>' % url elif path_split[1] == "banned": OpenDb() - bans = FetchAll("SELECT * FROM `bans` WHERE INET6_ATON('"+self.environ["REMOTE_ADDR"]+"') BETWEEN `ipstart` AND `ipend`") + bans = FetchAll("SELECT * FROM `bans` WHERE INET6_ATON(%s) BETWEEN `ipstart` AND `ipend`", (self.environ["REMOTE_ADDR"],)) if bans: for ban in bans: if ban["boards"]: - boards = pickle.loads(ban["boards"]) - if ban["boards"] or path_split[2] in boards: + boards = str2boards(ban["boards"]) + if not ban["boards"] or path_split[2] in boards: caught = True if ban["boards"]: boards_str = '/' + '/, /'.join(boards) + '/' @@ -592,7 +592,7 @@ class weabot(object): # make ID hash if board["useid"]: post["timestamp_formatted"] += ' ID:' + iphash(ip, post, tim, board["useid"], mobile, - self.environ["HTTP_USER_AGENT"], cap_id, hide_end, (board["countrycode"] in ['1', '2'])) + self.environ["HTTP_USER_AGENT"], cap_id, hide_end, (board["countrycode"] in [1, 2])) # use for future file checks xfile = (file is not None or oek_file) @@ -793,7 +793,13 @@ class weabot(object): postid = post.insert() # delete threads that have crossed last page - trimThreads() + trimmed = trimThreads() + + # let's stop here if the thread we posted in got trimmed + if post["parentid"] and post["parentid"] in trimmed: + regenerateFrontPages() + regenerateHome() + raise UserError("El hilo en el que publicaste ya fue eliminado.") # fix null references when creating thread if board["board_type"] == 1 and not post["parentid"]: @@ -947,8 +953,8 @@ class weabot(object): raise UserError(_("You're banned.")) # check if post exists - post = 
FetchOne("SELECT `id`, `parentid`, `ip` FROM `posts` WHERE `id` = '%s' AND `boardid` = '%s'" % ( - _mysql.escape_string(str(postid)), _mysql.escape_string(board['id']))) + post = FetchOne("SELECT `id`, `parentid`, `ip` FROM `posts` WHERE `id` = %s AND `boardid` = %s", + (postid, board['id'])) if not post: raise UserError(_("Post doesn't exist.")) @@ -963,13 +969,12 @@ class weabot(object): # insert report t = time.time() - message = cgi.escape(self.formdata["reason"]).strip()[0:8000] + message = html.escape(self.formdata["reason"]).strip()[0:800] message = message.replace("\n", "<br />") UpdateDb("INSERT INTO `reports` (board, postid, parentid, link, ip, reason, repip, timestamp, timestamp_formatted) " + - "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', INET6_ATON('%s'), '%s', '%s')" % ( - board["dir"], post['id'], post['parentid'], link, post['ip'], _mysql.escape_string(message), - _mysql.escape_string(self.environ["REMOTE_ADDR"]), str(t), formatTimestamp(t))) + "VALUES (%s, %s, %s, %s, %s, %s, INET6_ATON(%s), %s, %s)", + (board["dir"], post['id'], post['parentid'], link, post['ip'], message, self.environ["REMOTE_ADDR"], t, formatTimestamp(t))) self.output = renderTemplate("report.html", {'finished': True}) def stats(self): |