diff options
Diffstat (limited to 'cgi/formatting.py')
-rw-r--r-- | cgi/formatting.py | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/cgi/formatting.py b/cgi/formatting.py new file mode 100644 index 0000000..d21bee2 --- /dev/null +++ b/cgi/formatting.py @@ -0,0 +1,425 @@ +# coding=utf-8 +import string +import cgi +import os +import re +import pickle +import time +import _mysql + +from database import * +from framework import * +from post import regenerateAccess +#from xhtml_clean import Cleaner + +from settings import Settings + +def format_post(message, ip, parentid, parent_timestamp=0): + """ + Formats posts using the specified format + """ + board = Settings._.BOARD + using_markdown = False + + # Escape any HTML if user is not using Markdown or HTML + if not Settings.USE_HTML: + message = cgi.escape(message) + + # Strip text + message = message.rstrip()[0:8000] + + # Treat HTML + if Settings.USE_MARKDOWN: + message = markdown(message) + using_markdown = True + if Settings.USE_HTML: + message = onlyAllowedHTML(message) + + # [code] tag + if board["dir"] == "tech": + message = re.compile(r"\[code\](.+)\[/code\]", re.DOTALL | re.IGNORECASE).sub(r"<pre><code>\1</code></pre>", message) + if board["allow_spoilers"]: + message = re.compile(r"\[spoiler\](.+)\[/spoiler\]", re.DOTALL | re.IGNORECASE).sub(r'<span class="spoil">\1</span>', message) + + if Settings.VIDEO_THUMBS: + (message, affected) = videoThumbs(message) + if affected: + message = close_html(message) + + message = clickableURLs(message) + message = checkRefLinks(message, parentid, parent_timestamp) + message = checkWordfilters(message, ip, board["dir"]) + + # If not using markdown quotes must be created and \n changed for HTML line breaks + if not using_markdown: + message = re.compile(r"^(\n)+").sub('', message) + message = checkQuotes(message) + message = message.replace("\n", "<br />") + + return message + +def tripcode(name): + """ + Calculate tripcode to match output of most imageboards + """ + if name == '': + return '', '' + + board = Settings._.BOARD + + name = name.decode('utf-8') + key = Settings.TRIP_CHAR.decode('utf-8') + + # if there's a trip + (namepart, marker, trippart) = name.partition('#') + if marker: + namepart = cleanString(namepart) + trip = '' + + # secure tripcode + if Settings.ALLOW_SECURE_TRIPCODES and '#' in trippart: + (trippart, securemarker, securepart) = trippart.partition('#') + try: + securepart = securepart.encode("sjis", "ignore") + except: + pass + + # encode secure tripcode + trip = getMD5(securepart + Settings.SECRET) + trip = trip.encode('base64').replace('\n', '') + trip = trip.encode('rot13') + trip = key+key+trip[2:12] + + # return it if we don't have a normal tripcode + if trippart == '': + return namepart.encode('utf-8'), trip.encode('utf-8') + + # do normal tripcode + from crypt import crypt + try: + trippart = trippart.encode("sjis", "ignore") + except: + pass + + trippart = cleanString(trippart, True, True) + salt = re.sub(r"[^\.-z]", ".", (trippart + "H..")[1:3]) + salt = salt.translate(string.maketrans(r":;=?@[\]^_`", "ABDFGabcdef")) + trip = key + crypt(trippart, salt)[-10:] + trip + + return namepart.encode('utf-8'), trip.encode('utf-8') + + return name.encode('utf-8'), '' + +def iphash(ip, post, t, useid, mobile, agent, cap_id, hide_end, has_countrycode): + current_t = time.time() + + if cap_id: + id = cap_id + elif 'sage' in post['email'] and useid == '1': + id = '???' + elif ip == "127.0.0.1": + id = '???' + else: + day = int((current_t + (Settings.TIME_ZONE*3600)) / 86400) + word = ',' + str(day) + + # Make difference by thread + word += ',' + str(t) + + id = hide_data(ip + word, 6, "id", Settings.SECRET) + + if hide_end: + id += '*' + elif addressIsTor(ip): + id += 'T' + elif 'Dalvik' in agent: + id += 'R' + elif 'Android' in agent: + id += 'a' + elif 'iPhone' in agent: + id += 'i' + elif useid == '3': + if 'Firefox' in agent: + id += 'F' + elif 'Safari' in agent and not 'Chrome' in agent: + id += 's' + elif 'Chrome' in agent: + id += 'C' + elif 'SeaMonkey' in agent: + id += 'S' + elif 'Edge' in agent: + id += 'E' + elif 'Opera' in agent or 'OPR' in agent: + id += 'o' + elif 'MSIE' in agent or 'Trident' in agent: + id += 'I' + elif mobile: + id += 'Q' + else: + id += '0' + elif mobile: + id += 'Q' + else: + id += '0' + + if addressIsBanned(ip, ""): + id += '#' + if (not has_countrycode and + not addressIsTor(ip) and + (addressIsProxy(ip) or not addressIsES(ip))): + id += '!' + + return id + +def cleanString(string, escape=True, quote=False): + string = string.strip() + if escape: + string = cgi.escape(string, quote) + return string + +def clickableURLs(message): + # URL + message = re.compile(r'( |^|:|\(|\[)((?:https?://|ftp://|mailto:|news:|irc:)[^\s<>()"]*?(?:\([^\s<>()"]*?\)[^\s<>()"]*?)*)((?:\s|<|>|"|\.|\|\]|!|\?|,|,|")*(?:[\s<>()"]|$))', re.M).sub(r'\1<a href="\2" rel="nofollow" target="_blank">\2</a>\3', message) + # Emails + message = re.compile(r"( |^|:)([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,6})", re.I | re.M).sub(r'\1<a href="mailto:\2" rel="nofollow"><\2></a>', message) + + return message + +def videoThumbs(message): + # Youtube + __RE = re.compile(r"^(?: +)?(https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)([\w\-]+))(?: +)?$", re.M) + matches = __RE.finditer(message) + if matches: + import json + import urllib, urllib2 + + v_ids = [] + videos = {} + + for match in matches: + v_id = match.group(2) + if v_id not in v_ids: + v_ids.append(v_id) + videos[v_id] = { + 'span': match.span(0), + 'url': match.group(1), + } + if len(v_ids) >= Settings.VIDEO_THUMBS_LIMIT: + raise UserError, "Has incluído muchos videos en tu mensaje. El máximo es %d." % Settings.VIDEO_THUMBS_LIMIT + + if videos: + params = { + 'key': Settings.GOOGLE_API_KEY, + 'part': 'snippet,contentDetails', + 'id': ','.join(v_ids) + } + r_url = "https://www.googleapis.com/youtube/v3/videos?"+urllib.urlencode(params) + res = urllib2.urlopen(r_url) + res_json = json.load(res) + + offset = 0 + for item in res_json['items']: + v_id = item['id'] + (start, end) = videos[v_id]['span'] + end += 1 # remove endline + + try: + new_url = '<a href="%(url)s" target="_blank" class="yt"><span class="pvw"><img src="%(thumb)s" /></span><b>%(title)s</b> (%(secs)s)<br />%(channel)s</a><br />' \ + % {'title': item['snippet']['title'].encode('utf-8'), + 'channel': item['snippet']['channelTitle'].encode('utf-8'), + 'secs': parseIsoPeriod(item['contentDetails']['duration']).encode('utf-8'), + 'url': videos[v_id]['url'], + 'id': v_id.encode('utf-8'), + 'thumb': item['snippet']['thumbnails']['default']['url'].encode('utf-8'),} + except UnicodeDecodeError: + raise UserError, repr(v_id) + message = message[:start+offset] + new_url + message[end+offset:] + offset += len(new_url) - (end-start) + + return (message, len(videos)) + +def fixMobileLinks(message): + """ + Shorten long links; Convert >># links into a mobile version + """ + board = Settings._.BOARD + + # If textboard + if board["board_type"] == '1': + message = re.compile(r'<a href="/(\w+)/read/(\d+)(\.html)?/*(.+)"').sub(r'<a href="/cgi/mobileread/\1/\2/\4"', message) + else: + message = re.compile(r'<a href="/(\w+)/res/(\d+)\.html#(\d+)"').sub(r'<a href="/cgi/mobileread/\1/\2#\3"', message) + + return message + +def checkRefLinks(message, parentid, parent_timestamp): + """ + Check for >># links in posts and replace with the HTML to make them clickable + """ + board = Settings._.BOARD + + if board["board_type"] == '1': + # Textboard + if parentid != '0': + message = re.compile(r'>>(\d+(,\d+|-(?=[ \d\n])|\d+)*n?)').sub('<a href="' + Settings.BOARDS_URL + board['dir'] + '/read/' + str(parent_timestamp) + r'/\1">>>\1</a>', message) + else: + # Imageboard + quotes_id_array = re.findall(r">>([0-9]+)", message) + for quotes in quotes_id_array: + try: + post = FetchOne('SELECT * FROM `posts` WHERE `id` = ' + quotes + ' AND `boardid` = ' + board['id'] + ' LIMIT 1') + if post['parentid'] != '0': + message = re.compile(">>" + quotes).sub('<a href="' + Settings.BOARDS_URL + board['dir'] + '/res/' + post['parentid'] + '.html#' + quotes + '">>>' + quotes + '</a>', message) + else: + message = re.compile(">>" + quotes).sub('<a href="' + Settings.BOARDS_URL + board['dir'] + '/res/' + post['id'] + '.html#' + quotes + '">>>' + quotes + '</a>', message) + except: + message = re.compile(">>" + quotes).sub(r'<span class="q">>>'+quotes+'</span>', message) + + return message + +def checkQuotes(message): + """ + Check for >text in posts and add span around it to color according to the css + """ + message = re.compile(r"^>(.*)$", re.MULTILINE).sub(r'<span class="q">>\1</span>', message) + return message + +def escapeHTML(string): + string = string.replace('<', '<') + string = string.replace('>', '>') + return string + +def onlyAllowedHTML(message): + """ + Allow <b>, <i>, <u>, <strike>, and <pre> in posts, along with the special <aa> + """ + message = sanitize_html(message) + #message = re.compile(r"\[aa\](.+?)\[/aa\]", re.DOTALL | re.IGNORECASE).sub("<span class=\"sjis\">\\1</span>", message) + + return message + +def close_html(message): + """ + Old retarded version of sanitize_html, it just closes open tags. + """ + import BeautifulSoup + return unicode(BeautifulSoup.BeautifulSoup(message)).replace(' ', '').encode('utf-8') + +def sanitize_html(message, decode=True): + """ + Clean the code and allow only a few safe tags. + """ + import BeautifulSoup + + # Decode message from utf-8 if required + if decode: + message = message.decode('utf-8', 'replace') + + # Create HTML Cleaner with our allowed tags + whitelist_tags = ["a","b","br","blink","code","del","em","i","marquee","root","strike","strong","sub","sup","u"] + whitelist_attr = ["href"] + + soup = BeautifulSoup.BeautifulSoup(message) + + # Remove tags that aren't allowed + for tag in soup.findAll(): + if not tag.name.lower() in whitelist_tags: + tag.name = "span" + tag.attrs = [] + else: + for attr in [attr for attr in tag.attrs if attr not in whitelist_attr]: + del tag[attr] + + # We export the soup into a correct XHTML string + string = unicode(soup).encode('utf-8') + # We remove some anomalies we don't want + string = string.replace('<br/>', '<br />').replace(' ', '') + + return string + +def markdown(message): + import markdown + if message.strip() != "": + #return markdown.markdown(message).rstrip("\n").rstrip("<br />") + return markdown.markdown(message, extras=["cuddled-lists", "code-friendly"]).encode('utf-8') + else: + return "" + +def checkWordfilters(message, ip, board): + fixed_ip = inet_aton(ip) + wordfilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '0' ORDER BY `id` ASC") + for wordfilter in wordfilters: + if wordfilter["boards"] != "": + boards = pickle.loads(wordfilter["boards"]) + if wordfilter["boards"] == "" or board in boards: + if wordfilter['action'] == '0': + if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: + raise UserError, wordfilter['reason'] + elif wordfilter['action'] == '1': + message = re.compile(wordfilter['from'], re.DOTALL | re.IGNORECASE).sub(wordfilter['to'], message) + elif wordfilter['action'] == '2': + # Ban + if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: + if wordfilter['seconds'] != '0': + until = str(timestamp() + int(wordfilter['seconds'])) + else: + until = '0' + + InsertDb("INSERT INTO `bans` (`ip`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (" + \ + "'" + str(fixed_ip) + "', '" + _mysql.escape_string(wordfilter['boards']) + \ + "', " + str(timestamp()) + ", " + until + ", 'System', '" + _mysql.escape_string(wordfilter['reason']) + \ + "', 'Word Auto-ban', '"+_mysql.escape_string(wordfilter['blind'])+"')") + regenerateAccess() + raise UserError, wordfilter['reason'] + elif wordfilter['action'] == '3': + if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: + raise UserError, '<meta http-equiv="refresh" content="%s;url=%s" />%s' % (wordfilter['redirect_time'], wordfilter['redirect_url'], wordfilter['reason']) + return message + +def checkNamefilters(name, tripcode, ip, board): + namefilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '1'") + + for namefilter in namefilters: + if namefilter["boards"] != "": + boards = pickle.loads(namefilter["boards"]) + if namefilter["boards"] == "" or board in boards: + # check if this filter applies + match = False + + if namefilter['from'] and namefilter['from_trip']: + # both name and trip filter + if re.search(namefilter['from'], name, re.DOTALL | re.IGNORECASE) and tripcode == namefilter['from_trip']: + match = True + elif namefilter['from'] and not namefilter['from_trip']: + # name filter + if re.search(namefilter['from'], name, re.DOTALL | re.IGNORECASE): + match = True + elif not namefilter['from'] and namefilter['from_trip']: + # trip filter + if tripcode == namefilter['from_trip']: + match = True + + if match: + # do action + if namefilter['action'] == '0': + raise UserError, namefilter['reason'] + elif namefilter['action'] == '1': + name = namefilter['to'] + tripcode = '' + return name, tripcode + elif namefilter['action'] == '2': + # Ban + if namefilter['seconds'] != '0': + until = str(timestamp() + int(namefilter['seconds'])) + else: + until = '0' + + InsertDb("INSERT INTO `bans` (`ip`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (" + \ + "'" + _mysql.escape_string(ip) + "', '" + _mysql.escape_string(namefilter['boards']) + \ + "', " + str(timestamp()) + ", " + until + ", 'System', '" + _mysql.escape_string(namefilter['reason']) + \ + "', 'Name Auto-ban', '"+_mysql.escape_string(namefilter['blind'])+"')") + regenerateAccess() + raise UserError, namefilter['reason'] + elif namefilter['action'] == '3': + raise UserError, '<meta http-equiv="refresh" content="%s;url=%s" />%s' % (namefilter['redirect_time'], namefilter['redirect_url'], namefilter['reason']) + return name, tripcode |