# coding=utf-8 import string import html import os import re import time from database import * from framework import * from post import regenerateAccess #from xhtml_clean import Cleaner from settings import Settings def format_post(message, ip, parentid, parent_timestamp=0): """ Formats posts using the specified format """ board = Settings._.BOARD using_markdown = False # Escape any HTML if user is not using Markdown or HTML if not Settings.USE_HTML: message = html.escape(message) # Strip text message = message.rstrip()[0:8000] # Treat HTML if Settings.USE_MARKDOWN: message = markdown(message) using_markdown = True if Settings.USE_HTML: message = onlyAllowedHTML(message) if Settings.VIDEO_THUMBS: (message, affected) = videoThumbs(message) # if affected: # message = close_html(message) message = clickableURLs(message) message = checkRefLinks(message, parentid, parent_timestamp) message = checkWordfilters(message, ip, board["dir"]) # If not using markdown quotes must be created and \n changed for HTML line breaks if not using_markdown: message = re.compile(r"^[\s\t]*(\n)+").sub("", message) message = re.compile(r"\n(\n)+").sub("\n\n", message) message = checkQuotes(message) message = message.replace("\n", "
") return message def tripcode(name): """ Calculate tripcode to match output of most imageboards """ if name == '': return '', '' board = Settings._.BOARD key = Settings.TRIP_CHAR # if there's a trip (namepart, marker, trippart) = name.partition('#') if marker: namepart = cleanString(namepart) trip = '' # secure tripcode if Settings.ALLOW_SECURE_TRIPCODES and '#' in trippart: (trippart, securemarker, securepart) = trippart.partition('#') try: securepart = securepart.encode("sjis", "ignore") except: pass # encode secure tripcode trip = getMD5(securepart + Settings.SECRET) trip = trip.encode('base64').replace('\n', '') trip = trip.encode('rot13') trip = key+key+trip[2:12] # return it if we don't have a normal tripcode if trippart == '': return namepart, trip # do normal tripcode from crypt import crypt #try: # trippart = trippart.encode("sjis", "ignore") #except: # pass trippart = html.unescape(trippart) trippart = html.escape(trippart, True) salt = re.sub(r"[^\.-z]", ".", (trippart + "H..")[1:3]) salt = salt.translate(str.maketrans(r":;=?@[\]^_`", "ABDFGabcdef")) trip = key + crypt(trippart, salt)[-10:] + trip return namepart, trip return name, '' def iphash(ip, post, t, useid, mobile, agent, cap_id, hide_end, has_countrycode): current_t = time.time() if cap_id: id = cap_id elif post['email'] and useid == 1: id = '???' elif ip == "127.0.0.1": id = '???' 
else: day = int((current_t + (Settings.TIME_ZONE*3600)) / 86400) word = ',' + str(day) # IDs change every 24 hours word += ',' + str(t) # IDs vary depending on thread id = getb64(getMD5(ip + word + Settings.SECRET))[-10:-2] if hide_end: id += '*' elif addressIsTor(ip): id += 'T' elif 'Dalvik' in agent: id += 'R' elif 'Android' in agent: id += 'a' elif 'iPhone' in agent: id += 'i' elif useid == 3: if 'Firefox' in agent: id += 'F' elif 'Safari' in agent and not 'Chrome' in agent: id += 's' elif 'Chrome' in agent: id += 'C' elif 'SeaMonkey' in agent: id += 'S' elif 'Edge' in agent: id += 'E' elif 'Opera' in agent or 'OPR' in agent: id += 'o' elif 'MSIE' in agent or 'Trident' in agent: id += 'I' elif mobile: id += 'Q' else: id += '0' elif mobile: id += 'Q' else: id += '0' if addressIsBanned(ip, ""): id += '#' elif (not has_countrycode and not addressIsTor(ip) and (addressIsProxy(ip) or not addressIsES(ip))): id += '!' logging.info("{} {} {}".format(ip, agent, id)) return id def cleanString(string, escape=True, quote=False): string = string.strip() if escape: string = html.escape(string, quote) return string def clickableURLs(message): # URL message = re.compile(r'( |^|:|\(|\[)((?:https?://|ftp://|mailto:|news:|irc:)[^\s<>()"]*?(?:\([^\s<>()"]*?\)[^\s<>()"]*?)*)((?:\s|<|>|"|\.|\|\]|!|\?|,|,|")*(?:[\s<>()"]|$))', re.M).sub(r'\1\2\3', message) # Emails message = re.compile(r"( |^|:)([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,6})", re.I | re.M).sub( r'\1<\2>', message) return message def videoThumbs(message): # Youtube __RE = re.compile( r"^(?: +)?(https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)([\w\-]+))(?: +)?$", re.M) matches = __RE.finditer(message) if matches: import json import urllib.request, urllib.parse, urllib.error v_ids = [] videos = {} for match in matches: v_id = match.group(2) if v_id not in v_ids: v_ids.append(v_id) videos[v_id] = { 'span': match.span(0), 'url': match.group(1), } if len(v_ids) > Settings.VIDEO_THUMBS_LIMIT: raise UserError("Has incluído muchos 
videos en tu mensaje. El máximo es %d." % Settings.VIDEO_THUMBS_LIMIT) if videos: params = { 'key': Settings.GOOGLE_API_KEY, 'part': 'snippet,contentDetails', 'id': ','.join(v_ids) } r_url = "https://www.googleapis.com/youtube/v3/videos?" + \ urllib.parse.urlencode(params) res = urllib.request.urlopen(r_url) res_json = json.load(res) offset = 0 for item in res_json['items']: v_id = item['id'] (start, end) = videos[v_id]['span'] end += 1 # remove endline try: new_url = '%(title)s (%(secs)s)
%(channel)s

' \ % {'title': item['snippet']['title'], 'channel': item['snippet']['channelTitle'], 'secs': parseIsoPeriod(item['contentDetails']['duration']), 'url': videos[v_id]['url'], 'id': v_id, 'thumb': item['snippet']['thumbnails']['default']['url'], } except UnicodeDecodeError: raise UserError(repr(v_id)) message = message[:start+offset] + new_url + message[end+offset:] offset += len(new_url) - (end-start) return (message, len(videos)) def fixMobileLinks(message): """ Shorten long links; Convert >># links into a mobile version """ board = Settings._.BOARD # If textboard if board["board_type"] == 1: message = re.compile(r'># links in posts and replace with the HTML to make them clickable """ board = Settings._.BOARD if board["board_type"] == 1: # Textboard if parentid: message = re.compile(r'>>(\d+(,\d+|-(?=[ \d\n])|\d+)*n?)').sub( '>>\1', message) else: # Imageboard quotes_id_array = re.findall(r">>([0-9]+)", message) for quotes in quotes_id_array: try: post = FetchOne('SELECT * FROM `posts` WHERE `id` = %s AND `boardid` = %s LIMIT 1', (quotes, board['id'])) if post['parentid']: message = re.compile(">>" + quotes).sub('>>' + quotes + '', message) else: message = re.compile(">>" + quotes).sub('>>' + quotes + '', message) except: message = re.compile( ">>" + quotes).sub(r'>>'+quotes+'', message) return message def checkQuotes(message): """ Check for >text in posts and add span around it to color according to the css """ message = re.compile( r"^>(.*)$", re.MULTILINE).sub(r'>\1', message) return message def escapeHTML(string): string = string.replace('<', '<') string = string.replace('>', '>') return string def onlyAllowedHTML(message): """ Allow , , , , and
 in posts, along with the special 
    """
    message = sanitize_html(message)
    #message = re.compile(r"\[aa\](.+?)\[/aa\]", re.DOTALL | re.IGNORECASE).sub("\\1", message)

    return message


def close_html(message):
    """
    Legacy predecessor of sanitize_html: round-trips the message through
    BeautifulSoup and returns the re-serialised markup.

    Unlike sanitize_html, no tag or attribute filtering is applied here; the
    markup is only parsed and written back out. The original description of
    this helper was "it just closes open tags", i.e. it presumably relies on
    the parser to balance unclosed elements -- TODO confirm against the
    BeautifulSoup version in use.

    message -- the post body to normalise.

    Returns the re-serialised markup after one substring has been removed and
    the result has been passed through .encode('utf-8').
    NOTE(review): on Python 3 both .encode() calls yield bytes, so the parser
    is fed bytes and the caller receives bytes -- confirm this is intended.
    """
    # Imported lazily, so the dependency is only needed when this helper runs.
    import BeautifulSoup

    # The parser is handed the UTF-8 encoded form of the message.
    message = message.encode('utf-8')
    soup = BeautifulSoup.BeautifulSoup(message)

    # NOTE(review): the first argument of replace() contains a raw line break
    # in this copy of the file, which looks like a garbled character or
    # entity -- confirm the intended literal before relying on this line.
    return str(soup).replace('
', '').encode('utf-8')


def sanitize_html(message, decode=True):
    """
    Clean the code and allow only a few safe tags.
    """
    import BeautifulSoup

    # Decode message from utf-8 if required
    if decode:
        message = message.decode('utf-8', 'replace')

    # Create HTML Cleaner with our allowed tags
    whitelist_tags = ["a", "b", "br", "blink", "code", "del", "em",
                      "i", "marquee", "root", "strike", "strong", "sub", "sup", "u"]
    whitelist_attr = ["href"]

    soup = BeautifulSoup.BeautifulSoup(message)

    # Remove tags that aren't allowed
    for tag in soup.findAll():
        if not tag.name.lower() in whitelist_tags:
            tag.name = "span"
            tag.attrs = []
        else:
            for attr in [attr for attr in tag.attrs if attr not in whitelist_attr]:
                del tag[attr]

    # We export the soup into a correct XHTML string
    string = str(soup).encode('utf-8')
    # We remove some anomalies we don't want
    string = string.replace('
', '
').replace(' ', '') return string def markdown(message): import markdown if message.strip() != "": # return markdown.markdown(message).rstrip("\n").rstrip("
") return markdown.markdown(message, extras=["cuddled-lists", "code-friendly"]).encode('utf-8') else: return "" def checkWordfilters(message, ip, board): wordfilters = FetchAll( "SELECT * FROM `filters` WHERE `type` = '0' ORDER BY `id` ASC") for wordfilter in wordfilters: if wordfilter["boards"]: boards = str2boards(wordfilter["boards"]) if not wordfilter["boards"] or board in boards: if wordfilter['action'] == '0': if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: raise UserError(wordfilter['reason']) elif wordfilter['action'] == '1': message = re.compile(wordfilter['from'], re.DOTALL | re.IGNORECASE).sub( wordfilter['to'], message) elif wordfilter['action'] == '2': # Ban if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: if wordfilter['seconds'] != '0': until = str(timestamp() + int(wordfilter['seconds'])) else: until = '0' InsertDb("INSERT INTO `bans` (`ip`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (" + "INET6_ATON('" + str(ip) + "'), '" + _mysql.escape_string(wordfilter['boards']) + "', " + str(timestamp()) + ", " + until + ", 'System', '" + _mysql.escape_string(wordfilter['reason']) + "', 'Word Auto-ban', '"+_mysql.escape_string(wordfilter['blind'])+"')") regenerateAccess() raise UserError(wordfilter['reason']) elif wordfilter['action'] == '3': if not re.search(wordfilter['from'], message, re.DOTALL | re.IGNORECASE) is None: raise UserError('%s' % (wordfilter['redirect_time'], wordfilter['redirect_url'], wordfilter['reason'])) return message def checkNamefilters(name, tripcode, ip, board): namefilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '1'") for namefilter in namefilters: if namefilter["boards"]: boards = str2boards(namefilter["boards"]) if not namefilter["boards"] or board in boards: # check if this filter applies match = False if namefilter['from'] and namefilter['from_trip']: # both name and trip filter if re.search(namefilter['from'], name, 
re.DOTALL | re.IGNORECASE) and tripcode == namefilter['from_trip']: match = True elif namefilter['from'] and not namefilter['from_trip']: # name filter if re.search(namefilter['from'], name, re.DOTALL | re.IGNORECASE): match = True elif not namefilter['from'] and namefilter['from_trip']: # trip filter if tripcode == namefilter['from_trip']: match = True if match: # do action if namefilter['action'] == '0': raise UserError(namefilter['reason']) elif namefilter['action'] == '1': name = namefilter['to'] tripcode = '' return name, tripcode elif namefilter['action'] == '2': # Ban if namefilter['seconds'] != '0': until = str(timestamp() + int(namefilter['seconds'])) else: until = '0' InsertDb("INSERT INTO `bans` (`ip`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (" + "'" + _mysql.escape_string(ip) + "', '" + _mysql.escape_string(namefilter['boards']) + "', " + str(timestamp()) + ", " + until + ", 'System', '" + _mysql.escape_string(namefilter['reason']) + "', 'Name Auto-ban', '"+_mysql.escape_string(namefilter['blind'])+"')") regenerateAccess() raise UserError(namefilter['reason']) elif namefilter['action'] == '3': raise UserError('%s' % (namefilter['redirect_time'], namefilter['redirect_url'], namefilter['reason'])) return name, tripcode