path: root/cgi/formatting.py


              
# coding=utf-8
import string
import html
import os
import re
import time

from database import *
from framework import *
from post import regenerateAccess
#from xhtml_clean import Cleaner

from settings import Settings


def format_post(message, ip, parentid, parent_timestamp=0):
    """
    Turn the raw text of a post into the HTML that is displayed.

    The text is HTML-escaped (unless raw HTML posting is enabled), stripped of
    trailing whitespace and capped at 8000 characters.  It is then optionally
    rendered as Markdown and sanitized, and decorated with video previews,
    clickable URLs, reference links and word filters.  When Markdown is not in
    use, leading blank lines are dropped, runs of blank lines are collapsed,
    quote lines are highlighted and newlines become <br />.
    """
    board = Settings._.BOARD

    # Input is only trusted as markup when HTML posting is enabled.
    if not Settings.USE_HTML:
        message = html.escape(message)

    # Trim trailing whitespace, then enforce the length cap.
    message = message.rstrip()[:8000]

    markdown_enabled = False
    if Settings.USE_MARKDOWN:
        message = markdown(message)
        markdown_enabled = True
    if Settings.USE_HTML:
        message = onlyAllowedHTML(message)

    if Settings.VIDEO_THUMBS:
        message, _replaced = videoThumbs(message)

    message = clickableURLs(message)
    message = checkRefLinks(message, parentid, parent_timestamp)
    message = checkWordfilters(message, ip, board["dir"])

    # Markdown output already carries its own quotes and line breaks.
    if markdown_enabled:
        return message

    message = re.sub(r"^[\s\t]*(\n)+", "", message)
    message = re.sub(r"\n(\n)+", "\n\n", message)
    return checkQuotes(message).replace("\n", "<br />")


def tripcode(name):
    """
    Split a poster name into its display name and tripcode.

    The name is split on the first '#'.  Everything before it is the display
    name (stripped and HTML-escaped); everything after it is the tripcode
    password.  When secure tripcodes are enabled and the password itself
    contains a '#', the part after that second '#' is hashed together with
    Settings.SECRET to build a secure tripcode, which is appended to the
    normal one (or returned alone if the normal password is empty).

    Returns a (name, tripcode) tuple.  The tripcode is '' when the name
    contains no '#'.
    """
    if name == '':
        return '', ''

    (namepart, marker, trippart) = name.partition('#')
    if not marker:
        # No tripcode requested: the name is returned untouched.
        return name, ''

    key = Settings.TRIP_CHAR
    namepart = cleanString(namepart)
    trip = ''

    # secure tripcode
    if Settings.ALLOW_SECURE_TRIPCODES and '#' in trippart:
        import base64
        import codecs

        (trippart, securemarker, securepart) = trippart.partition('#')

        # The password is hashed as text together with the secret, the same
        # way iphash() feeds getMD5().  The previous code encoded it to
        # Shift-JIS bytes first, which cannot be concatenated with a str.
        digest = getMD5(securepart + Settings.SECRET)
        # NOTE(review): getMD5 is defined elsewhere; it presumably returns a
        # hex digest string. Both str and bytes results are handled here.
        if isinstance(digest, str):
            digest = digest.encode('ascii')

        # 'base64' and 'rot13' are not str.encode() codecs on Python 3, so
        # the dedicated modules are used instead.
        trip = base64.b64encode(digest).decode('ascii')
        trip = codecs.encode(trip, 'rot13')
        trip = key + key + trip[2:12]

        # return it if we don't have a normal tripcode
        if trippart == '':
            return namepart, trip

    # do normal tripcode
    # NOTE(review): the crypt module is deprecated since Python 3.11 and
    # removed in 3.13; this import needs a replacement before upgrading.
    from crypt import crypt

    # Normalise entities so the password is always hashed in escaped form.
    trippart = html.unescape(trippart)
    trippart = html.escape(trippart, True)

    # The salt is the 2nd and 3rd characters of the password (padded with
    # "H.."), restricted to the characters crypt() accepts.
    salt = re.sub(r"[^\.-z]", ".", (trippart + "H..")[1:3])
    salt = salt.translate(str.maketrans(r":;=?@[\]^_`", "ABDFGabcdef"))
    trip = key + crypt(trippart, salt)[-10:] + trip

    return namepart, trip


def iphash(ip, post, t, useid, mobile, agent, cap_id, hide_end, has_countrycode):
    """
    Build the poster ID string for a post.

    The result is made of three parts:

    * a base identifier: cap_id when given, '???' for posts with an e-mail
      when useid is 1 or for 127.0.0.1, otherwise eight characters derived
      from the IP, the current day, t and Settings.SECRET;
    * a one-character client marker ('*', 'T', a device/browser letter,
      'Q' for mobile or '0');
    * an optional status flag: '#' for banned addresses, '!' for addresses
      without a country code that are proxies or fail addressIsES.

    The IP, user agent and resulting ID are written to the log.
    """
    now = time.time()

    # --- base identifier -------------------------------------------------
    if cap_id:
        poster_id = cap_id
    elif (post['email'] and useid == 1) or ip == "127.0.0.1":
        poster_id = '???'
    else:
        day = int((now + (Settings.TIME_ZONE*3600)) / 86400)
        # The day makes IDs change every 24 hours; t makes them vary
        # depending on the thread.
        seed = ',' + str(day) + ',' + str(t)
        poster_id = getb64(getMD5(ip + seed + Settings.SECRET))[-10:-2]

    # --- client marker ---------------------------------------------------
    if hide_end:
        marker = '*'
    elif addressIsTor(ip):
        marker = 'T'
    else:
        marker = None

        # Device tokens win over everything else.
        for token, code in (('Dalvik', 'R'), ('Android', 'a'), ('iPhone', 'i')):
            if token in agent:
                marker = code
                break

        # Browser letters are only used when useid is 3.
        if marker is None and useid == 3:
            if 'Firefox' in agent:
                marker = 'F'
            elif 'Safari' in agent and 'Chrome' not in agent:
                marker = 's'
            elif 'Chrome' in agent:
                marker = 'C'
            elif 'SeaMonkey' in agent:
                marker = 'S'
            elif 'Edge' in agent:
                marker = 'E'
            elif 'Opera' in agent or 'OPR' in agent:
                marker = 'o'
            elif 'MSIE' in agent or 'Trident' in agent:
                marker = 'I'

        if marker is None:
            marker = 'Q' if mobile else '0'

    poster_id += marker

    # --- status flag -----------------------------------------------------
    if addressIsBanned(ip, ""):
        poster_id += '#'
    elif (not has_countrycode and not addressIsTor(ip) and
          (addressIsProxy(ip) or not addressIsES(ip))):
        poster_id += '!'

    logging.info("{} {} {}".format(ip, agent, poster_id))

    return poster_id


def cleanString(string, escape=True, quote=False):
    """
    Strip surrounding whitespace and, unless escape is false, HTML-escape
    the result.  Quote characters are only escaped when quote is true.
    """
    stripped = string.strip()
    return html.escape(stripped, quote) if escape else stripped


def clickableURLs(message):
    """
    Wrap bare URLs and e-mail addresses found in the message in anchor tags.

    URLs (http, https, ftp, mailto, news, irc) open in a new window and are
    marked rel="nofollow"; e-mail addresses become mailto: links shown
    between &lt; and &gt;.
    """
    url_pattern = r'( |^|:|\(|\[)((?:https?://|ftp://|mailto:|news:|irc:)[^\s<>()"]*?(?:\([^\s<>()"]*?\)[^\s<>()"]*?)*)((?:\s|<|>|"|\.|\|\]|!|\?|,|&#44;|&quot;)*(?:[\s<>()"]|$))'
    url_markup = r'\1<a href="\2" rel="nofollow" target="_blank">\2</a>\3'

    email_pattern = r"( |^|:)([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,6})"
    email_markup = r'\1<a href="mailto:\2" rel="nofollow">&lt;\2&gt;</a>'

    # URLs first, then e-mail addresses on the already linked text.
    linked = re.sub(url_pattern, url_markup, message, flags=re.M)
    return re.sub(email_pattern, email_markup, linked, flags=re.I | re.M)


def videoThumbs(message):
    """
    Replace YouTube links that stand alone on a line with a preview block.

    Every distinct video id is looked up through the YouTube Data API and
    the first line linking to it is replaced by an anchor holding the
    thumbnail, title, duration and channel name.  Lines repeating a video
    that was already seen are left untouched.

    Returns a tuple (message, count) where count is the number of distinct
    videos found in the message.

    Raises UserError when the message links more distinct videos than
    Settings.VIDEO_THUMBS_LIMIT.
    """
    # Youtube
    video_re = re.compile(
        r"^(?: +)?(https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)([\w\-]+))(?: +)?$", re.M)

    # Keyed by video id, in order of first appearance in the message.
    videos = {}
    for match in video_re.finditer(message):
        v_id = match.group(2)
        if v_id not in videos:
            videos[v_id] = {
                'span': match.span(0),
                'url':  match.group(1),
            }
        if len(videos) > Settings.VIDEO_THUMBS_LIMIT:
            raise UserError("Has incluído muchos videos en tu mensaje. El máximo es %d." % Settings.VIDEO_THUMBS_LIMIT)

    # Nothing to look up: skip the network round trip entirely.
    if not videos:
        return (message, 0)

    import json
    import urllib.parse
    import urllib.request

    found = len(videos)

    params = {
        'key': Settings.GOOGLE_API_KEY,
        'part': 'snippet,contentDetails',
        'id': ','.join(videos)
    }
    r_url = "https://www.googleapis.com/youtube/v3/videos?" + \
        urllib.parse.urlencode(params)
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(r_url) as res:
        res_json = json.load(res)

    replacements = []
    for item in res_json.get('items', []):
        v_id = item['id']
        # pop() ignores ids that were not requested and guarantees each
        # span is replaced at most once even if the response repeats an id.
        video = videos.pop(v_id, None)
        if video is None:
            continue

        (start, end) = video['span']
        end += 1  # remove endline

        try:
            # Title and channel are free text from a remote service, so they
            # are escaped before being embedded in the markup.
            new_url = '<a href="%(url)s" target="_blank" class="yt"><span class="pvw"><img src="%(thumb)s" /></span><b>%(title)s</b> (%(secs)s)<br />%(channel)s</a><br />' \
                % {'title': html.escape(item['snippet']['title']),
                   'channel': html.escape(item['snippet']['channelTitle']),
                   'secs': parseIsoPeriod(item['contentDetails']['duration']),
                   'url': video['url'],
                   'id': v_id,
                   'thumb': item['snippet']['thumbnails']['default']['url'], }
        except UnicodeDecodeError:
            # Kept from the original implementation.
            raise UserError(repr(v_id))
        replacements.append((start, end, new_url))

    # Splice from the end of the message towards the start so that earlier
    # spans stay valid whatever order the API returned the items in.
    for (start, end, new_url) in sorted(replacements, key=lambda r: r[0], reverse=True):
        message = message[:start] + new_url + message[end:]

    return (message, found)


def fixMobileLinks(message):
    """
    Point post links at the mobile reader.

    On textboards, links of the form /<board>/read/<thread>/<rest> are
    rewritten; on other boards, links of the form
    /<board>/res/<thread>.html#<post> are.  Both end up under
    /cgi/mobileread/.
    """
    if Settings._.BOARD["board_type"] == 1:
        # Textboard
        link_re = r'<a href="/(\w+)/read/(\d+)(\.html)?/*(.+)"'
        mobile_link = r'<a href="/cgi/mobileread/\1/\2/\4"'
    else:
        link_re = r'<a href="/(\w+)/res/(\d+)\.html#(\d+)"'
        mobile_link = r'<a href="/cgi/mobileread/\1/\2#\3"'

    return re.sub(link_re, mobile_link, message)


def checkRefLinks(message, parentid, parent_timestamp):
    """
    Check for >># links in posts and replace with the HTML to make them clickable

    On textboards (board_type 1) references are only linked inside replies
    (parentid set) and point at /read/<parent_timestamp>/<reference>.

    On other boards every referenced post id is looked up in the database.
    Existing posts become links to their thread page; references that cannot
    be resolved are wrapped in <span class="q">.

    Returns the message with the references replaced.
    """
    board = Settings._.BOARD

    if board["board_type"] == 1:
        # Textboard
        if parentid:
            message = re.compile(r'&gt;&gt;(\d+(,\d+|-(?=[ \d\n])|\d+)*n?)').sub(
                '<a href="' + Settings.BOARDS_URL + board['dir'] + '/read/' + str(parent_timestamp) + r'/\1">&gt;&gt;\1</a>', message)
        return message

    # Imageboard
    rendered = {}  # post id -> replacement HTML, so each id is looked up once

    def _render(quoted):
        try:
            post = FetchOne('SELECT * FROM `posts` WHERE `id` = %s AND `boardid` = %s LIMIT 1',
                            (quoted, board['id']))
            # Replies link to their parent thread, opening posts to themselves.
            thread_id = post['parentid'] or post['id']
        except Exception:
            # Best effort, as before: a failed lookup or a missing post just
            # leaves the reference as highlighted text.
            return '<span class="q">&gt;&gt;' + quoted + '</span>'
        return ('<a href="' + Settings.BOARDS_URL + board['dir'] + '/res/' +
                str(thread_id) + '.html#' + quoted + '">&gt;&gt;' + quoted + '</a>')

    def _replace(match):
        quoted = match.group(1)
        if quoted not in rendered:
            rendered[quoted] = _render(quoted)
        return rendered[quoted]

    # A single pass over the message replaces each reference exactly once.
    # Substituting id by id would also rewrite the prefix of longer ids
    # (>>12 inside >>123) and re-wrap references inside anchors generated
    # for an earlier duplicate.
    return re.sub(r"&gt;&gt;([0-9]+)", _replace, message)


def checkQuotes(message):
    """
    Wrap every line that starts with an escaped '>' in <span class="q"> so
    the stylesheet can colour it as a quote.
    """
    quote_line = r"^&gt;(.*)$"
    highlighted = r'<span class="q">&gt;\1</span>'
    return re.sub(quote_line, highlighted, message, flags=re.MULTILINE)


def escapeHTML(string):
    """
    Replace '<' and '>' with their HTML entities; every other character,
    including '&', is left as it is.
    """
    return string.replace('<', '&lt;').replace('>', '&gt;')


def onlyAllowedHTML(message):
    """
    Restrict the markup of a post to what sanitize_html lets through and
    return the cleaned message.
    """
    return sanitize_html(message)


def close_html(message):
    """
    Older, much simpler predecessor of sanitize_html.

    If the last '<' in the message is never followed by a '>', the message
    is cut just before that '<'.  In every other case the message is
    returned unchanged.
    """
    last_open = message.rfind('<')

    # No tag at all, or the last tag is properly terminated.
    if last_open == -1 or '>' in message[last_open:]:
        return message

    # Dangling, unterminated tag: drop it together with what follows.
    return message[:last_open]


def sanitize_html(message, decode=True):
    """
    Clean the code and allow only a few safe tags.

    Tags outside the whitelist are turned into attribute-less <span>
    elements; whitelisted tags keep only whitelisted attributes.

    message -- the markup to clean, as str or as UTF-8 encoded bytes.
    decode  -- when true, a bytes message is decoded from UTF-8 first
               (undecodable bytes are replaced).

    Returns the cleaned markup as a str.

    NOTE(review): the BeautifulSoup 3 package imported here was published for
    Python 2; confirm which distribution provides it in this deployment.
    NOTE(review): href values are not inspected, so schemes such as
    javascript: are not filtered by this function.
    """
    import BeautifulSoup

    # Decode message from utf-8 if required.  Text that is already a str has
    # no decode() method and is used as it is.
    if decode and isinstance(message, bytes):
        message = message.decode('utf-8', 'replace')

    # Create HTML Cleaner with our allowed tags
    whitelist_tags = ["a", "b", "br", "blink", "code", "del", "em",
                      "i", "marquee", "root", "strike", "strong", "sub", "sup", "u"]
    whitelist_attr = ["href"]

    soup = BeautifulSoup.BeautifulSoup(message)

    # Remove tags that aren't allowed
    for tag in soup.findAll():
        if tag.name.lower() not in whitelist_tags:
            tag.name = "span"
            tag.attrs = []
        else:
            # tag.attrs holds (name, value) pairs in BeautifulSoup 3, so the
            # whitelist has to be checked against the name alone.  Plain
            # names are accepted too, for parsers that expose a mapping.
            names = [attr[0] if isinstance(attr, tuple) else attr
                     for attr in list(tag.attrs)]
            for attr_name in names:
                if attr_name not in whitelist_attr:
                    del tag[attr_name]

    # We export the soup into a correct XHTML string.  The result stays a
    # str because the rest of the formatting pipeline works on text.
    cleaned = str(soup)
    # We remove some anomalies we don't want
    cleaned = cleaned.replace('<br/>', '<br />').replace('&#13;', '')

    return cleaned


def markdown(message):
    """
    Render a Markdown-formatted message to HTML.

    Returns "" when the message is empty or only whitespace, otherwise the
    text produced by the markdown library.  The result is always a str so
    that the rest of the formatting pipeline can run its regular expressions
    on it.
    """
    if message.strip() == "":
        return ""

    # Imported only when there is something to render.  Inside this function
    # the name refers to the library module, not to this wrapper.
    import markdown

    # NOTE(review): "extras" is the keyword used by the markdown2 API;
    # confirm that the installed "markdown" module honours it.
    return markdown.markdown(message, extras=["cuddled-lists", "code-friendly"])


def checkWordfilters(message, ip, board):
    """
    Apply the word filters (filters of type '0') to a message.

    Filters are processed in id order and skipped when they are restricted
    to boards that do not include the given one.  Depending on the filter
    action a match:

    * 0 -- rejects the post with the filter's reason;
    * 1 -- replaces the matched text (the only action that lets the post
           through);
    * 2 -- records a ban for the IP, regenerates the access rules and
           rejects the post;
    * 3 -- rejects the post with a meta-refresh redirect.

    Returns the (possibly rewritten) message; rejections raise UserError.
    """
    flags = re.DOTALL | re.IGNORECASE
    rows = FetchAll(
        "SELECT * FROM `filters` WHERE `type` = '0' ORDER BY `id` ASC")

    for row in rows:
        # Board restriction: an empty list means "every board".
        if row["boards"] and board not in str2boards(row["boards"]):
            continue

        action = row['action']

        if action == 1:
            # Replacement
            message = re.sub(row['from'], row['to'], message, flags=flags)
            continue

        # Every other known action only fires when the pattern matches.
        if action not in (0, 2, 3):
            continue
        if re.search(row['from'], message, flags) is None:
            continue

        if action == 0:
            raise UserError(row['reason'])

        if action == 2:
            # Ban; an "until" of 0 is used when the filter sets no duration.
            if row['seconds']:
                until = timestamp() + int(row['seconds'])
            else:
                until = 0

            sql_query = "INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            sql_params = (ip, ip, ip, row['boards'], timestamp(), until, "System", row['reason'], "Filter auto-ban", row['blind'])
            InsertDb(sql_query, sql_params)
            regenerateAccess()
            raise UserError(row['reason'])

        # action == 3: redirect
        raise UserError('<meta http-equiv="refresh" content="%s;url=%s" />%s' % (row['redirect_time'], row['redirect_url'], row['reason']))

    return message


def checkNamefilters(name, tripcode, ip, board):
    """
    Apply the name filters (filters of type '1') to a poster name/tripcode.

    A filter may test the name (regular expression, case-insensitive), the
    tripcode (exact comparison) or both; filters restricted to other boards
    are skipped.  Depending on the action of the first filter that matches:

    * 0 -- the post is rejected with the filter's reason;
    * 1 -- the name is replaced, the tripcode cleared and the result
           returned immediately;
    * 2 -- a ban is recorded for the IP, the access rules are regenerated
           and the post is rejected;
    * 3 -- the post is rejected with a meta-refresh redirect.

    Returns a (name, tripcode) tuple; rejections raise UserError.
    """
    flags = re.DOTALL | re.IGNORECASE
    namefilters = FetchAll("SELECT * FROM `filters` WHERE `type` = '1'")

    for namefilter in namefilters:
        # Board restriction: an empty list means "every board".  Skipping
        # here also guarantees the match test below is evaluated afresh for
        # every filter instead of relying on a value left by a previous
        # iteration (or never assigned at all).
        if namefilter["boards"] and board not in str2boards(namefilter["boards"]):
            continue

        name_pattern = namefilter['from']
        trip_wanted = namefilter['from_trip']

        # A filter with neither criterion never matches.
        if not name_pattern and not trip_wanted:
            continue
        if name_pattern and not re.search(name_pattern, name, flags):
            continue
        if trip_wanted and tripcode != trip_wanted:
            continue

        # do action
        action = namefilter['action']
        if action == 0:
            raise UserError(namefilter['reason'])
        elif action == 1:
            return namefilter['to'], ''
        elif action == 2:
            # Ban; an "until" of 0 is used when the filter sets no duration.
            if namefilter['seconds']:
                until = timestamp() + int(namefilter['seconds'])
            else:
                until = 0

            # Parameterised insert using the same columns as the auto-ban in
            # checkWordfilters; the values are never spliced into the SQL.
            sql_query = "INSERT INTO `bans` (`ipstart`, `ipend`, `ipstr`, `boards`, `added`, `until`, `staff`, `reason`, `note`, `blind`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            sql_params = (ip, ip, ip, namefilter['boards'], timestamp(), until, "System", namefilter['reason'], "Name Auto-ban", namefilter['blind'])
            InsertDb(sql_query, sql_params)
            regenerateAccess()
            raise UserError(namefilter['reason'])
        elif action == 3:
            raise UserError('<meta http-equiv="refresh" content="%s;url=%s" />%s' % (namefilter['redirect_time'], namefilter['redirect_url'], namefilter['reason']))

    return name, tripcode