// ==UserScript==
// @name        xkcd-substitutions_lite
// @namespace   hans
// @description SUBSTITUTIONS That make reading the news more "fun". (http://xkcd.com/1288/)
// @updateURL   https://gitea.bmsch.de/balthasar/Scripts/raw/branch/master/xkcdHans_lite.user.js
// @downloadURL   https://gitea.bmsch.de/balthasar/Scripts/raw/branch/master/xkcdHans_lite.user.js
// @include     *bbc.co.uk/*
// @include     *bbc.com/*
// @include     *cbc.ca/*
// @include     *theguardian.com/*
// @include     *telegraph.co.uk/*
// @include     *theonion.com/*
// @include     *foxnews.com/*
// @include     *wikipedia.org/*
// @include     *merkur-online.de
// @include     *computerbase.de/*
// @include     *facebook.com/*
// @include     *geeksaresexy.net/*
// @include     *golem.de/*
// @include     *gulli.com/*
// @include     *heise.de/*
// @include     *minecraft.gamepedia.com/*
// @include     *merkur.de/*
// @include     *spiegel.de/*
// @include     *sueddeutsche.de/*
// @include     *zeit.de/*
// @include     *ze.tt/*
// @include     *bento.de/*
// @include     *bigpanda.cern.ch/*
// @include     *bibleserver.com/*
// @include     *wikisource.org/*
// @include     file:///tmp/test.html
// @version     1.1.44
// @grant       none
// ==/UserScript==

(function() {
    // Lookup tables shared by the helper functions below.
    // Every table is declared with `var` so nothing leaks onto the page's
    // global object (the script runs without a sandbox: `@grant none`).
    var substitutions;
    var textNodes;
    var regexps = {};          // substitution key -> compiled global RegExp
    var swapregexps = {};      // swap key         -> compiled global RegExp
    var backswapregexps = {};  // swap value       -> compiled global RegExp

    // One-way substitutions. Each key is used as a regular-expression SOURCE
    // (so '(c|k)' and '*' are pattern syntax), each value is a replacement
    // string that may reference capture groups ($1).
    substitutions = {
        'witnesses': 'these dudes I know',
        'allegedly': 'kinda probably',
        'new study': 'Tumblr post',
        'rebuild': 'avenge',
        'space': 'spaaace',
        'google glass': 'Virtual Boy',
        'smartphone': 'Pokédex',
        'senator': 'elf-lord',
        'car': 'cat',
        'election': 'eating contest',
        'congressional leaders': 'river spirits',
        'homeland security': 'homestar runner',
        'could not be reached for comment': 'is guilty and everyone knows it',
        'russland': 'deine Mutter',
        'Barack Obama': 'Darth Vader',
        'Obama': 'Vader',
        'million': 'melon',
        // The next six entries swap "ministerium" and "mysterium" (and their
        // plurals) by routing one side through a temporary token.
        'ministerium' : 'hansdb97531',
        'ministerien' : '83497543niars',
        'mysterium' : 'ministerium',
        'mysterien' : 'ministerien',
        'hansdb97531' : 'mysterium',
        '83497543niars' : 'mysterien',
        'apple': 'banana',
        'polizei': 'prinzengarde',
        'papst': 'breznsepp',
        'päpst': 'breznsepp',
        'atom(en|e|)': 'rosinenkuchen',
        'ele(c|k)tr': 'atom',
        'nabla': 'blabla',
        'quanten': 'knödel',
        'deutschland': 'schland',
        'internet': 'neuland',
        'force': 'horse',
        'fu(ß|ss)ball': 'hallenhalma',
        'drohne': 'reichsflugscheibe',
        'bayreuth': 'Wo? Fraglos nur ein Ort: Bayreuth!',
        'microsoft': 'mordor',
        'laser': '"laser""',
        'prozent': 'bier',
        '%': ' Bier',
        'promille': 'bierchen',
        '‰': ' Bierchen',
        'waffe': 'waffel',
        'iphone': 'banana phone',
        'milliarde': 'marmelade',
        'window': 'door',
        'nuclear': 'nucular',
        'sparta': 'this is sparta',
        'Charles( Robert)* Darwin': 'Charles "Jerusalem" Darwin',
        'mi(c|k)ro': 'ma$1ro',
        'erzbischof': 'erdbeerschorsch',
        'weihbischof': 'weißbierschorsch',
        'erzbischöfe': 'erdbeerschorschs',
        'weihbischöfe': 'weißbierschorschs',
        'Margot Käßmann': 'Margot "1,54‰" Käßmann',
        'Joachim Gauck': 'Joachim "Hipster" Gauck',
        'Günter Grass': 'Günter "Anders" Grass',
        'broken' : 'screwed',
        'aborted' : 'ceased to be',
        'nuklear': 'nukular',
        'sehr' : 'anders',
        'debate': 'dance off',
        'candidate': 'airbender',
        'drone': 'dog',
        'vows to': 'probably won\'t',
        'at large': 'very large',
        'successfully': 'suddenly',
        'expands': 'physically expands',
        'an unknown number': 'like hundreds',
        'front runner': 'blade runner',
        'global': 'spherical',
        'no indication': 'lots of signs',
        'horsepower': 'tons of horsemeat'
    };

    // Two-way swaps: every match of the key becomes the value and every match
    // of the value becomes the key. Both sides are compiled as regex sources.
    var swaps = {
        'histo' : 'hyste',
        'years': 'minutes',
        'ortho': 'para',
        'link': 'recht',
        'right': 'left',
        'defense': 'offense',
        'defence': 'offence',
        'verteidigung': 'angriff',
        'ice': 'fire',
        'fence': 'bridge',
        'mauer':'brücke',
        'hetero': 'homo',
        'produktion': 'provokation',
        'production': 'provocation',
        'produce': 'provoke',
        'produz': 'provoz',
        'hyper': 'hypo',
        'amüsant' : 'relevant',
        'Bundestag' : 'Schützenverein',
        'kryptisch' : 'kritisch',
        'Mars' : 'Mond',
        'up' : 'down',
        'top+' : 'bottom',
        'over' : 'under',
        'online': 'offline'
    };

    // Substitutions that are applied to each match only with a given probability.
    var sometimesSubstitutions = [
        // [regex, subst, probability]
        ['\\.(\\s)', '. Und Gott sah, dass es gut war.$1', 0.03],
    ];

    // Upper-case the first character of a string and leave the rest untouched.
    function capitalizeFirst(str) {
        return str.charAt(0).toUpperCase() + str.slice(1);
    }

    var tmpsub = {};
    // Add capitalized versions to map.
    // The lower-case and the capitalized variant are inserted directly after
    // each other so they are applied back to back when the map is iterated in
    // insertion order. This sometimes helps to avoid successive replacements.
    Object.keys(substitutions).forEach(function (key) {
        tmpsub[key] = substitutions[key];
        tmpsub[capitalizeFirst(key)] = capitalizeFirst(substitutions[key]);
    });
    substitutions = tmpsub;

    // Iterate the array by index: for...in would also visit enumerable
    // members that a page may have added to Array.prototype.
    var tmpsometimessub = [];
    for (var i = 0; i < sometimesSubstitutions.length; i++) {
        var entry = sometimesSubstitutions[i];
        tmpsometimessub.push(entry);
        var firstLetterRegex = entry[0].charAt(0).toUpperCase();
        var firstLetterSubst = entry[1].charAt(0).toUpperCase();
        if (firstLetterRegex.toLowerCase() != firstLetterRegex &&
            firstLetterSubst.toLowerCase() != firstLetterSubst) {
            // append capitalized versions if first symbol is a letter
            tmpsometimessub.push([
                capitalizeFirst(entry[0]),
                capitalizeFirst(entry[1]),
                entry[2]
            ]);
        }
    }
    sometimesSubstitutions = tmpsometimessub;

    // Add capitalized swap pairs. The key list is snapshotted first so that
    // the object is not extended while it is being enumerated.
    Object.keys(swaps).forEach(function (key) {
        swaps[capitalizeFirst(key)] = capitalizeFirst(swaps[key]);
    });

    // Compile every pattern once, up front, as a global regular expression.
    Object.keys(substitutions).forEach(function (key) {
        regexps[key] = new RegExp(key, 'g');
    });
    Object.keys(swaps).forEach(function (key) {
        swapregexps[key] = new RegExp(key, 'g');
        backswapregexps[swaps[key]] = new RegExp(swaps[key], 'g');
    });
    for (var j = 0; j < sometimesSubstitutions.length; j++) {
        sometimesSubstitutions[j][0] = new RegExp(sometimesSubstitutions[j][0], 'g');
    }

    // Main pass: run all four transformations over the document's text nodes
    // and log how long each phase took.

    // Text directly inside these elements is code, CSS or raw form content
    // rather than prose. Rewriting it would corrupt live stylesheets, inline
    // data and user-editable text, so such nodes are left untouched.
    var skippedParents = { script: true, style: true, noscript: true, textarea: true };

    var t0 = performance.now();
    textNodes = document.evaluate("//text()", document, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
    var totaltime_normal = 0;
    var totaltime_sometimes = 0;
    var totaltime_swap = 0;
    var totaltime_randQuotes = 0;
    var t0_sub = 0;
    var t1_sub = 0;
    for (var i = 0; i < textNodes.snapshotLength; i++) {
        var node = textNodes.snapshotItem(i);
        var parentName = node.parentNode ? node.parentNode.nodeName.toLowerCase() : '';
        if (skippedParents[parentName] === true) {
            continue;
        }

        // Work on a local string and touch the DOM at most once per node.
        var originalText = node.data;
        var text = originalText;

        t0_sub = performance.now();
        text = substituteTextIn(text);
        t1_sub = performance.now();
        totaltime_normal += t1_sub - t0_sub;

        t0_sub = performance.now();
        text = substituteTextInSometimes(text);
        t1_sub = performance.now();
        totaltime_sometimes += t1_sub - t0_sub;

        t0_sub = performance.now();
        text = swapTextIn(text);
        t1_sub = performance.now();
        totaltime_swap += t1_sub - t0_sub;

        t0_sub = performance.now();
        text = randomQuotes(text);
        t1_sub = performance.now();
        totaltime_randQuotes += t1_sub - t0_sub;

        // Skip the write when nothing changed to avoid needless DOM mutations.
        if (text !== originalText) {
            node.data = text;
        }
    }
    var t1 = performance.now();
    console.log("Call xkcdHans took " + (t1 - t0) + " milliseconds.");
    console.log("Normal substitutions " + totaltime_normal + " milliseconds.");
    console.log("Sometimes substitutions " + totaltime_sometimes + " milliseconds.");
    console.log("Swap substitutions " + totaltime_swap + " milliseconds.");
    console.log("Random quote substitutions " + totaltime_randQuotes + " milliseconds.");

    // Apply every entry of `substitutions` to `text`, one after another, using
    // the precompiled pattern stored under the same key in `regexps`. Each
    // replacement operates on the output of the previous one.
    // Returns the rewritten string.
    function substituteTextIn(text){
        return Object.keys(substitutions).reduce(function (current, pattern) {
            return current.replace(regexps[pattern], substitutions[pattern]);
        }, text);
    }

    // Run `text` through every [regex, substitution, probability] entry of
    // `sometimesSubstitutions`, delegating the per-match coin flip to
    // replaceSometimes(). Returns the rewritten string.
    function substituteTextInSometimes(text){
        // Index loop on purpose: for...in over an array also yields enumerable
        // members that a page may have added to Array.prototype, which would
        // reach replaceSometimes() with an undefined pattern.
        for (var i = 0; i < sometimesSubstitutions.length; i++) {
            var entry = sometimesSubstitutions[i];
            text = replaceSometimes(text, entry[0], entry[1], entry[2]);
        }
        return text;
    }

    // Exchange both sides of every pair in `swaps` within `text`.
    // For each pair the key's matches are first parked behind a placeholder
    // token, then the value's matches are turned into the key, and finally the
    // placeholder is turned into the value. Replacement texts are passed
    // through removeRegexChars() because keys and values are regex sources.
    // Returns the rewritten string.
    function swapTextIn(text){
        var placeholderPattern = new RegExp('superhans12342321', 'g');
        var result = text;
        for (var source in swaps) {
            var target = swaps[source];
            result = result
                .replace(swapregexps[source], 'superhans12342321')
                .replace(backswapregexps[target], removeRegexChars(source))
                .replace(placeholderPattern, removeRegexChars(target));
        }
        return result;
    }

    // Randomly wrap entries of `textList` in double quotes.
    //
    // For every entry the first run of characters that are not one of
    // . , : " ' determines the probability: the longer that run, the more
    // likely the entry is quoted (see the turn-on curve below). When an entry
    // is selected, every such run inside it is wrapped in double quotes.
    //
    // textList: array of strings.
    // Returns a new array of the same length; `textList` is not modified.
    function replaceRandomly(textList){
        /*
          Turn-on curve for probability - for fine tuning try this in gnuplot:
          maxprob=0.1
          scaling=0.3
          threshold=10
          set xr [0:30]
          set yr [0:0.1]
          plot 0.5*maxprob*(tanh(scaling*(x-threshold))+1)
         */
        var maxprob = 0.1; // maximum probability
        var scaling = 0.3; // scale the "turn-on" - smaller value means slower turn on
        var threshold = 10; // threshold where 0.5*maxprob is reached

        // Compiled once instead of once per entry. The non-global pattern
        // finds the first run, the global one rewrites all runs.
        var firstRun = /[^.,:"']+/;
        var everyRun = /([^.,:"']+)/g;

        var newList = [];
        // Index loop on purpose: for...in over an array also yields enumerable
        // members that a page may have added to Array.prototype, which would
        // end up as extra (non-string) entries in the result.
        for (var i = 0; i < textList.length; i++) {
            var piece = textList[i];
            var wordMatch = firstRun.exec(piece);
            var relativeProbability = 0;
            if (wordMatch) {
                relativeProbability = 0.5*maxprob*(Math.tanh(scaling*(wordMatch[0].length-threshold))+1);
            }
            if (Math.random() > (1-relativeProbability)) {
                newList.push(piece.replace(everyRun, '"$1"'));
            }
            else{
                newList.push(piece);
            }
        }
        return newList;
    }

    // Randomly put double quotes around words of `text` (see replaceRandomly).
    //
    // The text is split on runs of whitespace, the words are handed to
    // replaceRandomly(), and the result is stitched back together with the
    // ORIGINAL whitespace runs, so newlines, indentation and non-breaking
    // spaces survive unchanged.
    // Returns the rewritten string.
    function randomQuotes(text){
        // A capturing group makes split() keep the separators: even indices
        // hold the words, odd indices hold the whitespace between them.
        var pieces = text.split(/(\s+)/);
        var words = [];
        var i;
        for (i = 0; i < pieces.length; i += 2) {
            words.push(pieces[i]);
        }

        var quotedWords = replaceRandomly(words);
        for (i = 0; i < quotedWords.length; i++) {
            pieces[2 * i] = quotedWords[i];
        }

        return pieces.join('');
    }

    // Replace matches of `regex` in `text` by `substitution`, but each match
    // only with the given probability.
    //
    // text:         string to process.
    // regex:        RegExp (a pattern source string is accepted and compiled).
    // substitution: replacement string; may reference capture groups ($1 ...).
    // probability:  chance in [0, 1] that an individual match is replaced;
    //               Math.random() is consulted once per match.
    // Returns the rewritten string.
    function replaceSometimes(text, regex, substitution, probability){
        var pattern = regex instanceof RegExp ? regex : new RegExp(regex);

        // Scan with a private global copy so the caller's RegExp keeps its
        // own lastIndex and every match is found in its full original context.
        var flags = pattern.flags.indexOf('g') < 0 ? pattern.flags + 'g' : pattern.flags;
        var scanner = new RegExp(pattern.source, flags);

        var newText = "";
        var cursor = 0; // start of the part of `text` not yet copied to newText
        var match;
        while ((match = scanner.exec(text)) !== null) {
            if (match[0].length === 0) {
                // An empty match does not advance lastIndex by itself; step
                // forward manually, otherwise the loop would never end.
                scanner.lastIndex += 1;
            }
            newText += text.slice(cursor, match.index);
            if (Math.random() < probability) {
                // Re-applying the pattern to the matched text alone lets the
                // engine expand $1-style references in `substitution`.
                newText += match[0].replace(pattern, substitution);
            } else {
                newText += match[0];
            }
            cursor = match.index + match[0].length;
        }
        newText += text.slice(cursor);
        return newText;
    }

    // Strip the regex metacharacters '+', '*' and '.' from `text` so that a
    // pattern source can be used as plain replacement text.
    // Every occurrence is removed, not just the first one of each character.
    // Returns the cleaned string.
    function removeRegexChars(text) {
        // adapt depending on what will be needed
        return text.replace(/[+*.]/g, "");
    }
        
})();