<?php

// PHP code copyright (C) 2002 Sean C. Nichols
//                             seanni :AT: trichotomy.ca
//                             http://trichotomy.ca/


/////////////////////////////////////////////////////////////////////
// The following functions comprise a rudimentary text translator: //
//                                                                 //
// fTranslate() -- the main translation function, which is called  //
//  with (1) the text to translate and (2) a dictionary. The       //
//  dictionary is expected in the form of an associative array,    //
//  basically a list of entries where each entry contains a token  //
//  and that token's translation. It will return as a string the   //
//  same text after it has been translated.                        //
//                                                                 //
// The following supplementary dictionaries are supplied. They can //
//  be combined together if desired and / or passed into           //
//  fTranslate() as-is. It is of course also possible to use       //
//  user-defined dictionaries, so long as they follow the correct  //
//  format. The supplied dictionaries are:                         //
// $engl_char => A list of single-character tokens to translate    //
//               "1337 5kr!pt" into a latin-based character set.   //
// $engl_word => A list of common "1337" words and their english   //
//               transliterations.                                 //
// $engl_all  => A combination of the above two.                   //
//                                                                 //
// $leet_char => A list of single-character tokens to translate a  //
//               latin-based character set into "1337 5kr!pt".     //
// $leet_word => A list of common english words and their "1337"   //
//               transliterations.                                 //
// $leet_all  => A combination of the above two.                   //
//                                                                 //
// $html_conv => Utility dictionary to convert special characters  //
//               into their HTML equivalents.                      //
//                                                                 //
// Notes:                                                          //
//  * Specifying an empty dictionary will result in the original   //
//    string being returned as-is.                                 //
//  * Specifying a malformed dictionary will result in undefined   //
//    behaviour (most likely an error).                            //
//  * Translation is performed sequentially from the beginning to  //
//    the end of the string, with the translation being done on    //
//    the largest token found starting at each successive offset.  //
//  * No token match at a given offset results in a single         //
//    character being transliterated as-is.                        //
//  * A character can be escaped by a backslash (\) to force       //
//    non-translation (ie: it will be left as-is). A translated    //
//    token will never contain an escaped character. To include a  //
//    (non-escaped) backslash, double it (\\).                     //
//                                                                 //
// Examples:                                                       //
//  * The following code gives "ph34r tha k0d3!":                  //
//      $result = fTranslate( "fear the code!", $leet_all );       //
//  * The following code gives "hackers rule":                     //
//      $result = fTranslate( "haX0rz Rul3", $engl_all );          //
/////////////////////////////////////////////////////////////////////

// "1337" => English dictionaries.
//
// Each entry maps a token found in the input (key) to its replacement
// (value). Note that PHP stores purely numeric string keys such as "4"
// or "31337" as integer keys.

// Single glyphs and short glyph sequences.
$engl_char = array(
    "4"    => "a",
    "k"    => "c",
    "3"    => "e",
    "ph"   => "f",
    "9"    => "g",
    "!"    => "i",
    "K"    => "k",
    "1"    => "l",
    "0"    => "o",
    "kw"   => "qu",
    "R"    => "r",
    "5"    => "s",
    "7"    => "t",
    "\\/"  => "v",
    "XX"   => "x",
    "2"    => "z",
    "8"    => "B",
    "|)"   => "D",
    "pH"   => "F",
    "#"    => "H",
    "|"    => "I",
    "|v|"  => "M",
    "|\\|" => "N",
    '$'    => "S",
    "><"   => "X",
    "]["   => "2",
    "]|["  => "3",
    "'z"   => "'s",
);

// Whole words. fTranslate() tries the longest token first, so these take
// precedence over the single-glyph entries when both could match.
$engl_word = array(
    "haX0r"             => "hacker",
    "haX0rz"            => "hackers",
    "hax0R"             => "hack",
    "fuX0r"             => "fuck",
    "fuX0rz"            => "fuckers",
    "tha "              => "the ",
    "HaX0r"             => "Hacker",
    "HaX0rz"            => "Hackers",
    "Hax0R"             => "Hack",
    "FuX0r"             => "Fuck",
    "FuX0rz"            => "Fuckers",
    "Tha "              => "The ",
    "spoooo0000nnnn!!!" => "spoon",
    'M$'                => "MS",
    'Micro$oft'         => "Microsoft",
    'micro$oft'         => "microsoft",
    "assz0r"            => "ass",
    "Assz0r"            => "Ass",
    "ph34r"             => "fear",
    "Ph34r"             => "Fear",
    "jo0o"              => "you",
    "Jo0o"              => "You",
    "n!t3"              => "night",
    "N!t3"              => "Night",
    "31337"             => "elite",
    "3r337"             => "Elite",
    "roX0r"             => "rock",
    "roX0rz"            => "rocks",
    "RoX0r"             => "Rock",
    "RoX0rz"            => "Rocks",
    "sexxx0r"           => "sex",
    "SeXXX0r"           => "Sex",
);

// Combined dictionary. The array union operator (+) is used on purpose:
// it preserves the integer keys created from numeric tokens, whereas
// array_merge() would renumber them.
$engl_all = $engl_char + $engl_word;

// English => "1337" dictionaries.
//
// Each entry maps a token found in the input (key) to its replacement
// (value). Note that PHP stores purely numeric string keys such as "1"
// as integer keys.

// Single characters and short character sequences.
$leet_char = array(
    "a"  => "4",
    "c"  => "k",
    "e"  => "3",
    "f"  => "ph",
    "g"  => "9",
    "i"  => "!",
    "k"  => "K",
    "l"  => "1",
    "o"  => "0",
    "qu" => "kw",
    "r"  => "R",
    "s"  => "5",
    "t"  => "7",
    "v"  => "\\/",
    "x"  => "XX",
    "z"  => "2",
    "A"  => "4",
    "B"  => "8",
    "C"  => "K",
    "D"  => "|)",
    "E"  => "3",
    "F"  => "pH",
    "H"  => "#",
    "I"  => "|",
    "L"  => "1",
    "M"  => "|v|",
    "N"  => "|\\|",
    "O"  => "0",
    "S"  => '$',
    "T"  => "7",
    "V"  => "\\/",
    "X"  => "><",
    "Z"  => "2",
    "1"  => "I",
    "2"  => "][",
    "3"  => "]|[",
    "'s" => "'z",
);

// Whole words. fTranslate() tries the longest token first, so these take
// precedence over the single-character entries when both could match.
$leet_word = array(
    "hacker"    => "haX0r",
    "hackers"   => "haX0rz",
    "hack"      => "hax0R",
    "fuck"      => "fuX0r",
    "fuckers"   => "fuX0rz",
    "the "      => "tha ",
    "Hacker"    => "HaX0r",
    "Hackers"   => "HaX0rz",
    "Hack"      => "Hax0R",
    "Fuck"      => "FuX0r",
    "Fuckers"   => "FuX0rz",
    "The "      => "Tha ",
    "spoon"     => "spoooo0000nnnn!!!",
    "MS"        => 'M$',
    "Microsoft" => 'Micro$oft',
    "microsoft" => 'micro$oft',
    "ass"       => "assz0r",
    "Ass"       => "Assz0r",
    "fear"      => "ph34r",
    "Fear"      => "Ph34r",
    "you"       => "jo0o",
    "You"       => "Jo0o",
    "night"     => "n!t3",
    "Night"     => "N!t3",
    "elite"     => "31337",
    "Elite"     => "3r337",
    "rock"      => "roX0r",
    "rocks"     => "roX0rz",
    "Rock"      => "RoX0r",
    "Rocks"     => "RoX0rz",
    "sex"       => "sexxx0r",
    "Sex"       => "SeXXX0r",
);

// Combined dictionary. The array union operator (+) is used on purpose:
// it preserves the integer keys created from numeric tokens, whereas
// array_merge() would renumber them.
$leet_all = $leet_char + $leet_word;

// Utility dictionary for fTranslate(): maps newline, tab, ampersand,
// angle brackets and square brackets to HTML markup / entities.
$html_conv = array(
    "\n" => '<BR>',
    '&'  => '&amp;',
    '<'  => '&lt;',
    '>'  => '&gt;',
    "\t" => '&nbsp;&nbsp;&nbsp;&nbsp;',
    '['  => '&#91;',
    ']'  => '&#93;',
);

/**
 * Reads a candidate token of at most $iMaxLen characters from $sSource,
 * starting at $iOffset.
 *
 * A doubled backslash in the source contributes a single backslash to
 * the token. A backslash followed by anything else marks an escaped
 * character and ends the token just before it.
 *
 * @param string $sSource  Text to read from.
 * @param int    $iOffset  Position in $sSource at which to start.
 * @param int    $iMaxLen  Maximum number of characters in the token.
 *
 * @return string The (possibly empty) unescaped token.
 */
function fGrabToken( $sSource, $iOffset, $iMaxLen ) {
    $sRest = substr( $sSource, $iOffset );
    $iRestLen = strlen( $sRest );
    $sToken = "";
    $iPos = 0;

    // One token character is produced per iteration; stop when the
    // budget is used up or the remaining text is exhausted.
    for ($iLeft = $iMaxLen; $iLeft > 0 && $iPos < $iRestLen; $iLeft--) {
        $sChar = substr( $sRest, $iPos, 1 );

        if ($sChar != "\\") {
            // Ordinary character: copy it over.
            $sToken .= $sChar;
            $iPos++;
            continue;
        }

        if (substr( $sRest, $iPos, 2 ) != "\\\\") {
            // A real escape sequence: the token ends here.
            break;
        }

        // Doubled backslash: emit one, skip both.
        $sToken .= "\\";
        $iPos += 2;
    }

    return $sToken;
}

/**
 * Translates $sScript using the token dictionary $sDict.
 *
 * The text is scanned from left to right. At each offset the longest
 * dictionary token that matches is replaced by its translation; if no
 * token matches, a single character is copied through unchanged. A
 * backslash escapes the following character (it is copied as-is and
 * never takes part in a token); a doubled backslash stands for one
 * literal backslash.
 *
 * @param string $sScript  Text to translate.
 * @param array  $sDict    Map of token => translation. An empty array
 *                         returns $sScript unchanged.
 *
 * @return string The translated text.
 */
function fTranslate( $sScript, $sDict ) {
    // Check for empty dictionary
    if (count( $sDict ) == 0) {
        return $sScript;
    }

    // The longest token (dictionary KEY) bounds how far ahead we need to
    // look. Keys may be integers (PHP converts numeric string keys), so
    // cast before measuring.
    $iMaxLen = 0;
    foreach (array_keys( $sDict ) as $sKey) {
        $iKeyLen = strlen( (string) $sKey );
        if ($iKeyLen > $iMaxLen) {
            $iMaxLen = $iKeyLen;
        }
    }

    // Run through the string and translate tokens
    $sResult = "";
    $iOffset = 0;
    $iScriptLen = strlen( $sScript );
    while ($iOffset < $iScriptLen) {
        // Search for the largest token at the current offset by grabbing
        // the longest candidate and trimming it one character at a time.
        $bFound = false;
        $sToken = fGrabToken( $sScript, $iOffset, $iMaxLen );
        while (strlen( $sToken ) > 0) {
            if (array_key_exists( $sToken, $sDict )) {
                $bFound = true;
                break;
            }
            $sToken = substr( $sToken, 0, -1 );
        }

        if ($bFound) {
            // An explicit flag (rather than testing the translation for
            // emptiness) lets a token legitimately map to "".
            $sResult .= $sDict[ $sToken ];

            // Every backslash in the token was written doubled in the
            // source, so it consumed one extra source character.
            $iOffset += strlen( $sToken ) + substr_count( $sToken, "\\" );
        } elseif (substr( $sScript, $iOffset, 1 ) == "\\") {
            // Escaped character: drop the backslash, keep what follows.
            $sResult .= substr( $sScript, $iOffset + 1, 1 );
            $iOffset += 2;
        } else {
            // No token here: copy one character through unchanged.
            $sResult .= substr( $sScript, $iOffset, 1 );
            $iOffset++;
        }
    }

    return $sResult;
}

?>