//                 A list of single-character tokens to translate
//                 "1337 5kr!pt" into a latin-based character set.
//   $engl_word => A list of common "1337" words and their English
//                 transliterations.
//   $engl_all  => A combination of the above two.
//
//   $leet_char => A list of single-character tokens to translate a
//                 latin-based character set into "1337 5kr!pt".
//   $leet_word => A list of common English words and their "1337"
//                 transliterations.
//   $leet_all  => A combination of the above two.
//
//   $html_conv => Utility dictionary to convert special characters
//                 into their HTML equivalents.
//
// Notes:
//   * Specifying an empty dictionary will result in the original
//     string being returned as-is.
//   * Specifying a malformed dictionary will result in undefined
//     behaviour (most likely an error).
//   * Translation is performed sequentially from the beginning to
//     the end of the string, with the translation being done on
//     the largest token found starting at each successive offset.
//   * No token match at a given offset results in a single
//     character being transliterated as-is.
//   * A character can be escaped by a backslash (\) to force
//     non-translation (ie: it will be left as-is). A translated
//     token will never contain an escaped character. To include a
//     (non-escaped) backslash, double it (\\). Consequently, tokens
//     whose key contains a backslash (such as \/ or |\|) only match
//     input in which that backslash is doubled.
//
// Examples:
//   * The following code gives "ph34r tha k0d3!":
//       $result = fTranslate( "fear the code!", $leet_all );
//   * The following code gives "hackers rule":
//       $result = fTranslate( "haX0rz Rul3", $engl_all );
/////////////////////////////////////////////////////////////////////

// "1337" token => latin character(s).
$engl_char = array(
    "4"    => "a",
    "k"    => "c",
    "3"    => "e",
    "ph"   => "f",
    "9"    => "g",
    "!"    => "i",
    "K"    => "k",
    "1"    => "l",
    "0"    => "o",
    "kw"   => "qu",
    "R"    => "r",
    "5"    => "s",
    "7"    => "t",
    "\\/"  => "v",
    "XX"   => "x",
    "2"    => "z",
    "8"    => "B",
    "|)"   => "D",
    "pH"   => "F",
    "#"    => "H",
    "|"    => "I",
    "|v|"  => "M",
    "|\\|" => "N",
    '$'    => "S",
    "><"   => "X",
    "]["   => "2",
    "]|["  => "3",
    "'z"   => "'s"
);

// "1337" word => English word.
$engl_word = array(
    "haX0r"             => "hacker",
    // "haX0rz" is the spelling produced by $leet_word and used in the
    // header example; the all-lowercase variant is kept as an alias.
    "haX0rz"            => "hackers",
    "hax0rz"            => "hackers",
    "hax0R"             => "hack",
    "fuX0r"             => "fuck",
    // This entry used to be reversed ("fuckers" => "fuX0rz").
    "fuX0rz"            => "fuckers",
    "tha "              => "the ",
    "HaX0r"             => "Hacker",
    "HaX0rz"            => "Hackers",
    "Hax0R"             => "Hack",
    "FuX0r"             => "Fuck",
    "FuX0rz"            => "Fuckers",
    "Tha "              => "The ",
    "spoooo0000nnnn!!!" => "spoon",
    'M$'                => "MS",
    'Micro$oft'         => "Microsoft",
    'micro$oft'         => "microsoft",
    "assz0r"            => "ass",
    "Assz0r"            => "Ass",
    "ph34r"             => "fear",
    "Ph34r"             => "Fear",
    "jo0o"              => "you",
    "Jo0o"              => "You",
    "n!t3"              => "night",
    "N!t3"              => "Night",
    "31337"             => "elite",
    "3r337"             => "Elite",
    "roX0r"             => "rock",
    "roX0rz"            => "rocks",
    "RoX0r"             => "Rock",
    "RoX0rz"            => "Rocks",
    "sexxx0r"           => "sex",
    "SeXXX0r"           => "Sex"
);

// Array union: the two dictionaries share no keys, so nothing is shadowed.
$engl_all = $engl_char + $engl_word;

// Latin character(s) => "1337" token.
$leet_char = array(
    "a"  => "4",
    "c"  => "k",
    "e"  => "3",
    "f"  => "ph",
    "g"  => "9",
    "i"  => "!",
    "k"  => "K",
    "l"  => "1",
    "o"  => "0",
    "qu" => "kw",
    "r"  => "R",
    "s"  => "5",
    "t"  => "7",
    "v"  => "\\/",
    "x"  => "XX",
    "z"  => "2",
    "A"  => "4",
    "B"  => "8",
    "C"  => "K",
    "D"  => "|)",
    "E"  => "3",
    "F"  => "pH",
    "H"  => "#",
    "I"  => "|",
    "L"  => "1",
    "M"  => "|v|",
    "N"  => "|\\|",
    "O"  => "0",
    "S"  => '$',
    "T"  => "7",
    "V"  => "\\/",
    "X"  => "><",
    "Z"  => "2",
    "1"  => "I",
    "2"  => "][",
    "3"  => "]|[",
    "'s" => "'z"
);

// English word => "1337" word.
$leet_word = array(
    "hacker"    => "haX0r",
    "hackers"   => "haX0rz",
    "hack"      => "hax0R",
    "fuck"      => "fuX0r",
    "fuckers"   => "fuX0rz",
    "the "      => "tha ",
    "Hacker"    => "HaX0r",
    "Hackers"   => "HaX0rz",
    "Hack"      => "Hax0R",
    "Fuck"      => "FuX0r",
    "Fuckers"   => "FuX0rz",
    "The "      => "Tha ",
    "spoon"     => "spoooo0000nnnn!!!",
    "MS"        => 'M$',
    "Microsoft" => 'Micro$oft',
    "microsoft" => 'micro$oft',
    "ass"       => "assz0r",
    "Ass"       => "Assz0r",
    "fear"      => "ph34r",
    "Fear"      => "Ph34r",
    "you"       => "jo0o",
    "You"       => "Jo0o",
    "night"     => "n!t3",
    "Night"     => "N!t3",
    "elite"     => "31337",
    "Elite"     => "3r337",
    "rock"      => "roX0r",
    // Was "rox0rz"; aligned with the "roX0rz" key in $engl_word so the
    // word survives a round trip.
    "rocks"     => "roX0rz",
    "Rock"      => "RoX0r",
    "Rocks"     => "RoX0rz",
    "sex"       => "sexxx0r",
    "Sex"       => "SeXXX0r"
);

// Array union: the two dictionaries share no keys, so nothing is shadowed.
$leet_all = $leet_char + $leet_word;

// Special character => HTML equivalent.
// NOTE(review): the reviewed copy of this table mapped every character to
// itself (e.g. "&" => "&"), which looks like entity-decoding damage. The
// entity forms below are reconstructed from the documented purpose; please
// confirm the exact "<br />" and "&nbsp;" forms against the intended output.
$html_conv = array(
    "\n" => "<br />\n",
    "&"  => "&amp;",
    "<"  => "&lt;",
    ">"  => "&gt;",
    "\t" => "&nbsp;&nbsp;&nbsp;&nbsp;",
    "["  => "&#91;",
    "]"  => "&#93;"
);

/**
 * Reads up to $iMaxLen unescaped characters from $sSource, starting at
 * byte offset $iOffset.
 *
 * A doubled backslash (\\) contributes one literal backslash to the token
 * and counts as one character. A backslash followed by anything else marks
 * an escaped character and terminates the token immediately, so a token
 * never contains an escaped character.
 *
 * @param string $sSource String to read from.
 * @param int    $iOffset Byte offset at which to start reading.
 * @param int    $iMaxLen Maximum number of characters in the token.
 *
 * @return string The token; empty when the offset is at/after the end of
 *                the string or points at an escaped character.
 */
function fGrabToken( $sSource, $iOffset, $iMaxLen )
{
    // Slice once, then scan by index. The cast absorbs the `false` that
    // older PHP versions return for an out-of-range offset.
    $sTail   = (string) substr( $sSource, $iOffset );
    $iLength = strlen( $sTail );
    $iPos    = 0;
    $sResult = "";

    while ($iMaxLen > 0 && $iPos < $iLength) {
        if ($sTail[ $iPos ] == "\\") {
            if ($iPos + 1 < $iLength && $sTail[ $iPos + 1 ] == "\\") {
                // Doubled backslash: a literal backslash.
                $sResult .= "\\";
                $iPos += 2;
            } else {
                // Escaped character: the token ends here.
                break;
            }
        } else {
            // Normal character.
            $sResult .= $sTail[ $iPos ];
            $iPos++;
        }
        $iMaxLen--;
    }

    return $sResult;
}

/**
 * Translates $sScript using the token dictionary $sDict.
 *
 * The string is scanned left to right. At each offset the longest
 * dictionary key that matches is replaced by its value; when no key
 * matches, one character is copied as-is. A backslash escapes the following
 * character (copied untranslated, backslash dropped); a doubled backslash
 * yields a single backslash.
 *
 * @param string $sScript Text to translate.
 * @param array  $sDict   Map of token => replacement. An empty (or
 *                        non-array) dictionary returns $sScript unchanged.
 *
 * @return string The translated text.
 */
function fTranslate( $sScript, $sDict )
{
    // Check for empty dictionary
    if (!is_array( $sDict ) || count( $sDict ) == 0) {
        return $sScript;
    }

    // The longest token worth grabbing is the longest *key*. (Iterating
    // the values here, as was done before, made every key longer than the
    // longest replacement unmatchable.) Numeric-looking keys such as "4"
    // are stored as integers by PHP, hence the cast.
    $iMaxLen = 0;
    foreach (array_keys( $sDict ) as $mKey) {
        $iMaxLen = max( $iMaxLen, strlen( (string) $mKey ) );
    }

    // Run through the string and translate tokens
    $sResult    = "";
    $iOffset    = 0;
    $iScriptLen = strlen( $sScript );
    while ($iOffset < $iScriptLen) {
        // Search for the largest token at the current offset by trimming
        // the candidate one character at a time.
        $bFound = false;
        $sToken = fGrabToken( $sScript, $iOffset, $iMaxLen );
        while (strlen( $sToken ) > 0) {
            if (array_key_exists( $sToken, $sDict )) {
                $bFound = true;
                break;
            }
            $sToken = (string) substr( $sToken, 0, -1 );
        }

        if ($bFound) {
            // An explicit flag (rather than testing for an empty
            // replacement) lets a token map to "" and thereby be removed.
            $sResult .= $sDict[ $sToken ];
            // Each backslash in the token came from a doubled backslash
            // in the source, so it consumed one extra source byte.
            $iOffset += strlen( $sToken ) + substr_count( $sToken, "\\" );
        } elseif (substr( $sScript, $iOffset, 1 ) == "\\") {
            // Escaped character: drop the backslash, keep the next char.
            $sResult .= substr( $sScript, $iOffset + 1, 1 );
            $iOffset += 2;
        } else {
            // No match: copy one character as-is.
            $sResult .= substr( $sScript, $iOffset, 1 );
            $iOffset++;
        }
    }

    return $sResult;
}

// The closing PHP tag is intentionally omitted (PHP-only file).