Posted by Master on Thu 9 Jul 13:14 (modification of post by Master view diff)
report abuse | View followups from Master | download | new post
- function xss_clean($str) {
- /*
- * Remove Null Characters
- *
- * This prevents sandwiching null characters
- * between ascii characters, like Java\0script.
- */
- /*
- * Validate standard character entities
- *
- * Add a semicolon if missing. We do this to enable
- * the conversion of entities to ASCII later.
- */
- /*
- * Validate UTF16 two byte encoding (x00)
- *
- * Just as above, adds a semicolon if missing.
- */
- /*
- * URL Decode
- *
- * Just in case stuff like this is submitted:
- * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
- * Note: Use rawurldecode() so it does not remove plus signs
- */
- /*
- * Convert character entities to ASCII
- *
- * This permits our tests below to work reliably.
- * We only convert entities that are within tags since
- * these are the ones that will pose security problems.
- */
- $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);
- $str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);
- /*
- Old code, kept for reference: replacing it with the preg_replace_callback() calls above proved more memory-efficient
- if (preg_match_all("/[a-z]+=([\'\"]).*?\\1/si", $str, $matches)) {
- for ($i = 0; $i < count($matches[0]); $i++) {
- if (stristr($matches[0][$i], '>')) {
- $str = str_replace( $matches['0'][$i], str_replace('>', '<', $matches[0][$i]), $str);
- }
- }
- }
- if (preg_match_all("/<([\w]+)[^>]*>/si", $str, $matches)) {
- for ($i = 0; $i < count($matches[0]); $i++) {
- $str = str_replace($matches[0][$i], $this->_html_entity_decode($matches[0][$i], $charset), $str);
- }
- }
- */
- /*
- * Convert all tabs to spaces
- *
- * This prevents strings like this: ja<TAB>vascript (a tab character hidden inside the keyword)
- * NOTE: we deal with spaces between characters later.
- * NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
- * so we use str_replace.
- */
- /*
- * Not Allowed Under Any Conditions
- */
- 'document.cookie' => '[removed]',
- 'document.write' => '[removed]',
- '.parentNode' => '[removed]',
- '.innerHTML' => '[removed]',
- 'window.location' => '[removed]',
- '-moz-binding' => '[removed]',
- '<!--' => '<!--',
- '-->' => '-->',
- '<!CDATA[' => '<![CDATA['
- );
- foreach ($bad as $key=>$val) {
- }
- "javascript\s*:" => '[removed]',
- "expression\s*\(" => '[removed]', // CSS and IE
- "Redirect\s+302" => '[removed]'
- );
- foreach ($bad as $key=>$val) {
- }
- /*
- * Makes PHP tags safe
- *
- * Note: XML tags are inadvertently replaced too:
- * <?xml
- * But it doesn't seem to pose a problem.
- */
- $str = str_replace(array('<?php', '<?PHP', '<?', '?' . '>'), array('<?php', '<?PHP', '<?', '?>'), $str);
- /*
- * Compact any exploded words
- *
- * This corrects words like: j a v a s c r i p t
- * These words are compacted back to their correct state.
- */
- $words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
- foreach ($words as $word) {
- $temp = '';
- }
- // We only want to do this when it is followed by a non-word character
- // That way valid stuff like "dealer to" does not become "dealerto"
- $str = preg_replace('#(' . substr($temp, 0, -3) . ')(\W)#ise', "preg_replace('/\s+/s', '', '\\1').'\\2'", $str);
- }
- /*
- * Remove disallowed Javascript in links or img tags
- */
- do {
- $original = $str;
- if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '</a>') !== FALSE) or preg_match("/<\/a>/i", $str)) {
- }
- if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '<img') !== FALSE) or preg_match("/img/i", $str)) {
- }
- if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && (stripos($str, 'script') !== FALSE or stripos($str, 'xss') !== FALSE)) or preg_match("/(script|xss)/i", $str)) {
- }
- } while ($original != $str);
- /*
- * Remove JavaScript Event Handlers
- *
- * Note: This code is a little blunt. It removes the event handler and anything up to the closing >,
- * but it's unlikely to be a problem.
- */
- $event_handlers = array('onblur', 'onchange', 'onclick', 'onfocus', 'onload', 'onmouseover', 'onmouseup', 'onmousedown', 'onselect', 'onsubmit', 'onunload', 'onkeypress', 'onkeydown', 'onkeyup', 'onresize', 'xmlns');
- $str = preg_replace("#<([^>]+)(" . implode('|', $event_handlers) . ")([^>]*)>#iU", "<\\1\\2\\3>", $str);
- /*
- * Sanitize naughty HTML elements
- *
- * If a tag containing any of the words in the list
- * below is found, the tag gets converted to entities.
- *
- * So this: <blink>
- * Becomes: &lt;blink&gt;
- */
- $str = preg_replace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "<\\1\\2\\3>", $str);
- /*
- * Sanitize naughty scripting elements
- *
- * Similar to above, only instead of looking for tags it looks for PHP and JavaScript commands
- * that are disallowed. Rather than removing the code, it simply converts the parentheses to entities
- * rendering the code un-executable.
- *
- * For example: eval('some code')
- * Becomes: eval&#40;'some code'&#41;
- */
- $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2(\\3)", $str);
- /*
- * Final clean up
- *
- * This adds a bit of extra precaution in case something got through the above filters
- */
- 'document.cookie' => '[removed]',
- 'document.write' => '[removed]',
- '.parentNode' => '[removed]',
- '.innerHTML' => '[removed]',
- 'window.location' => '[removed]',
- '-moz-binding' => '[removed]',
- '<!--' => '<!--', '-->' => '-->',
- '<!CDATA[' => '<![CDATA['
- );
- foreach ($bad as $key=>$val) {
- }
- "javascript\s*:" => '[removed]',
- "expression\s*\(" => '[removed]', // CSS and IE
- "Redirect\s+302" => '[removed]'
- );
- foreach ($bad as $key=>$val) {
- }
- log_message('debug', "XSS Filtering completed");
- return $str;
- }
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.