Security.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078
  1. <?php
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP
  6. *
  7. * This content is released under the MIT License (MIT)
  8. *
  9. * Copyright (c) 2014 - 2016, British Columbia Institute of Technology
  10. *
  11. * Permission is hereby granted, free of charge, to any person obtaining a copy
  12. * of this software and associated documentation files (the "Software"), to deal
  13. * in the Software without restriction, including without limitation the rights
  14. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. * copies of the Software, and to permit persons to whom the Software is
  16. * furnished to do so, subject to the following conditions:
  17. *
  18. * The above copyright notice and this permission notice shall be included in
  19. * all copies or substantial portions of the Software.
  20. *
  21. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. * THE SOFTWARE.
  28. *
  29. * @package CodeIgniter
  30. * @author EllisLab Dev Team
  31. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
  32. * @copyright Copyright (c) 2014 - 2016, British Columbia Institute of Technology (http://bcit.ca/)
  33. * @license http://opensource.org/licenses/MIT MIT License
  34. * @link https://codeigniter.com
  35. * @since Version 1.0.0
  36. * @filesource
  37. */
  38. defined('BASEPATH') OR exit('No direct script access allowed');
  39. /**
  40. * Security Class
  41. *
  42. * @package CodeIgniter
  43. * @subpackage Libraries
  44. * @category Security
  45. * @author EllisLab Dev Team
  46. * @link https://codeigniter.com/user_guide/libraries/security.html
  47. */
  48. class CI_Security {
  /**
   * List of sanitize filename strings
   *
   * Substrings that sanitize_filename() strips out of a file name.
   * They are removed with str_replace(), i.e. literally and
   * case-sensitively, so only the exact spellings listed here match.
   *
   * @var array
   */
  public $filename_bad_chars = array(
    '../', '<!--', '-->', '<', '>',
    "'", '"', '&', '$', '#',
    '{', '}', '[', ']', '=',
    ';', '?', '%20', '%22',
    '%3c', // <
    '%253c', // < (double-encoded: %25 is a literal '%')
    '%3e', // >
    '%0e', // byte 0x0E (shift-out control character) - not '>'
    '%28', // (
    '%29', // )
    '%2528', // ( (double-encoded: %25 is a literal '%')
    '%26', // &
    '%24', // $
    '%3f', // ?
    '%3b', // ;
    '%3d' // =
  );
  /**
   * Character set
   *
   * Defaults to UTF-8; the constructor replaces it with the
   * upper-cased 'charset' config item.
   *
   * @var string
   */
  public $charset = 'UTF-8';
  /**
   * XSS Hash
   *
   * Random hash, generated lazily by xss_hash(). _decode_entity()
   * uses it as a temporary stand-in for '&' in URL query strings
   * while entities are being decoded.
   *
   * @var string
   */
  protected $_xss_hash;
  /**
   * CSRF Hash
   *
   * Random hash for Cross Site Request Forgery protection cookie.
   * Populated by _csrf_set_hash().
   *
   * @var string
   */
  protected $_csrf_hash;
  /**
   * CSRF Expire time
   *
   * Expiration time for Cross Site Request Forgery protection cookie.
   * Defaults to two hours (in seconds). Overridden by the
   * 'csrf_expire' config item when CSRF protection is enabled.
   *
   * @var int
   */
  protected $_csrf_expire = 7200;
  /**
   * CSRF Token name
   *
   * Name of the POST field that carries the CSRF token. Overridden by
   * the 'csrf_token_name' config item when CSRF protection is enabled.
   *
   * @var string
   */
  protected $_csrf_token_name = 'ci_csrf_token';
  /**
   * CSRF Cookie name
   *
   * Cookie name for Cross Site Request Forgery protection cookie.
   * Overridden by the 'csrf_cookie_name' config item and prefixed with
   * the 'cookie_prefix' config item when CSRF protection is enabled.
   *
   * @var string
   */
  protected $_csrf_cookie_name = 'ci_csrf_token';
  /**
   * List of never allowed strings
   *
   * Maps a literal substring to its replacement. _do_never_allowed()
   * applies the whole map with str_replace(), so matching is
   * case-sensitive and happens in the order listed here.
   *
   * @var array
   */
  protected $_never_allowed_str = array(
    'document.cookie' => '[removed]',
    'document.write' => '[removed]',
    '.parentNode' => '[removed]',
    '.innerHTML' => '[removed]',
    '-moz-binding' => '[removed]',
    '<!--' => '&lt;!--',
    '-->' => '--&gt;',
    '<![CDATA[' => '&lt;![CDATA[',
    '<comment>' => '&lt;comment&gt;',
    '<%' => '&lt;&#37;'
  );
  /**
   * List of never allowed regex replacements
   *
   * Pattern fragments without delimiters. _do_never_allowed() wraps
   * each one as '#'.$regex.'#is' and replaces every match with
   * '[removed]'.
   *
   * NOTE(review): in the last entry, `[^\\1]` sits inside a character
   * class, where PCRE reads `\1` as an octal escape rather than a
   * back-reference - so it likely does not mean "anything except the
   * opening quote". Confirm before relying on that reading.
   *
   * @var array
   */
  protected $_never_allowed_regex = array(
    'javascript\s*:',
    '(document|(document\.)?window)\.(location|on\w*)',
    'expression\s*(\(|&\#40;)', // CSS and IE
    'vbscript\s*:', // IE, surprise!
    'wscript\s*:', // IE
    'jscript\s*:', // IE
    'vbs\s*:', // IE
    'Redirect\s+30\d',
    "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
  );
  /**
   * Class constructor
   *
   * When the 'csrf_protection' config item is enabled, loads the CSRF
   * settings from the config, applies the cookie prefix and initialises
   * the CSRF hash. The charset is always taken from the config.
   *
   * @return void
   */
  public function __construct()
  {
    if (config_item('csrf_protection'))
    {
      // A config value of NULL means "not set": keep the class default
      foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $setting)
      {
        $configured = config_item($setting);
        if ($configured !== NULL)
        {
          $this->{'_'.$setting} = $configured;
        }
      }

      // Prepend the application specific cookie prefix, if there is one
      $prefix = config_item('cookie_prefix');
      if ($prefix)
      {
        $this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
      }

      $this->_csrf_set_hash();
    }

    $this->charset = strtoupper(config_item('charset'));
    log_message('info', 'Security Class Initialized');
  }
  183. // --------------------------------------------------------------------
  /**
   * CSRF Verify
   *
   * For non-POST requests only the CSRF cookie is (re)sent. For POST
   * requests the token submitted in $_POST must match the one in
   * $_COOKIE, unless the current URI matches one of the patterns in
   * the 'csrf_exclude_uris' config item. A failed check ends in
   * csrf_show_error().
   *
   * Both tokens must be strings: array values are rejected outright
   * instead of being compared, and the comparison is done with
   * hash_equals() (when available) so it does not leak timing
   * information.
   *
   * @return CI_Security|false Whatever csrf_set_cookie() returns for
   *                           non-POST requests, $this otherwise
   */
  public function csrf_verify()
  {
    // If it's not a POST request we will set the CSRF cookie
    if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
    {
      return $this->csrf_set_cookie();
    }

    // Check if URI has been whitelisted from CSRF checks
    if ($exclude_uris = config_item('csrf_exclude_uris'))
    {
      $uri = load_class('URI', 'core');
      foreach ($exclude_uris as $excluded)
      {
        if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
        {
          return $this;
        }
      }
    }

    // The tokens must exist in both the _POST and _COOKIE arrays,
    // be plain strings and be equal
    $valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
      && is_string($_POST[$this->_csrf_token_name])
      && is_string($_COOKIE[$this->_csrf_cookie_name]);

    if ($valid)
    {
      // hash_equals() is only native since PHP 5.6; fall back to a
      // strict comparison where it is missing
      $valid = function_exists('hash_equals')
        ? hash_equals($_COOKIE[$this->_csrf_cookie_name], $_POST[$this->_csrf_token_name])
        : ($_POST[$this->_csrf_token_name] === $_COOKIE[$this->_csrf_cookie_name]);
    }

    if ( ! $valid)
    {
      $this->csrf_show_error();
    }

    // We kill this since we're done and we don't want to pollute the _POST array
    unset($_POST[$this->_csrf_token_name]);

    // Regenerate on every submission?
    if (config_item('csrf_regenerate'))
    {
      // Nothing should last forever
      unset($_COOKIE[$this->_csrf_cookie_name]);
      $this->_csrf_hash = NULL;
    }

    $this->_csrf_set_hash();
    $this->csrf_set_cookie();

    log_message('info', 'CSRF token verified');
    return $this;
  }
  228. // --------------------------------------------------------------------
  /**
   * CSRF Set Cookie
   *
   * Sends the CSRF cookie holding the current hash. Nothing is sent
   * when the 'cookie_secure' config item is on but the request is not
   * served over HTTPS.
   *
   * @codeCoverageIgnore
   * @return CI_Security|false FALSE when a secure cookie is required
   *                           on a non-HTTPS request, $this otherwise
   */
  public function csrf_set_cookie()
  {
    $expires_at = time() + $this->_csrf_expire;
    $https_only = (bool) config_item('cookie_secure');

    // A secure-only cookie cannot be delivered over plain HTTP
    if ($https_only && ! is_https())
    {
      return FALSE;
    }

    $path = config_item('cookie_path');
    $domain = config_item('cookie_domain');
    $http_only = config_item('cookie_httponly');

    setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expires_at, $path, $domain, $https_only, $http_only);
    log_message('info', 'CSRF cookie sent');

    return $this;
  }
  255. // --------------------------------------------------------------------
  /**
   * Show CSRF Error
   *
   * Hands a fixed "not allowed" message and HTTP status 403 to
   * show_error().
   *
   * @return void
   */
  public function csrf_show_error()
  {
    $message = 'The action you have requested is not allowed.';
    $status_code = 403;

    show_error($message, $status_code);
  }
  265. // --------------------------------------------------------------------
  /**
   * Get CSRF Hash
   *
   * Accessor for the protected CSRF hash.
   *
   * @see CI_Security::$_csrf_hash
   * @return string CSRF hash
   */
  public function get_csrf_hash()
  {
    $hash = $this->_csrf_hash;

    return $hash;
  }
  276. // --------------------------------------------------------------------
  /**
   * Get CSRF Token Name
   *
   * Accessor for the protected CSRF token (form field) name.
   *
   * @see CI_Security::$_csrf_token_name
   * @return string CSRF token name
   */
  public function get_csrf_token_name()
  {
    $token_name = $this->_csrf_token_name;

    return $token_name;
  }
  287. // --------------------------------------------------------------------
  /**
   * XSS Clean
   *
   * Sanitizes data so that Cross Site Scripting attacks can be
   * prevented. The method runs the input through several passes:
   * invisible character removal, URL and entity decoding, removal of
   * never-allowed strings, neutralising PHP tags, compacting exploded
   * keywords, stripping script from links/images, escaping naughty
   * HTML elements and defusing dangerous function calls.
   * Nothing is ever 100% foolproof, of course.
   *
   * Note: Should only be used to deal with data upon submission.
   * It's not something that should be used for general
   * runtime processing.
   *
   * Arrays are cleaned element by element (recursively). Note that
   * $is_image is not forwarded to the elements.
   *
   * @link http://channel.bitflux.ch/wiki/XSS_Prevention
   * Based in part on some code and ideas from Bitflux.
   *
   * @link http://ha.ckers.org/xss.html
   * List of vulnerabilities used while developing this filter.
   *
   * @param string|string[] $str Input data
   * @param bool $is_image Whether the input is an image
   * @return string|string[]|bool The cleaned data; for images, TRUE
   *                              when nothing had to be changed and
   *                              FALSE otherwise
   */
  public function xss_clean($str, $is_image = FALSE)
  {
    // Is the string an array?
    if (is_array($str))
    {
      // each() was removed in PHP 8, so walk the keys with foreach
      foreach (array_keys($str) as $key)
      {
        $str[$key] = $this->xss_clean($str[$key]);
      }

      return $str;
    }

    // Remove Invisible Characters
    $str = remove_invisible_characters($str);

    /*
     * URL Decode
     *
     * Just in case stuff like this is submitted:
     *
     * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
     *
     * Note: Use rawurldecode() so it does not remove plus signs
     */
    if (stripos($str, '%') !== false)
    {
      // Keep decoding until the string stops changing, so that
      // multiply-encoded input is fully unwrapped
      do
      {
        $oldstr = $str;
        $str = rawurldecode($str);
        $str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
      }
      while ($oldstr !== $str);
      unset($oldstr);
    }

    /*
     * Convert character entities to ASCII
     *
     * This permits our tests below to work reliably.
     * We only convert entities that are within tags since
     * these are the ones that will pose security problems.
     */
    $str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
    $str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

    // Remove Invisible Characters Again!
    $str = remove_invisible_characters($str);

    /*
     * Convert all tabs to spaces
     *
     * This prevents strings like this: ja	vascript
     * NOTE: we deal with spaces between characters later.
     * NOTE: preg_replace was found to be amazingly slow here on
     * large blocks of data, so we use str_replace.
     */
    $str = str_replace("\t", ' ', $str);

    // Capture converted string for later comparison
    $converted_string = $str;

    // Remove Strings that are never allowed
    $str = $this->_do_never_allowed($str);

    /*
     * Makes PHP tags safe
     *
     * Note: XML tags are inadvertently replaced too:
     *
     * <?xml
     *
     * But it doesn't seem to pose a problem.
     */
    if ($is_image === TRUE)
    {
      // Images have a tendency to have the PHP short opening and
      // closing tags every so often so we skip those and only
      // do the long opening tags.
      $str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
    }
    else
    {
      $str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
    }

    /*
     * Compact any exploded words
     *
     * This corrects words like: j a v a s c r i p t
     * These words are compacted back to their correct state.
     */
    $words = array(
      'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
      'vbs', 'script', 'base64', 'applet', 'alert', 'document',
      'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
    );

    foreach ($words as $word)
    {
      $word = implode('\s*', str_split($word)).'\s*';
      // We only want to do this when it is followed by a non-word character
      // That way valid stuff like "dealer to" does not become "dealerto"
      $str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
    }

    /*
     * Remove disallowed Javascript in links or img tags
     * We used to do some version comparisons and use of stripos(),
     * but it is dog slow compared to these simplified non-capturing
     * preg_match(), especially if the pattern exists in the string
     *
     * Note: It was reported that not only space characters, but all in
     * the following pattern can be parsed as separators between a tag name
     * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
     * ... however, remove_invisible_characters() above already strips the
     * hex-encoded ones, so we'll skip them below.
     */
    do
    {
      $original = $str;

      if (preg_match('/<a/i', $str))
      {
        $str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
      }

      if (preg_match('/<img/i', $str))
      {
        $str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
      }

      if (preg_match('/script|xss/i', $str))
      {
        $str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
      }
    }
    while ($original !== $str);
    unset($original);

    /*
     * Sanitize naughty HTML elements
     *
     * If a tag containing any of the words in the list
     * below is found, the tag gets converted to entities.
     *
     * So this: <blink>
     * Becomes: &lt;blink&gt;
     */
    $pattern = '#'
      .'<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)' // tag start and name, followed by a non-tag character
      .'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
      // optional attributes
      .'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
      .'[^\s\042\047>/=]+' // attribute characters
      // optional attribute-value
      .'(?:\s*=' // attribute-value separator
      .'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
      .')?' // end optional attribute-value group
      .')*)' // end optional attributes group
      .'[^>]*)(?<closeTag>\>)?#isS';

    // Note: It would be nice to optimize this for speed, BUT
    // only matching the naughty elements here results in
    // false positives and in turn - vulnerabilities!
    do
    {
      $old_str = $str;
      $str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
    }
    while ($old_str !== $str);
    unset($old_str);

    /*
     * Sanitize naughty scripting elements
     *
     * Similar to above, only instead of looking for
     * tags it looks for PHP and JavaScript commands
     * that are disallowed. Rather than removing the
     * code, it simply converts the parenthesis to entities
     * rendering the code un-executable.
     *
     * For example: eval('some code')
     * Becomes: eval&#40;'some code'&#41;
     */
    $str = preg_replace(
      '#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
      '\\1\\2&#40;\\3&#41;',
      $str
    );

    // Final clean up
    // This adds a bit of extra precaution in case
    // something got through the above filters
    $str = $this->_do_never_allowed($str);

    /*
     * Images are Handled in a Special Way
     * - Essentially, we want to know that after all of the character
     * conversion is done whether any unwanted, likely XSS, code was found.
     * If not, we return TRUE, as the image is clean.
     * However, if the string post-conversion does not match the
     * string post-removal of XSS, then it fails, as there was unwanted XSS
     * code found and removed/changed during processing.
     */
    if ($is_image === TRUE)
    {
      return ($str === $converted_string);
    }

    return $str;
  }
  506. // --------------------------------------------------------------------
  /**
   * XSS Hash
   *
   * Returns the XSS hash, creating it on first use: 16 random bytes
   * rendered as hex, or an md5() based value when no random bytes
   * could be obtained.
   *
   * @see CI_Security::$_xss_hash
   * @return string XSS hash
   */
  public function xss_hash()
  {
    // Already generated during this request
    if ($this->_xss_hash !== NULL)
    {
      return $this->_xss_hash;
    }

    $bytes = $this->get_random_bytes(16);
    if ($bytes === FALSE)
    {
      $this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
    }
    else
    {
      $this->_xss_hash = bin2hex($bytes);
    }

    return $this->_xss_hash;
  }
  526. // --------------------------------------------------------------------
  /**
   * Get random bytes
   *
   * Tries, in order: random_bytes(), mcrypt_create_iv(), reading
   * /dev/urandom and openssl_random_pseudo_bytes().
   *
   * @param int $length Output length; must be a non-zero value made
   *                    up of digits only
   * @return string|false Random bytes, or FALSE when $length is
   *                      invalid or no source was usable
   */
  public function get_random_bytes($length)
  {
    $length_is_usable = ! empty($length) && ctype_digit((string) $length);
    if ( ! $length_is_usable)
    {
      return FALSE;
    }

    if (function_exists('random_bytes'))
    {
      try
      {
        // The cast is required to avoid TypeError
        $bytes = random_bytes((int) $length);
        return $bytes;
      }
      catch (Exception $failure)
      {
        // If random_bytes() can't do the job, we can't either ...
        // There's no point in using fallbacks.
        log_message('error', $failure->getMessage());
        return FALSE;
      }
    }

    // Unfortunately, none of the following PRNGs is guaranteed to exist ...
    if (defined('MCRYPT_DEV_URANDOM'))
    {
      $bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
      if ($bytes !== FALSE)
      {
        return $bytes;
      }
    }

    if (is_readable('/dev/urandom'))
    {
      $handle = fopen('/dev/urandom', 'rb');
      if ($handle !== FALSE)
      {
        // Try not to waste entropy ...
        if (is_php('5.4'))
        {
          stream_set_chunk_size($handle, $length);
        }

        $bytes = fread($handle, $length);
        fclose($handle);

        if ($bytes !== FALSE)
        {
          return $bytes;
        }
      }
    }

    return function_exists('openssl_random_pseudo_bytes')
      ? openssl_random_pseudo_bytes($length)
      : FALSE;
  }
  576. // --------------------------------------------------------------------
  /**
   * HTML Entities Decode
   *
   * A replacement for html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here.
   *
   * The entity translation table is cached per character set, so
   * calls with different charsets do not reuse a table that was
   * built for another encoding.
   *
   * @link http://php.net/html-entity-decode
   *
   * @param string $str Input
   * @param string $charset Character set; defaults to $this->charset
   * @return string
   */
  public function entity_decode($str, $charset = NULL)
  {
    // Nothing to decode without an ampersand
    if (strpos($str, '&') === FALSE)
    {
      return $str;
    }

    // Translation tables, keyed by charset
    static $_entities = array();

    isset($charset) OR $charset = $this->charset;
    $flag = is_php('5.4')
      ? ENT_COMPAT | ENT_HTML5
      : ENT_COMPAT;

    if ( ! isset($_entities[$charset]))
    {
      $table = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

      // If we're not on PHP 5.4+, add the possibly dangerous HTML 5
      // entities to the array manually
      if ($flag === ENT_COMPAT)
      {
        $table[':'] = '&colon;';
        $table['('] = '&lpar;';
        $table[')'] = '&rpar;';
        $table["\n"] = '&NewLine;';
        $table["\t"] = '&Tab;';
      }

      $_entities[$charset] = $table;
    }

    $entities = $_entities[$charset];

    // Repeat until a pass changes nothing, so nested encodings unwrap
    do
    {
      $str_compare = $str;

      // Decode standard entities, avoiding false positives
      if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
      {
        $replace = array();
        $matches = array_unique(array_map('strtolower', $matches[0]));

        // Iterate by value: a by-reference loop variable would stay
        // bound to the last element after the loop
        foreach ($matches as $match)
        {
          if (($char = array_search($match.';', $entities, TRUE)) !== FALSE)
          {
            $replace[$match] = $char;
          }
        }

        $str = str_replace(array_keys($replace), array_values($replace), $str);
      }

      // Decode numeric & UTF16 two byte entities
      $str = html_entity_decode(
        preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
        $flag,
        $charset
      );

      if ($flag === ENT_COMPAT)
      {
        $str = str_replace(array_values($entities), array_keys($entities), $str);
      }
    }
    while ($str_compare !== $str);

    return $str;
  }
  650. // --------------------------------------------------------------------
  /**
   * Sanitize Filename
   *
   * Strips every entry of $filename_bad_chars from the name. Unless
   * $relative_path is set, './' and '/' are stripped as well. Removal
   * is repeated until the name stops changing, so that removing one
   * sequence cannot assemble another.
   *
   * @param string $str Input file name
   * @param bool $relative_path Whether to preserve paths
   * @return string
   */
  public function sanitize_filename($str, $relative_path = FALSE)
  {
    $blacklist = $this->filename_bad_chars;
    if ( ! $relative_path)
    {
      array_push($blacklist, './', '/');
    }

    $str = remove_invisible_characters($str, FALSE);

    while (TRUE)
    {
      $stripped = str_replace($blacklist, '', $str);
      if ($stripped === $str)
      {
        break;
      }

      $str = $stripped;
    }

    return stripslashes($str);
  }
  675. // ----------------------------------------------------------------
  /**
   * Strip Image Tags
   *
   * Replaces each <img> tag with the value of its src attribute,
   * handling quoted values first and unquoted values second.
   *
   * @param string $str
   * @return string
   */
  public function strip_image_tags($str)
  {
    $quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';
    $unquoted_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

    // In both patterns the second capture group holds the src value
    $src_only = '\\2';

    return preg_replace(array($quoted_src, $unquoted_src), $src_only, $str);
  }
  693. // ----------------------------------------------------------------
  /**
   * URL-decode taking spaces into account
   *
   * Callback for xss_clean(): when a percent-sequence contains
   * whitespace, the whitespace is dropped and the result URL-decoded;
   * a sequence without whitespace is returned untouched.
   *
   * @see https://github.com/bcit-ci/CodeIgniter/issues/4877
   * @param array $matches
   * @return string
   */
  protected function _urldecodespaces($matches)
  {
    $sequence = $matches[0];
    $compact = preg_replace('#\s+#', '', $sequence);

    if ($compact === $sequence)
    {
      return $sequence;
    }

    return rawurldecode($compact);
  }
  709. // ----------------------------------------------------------------
  /**
   * Compact Exploded Words
   *
   * Callback method for xss_clean(): strips all whitespace from the
   * matched word (first group) and re-appends the character that
   * followed it (second group), e.g. 'j a v a s c r i p t'.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches
   * @return string
   */
  protected function _compact_exploded_words($matches)
  {
    $word = preg_replace('/\s+/s', '', $matches[1]);
    $trailing = $matches[2];

    return $word.$trailing;
  }
  724. // --------------------------------------------------------------------
  /**
   * Sanitize Naughty HTML
   *
   * Callback method for xss_clean() to remove naughty HTML elements.
   *
   * Unclosed tags and blacklisted elements are escaped. For any other
   * tag the attributes are walked one at a time; blacklisted attribute
   * names and attributes with an empty value are replaced with
   * 'xss=removed', and the tag is rebuilt from what remains.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches
   * @return string
   */
  protected function _sanitize_naughty_html($matches)
  {
    static $naughty_tags = array(
      'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
      'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
      'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
      'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
    );

    static $evil_attributes = array(
      'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
    );

    // Unclosed tags are escaped as they are
    if (empty($matches['closeTag']))
    {
      return '&lt;'.$matches[1];
    }

    // A naughty element is escaped in full
    if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
    {
      return '&lt;'.$matches[1].'&gt;';
    }

    // No attributes group at all: leave the match untouched
    if ( ! isset($matches['attributes']))
    {
      return $matches[0];
    }

    // Attributes that survived the filtering
    $kept = array();

    // Catches a single name=value attribute
    $attribute_regex = '#'
      .'(?<name>[^\s\042\047>/=]+)' // attribute characters
      // attribute-value separator followed by a single, double or non-quoted value
      .'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
      .'#i';

    // Blacklist pattern for evil attribute names
    $evil_regex = '#^('.implode('|', $evil_attributes).')$#i';

    $remaining = $matches['attributes'];

    // Each iteration consumes a single attribute
    do
    {
      // Strip any non-alpha characters that may precede an attribute.
      // Browsers often parse these incorrectly and that has been a
      // source of numerous XSS issues.
      $remaining = preg_replace('#^[^a-z]+#i', '', $remaining);

      if ( ! preg_match($attribute_regex, $remaining, $found, PREG_OFFSET_CAPTURE))
      {
        // No (valid) attribute found? Discard everything else inside the tag
        break;
      }

      // Evil by name, or an equals sign with no (unquoted) value
      $is_evil = (preg_match($evil_regex, $found['name'][0]) || trim($found['value'][0]) === '');
      $kept[] = $is_evil ? 'xss=removed' : $found[0][0];

      // Continue right after the attribute that was just handled
      $remaining = substr($remaining, $found[0][1] + strlen($found[0][0]));
    }
    while ($remaining !== '');

    $rendered = empty($kept) ? '' : ' '.implode(' ', $kept);

    return '<'.$matches['slash'].$matches['tagName'].$rendered.'>';
  }
  803. // --------------------------------------------------------------------
  /**
   * JS Link Removal
   *
   * Callback method for xss_clean() to sanitize links: the tag's
   * attribute string is filtered, any href that leads into a
   * scripting construct is cut out, and the result is put back in
   * place of the original attribute string.
   *
   * Doing this per tag limits the PCRE backtracks, making it more
   * performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from
   * being triggered in PHP 5.2+ on link-heavy strings.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _js_link_removal($match)
  {
    $tag = $match[0];
    $raw_attributes = $match[1];

    $dangerous_href = '#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si';
    $safe_attributes = preg_replace($dangerous_href, '', $this->_filter_attributes($raw_attributes));

    return str_replace($raw_attributes, $safe_attributes, $tag);
  }
  829. // --------------------------------------------------------------------
  /**
   * JS Image Removal
   *
   * Callback method for xss_clean() to sanitize image tags: the tag's
   * attribute string is filtered, any src that leads into a scripting
   * construct is cut out, and the result is put back in place of the
   * original attribute string.
   *
   * Doing this per tag limits the PCRE backtracks, making it more
   * performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from
   * being triggered in PHP 5.2+ on image tag heavy strings.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _js_img_removal($match)
  {
    $tag = $match[0];
    $raw_attributes = $match[1];

    $dangerous_src = '#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si';
    $safe_attributes = preg_replace($dangerous_src, '', $this->_filter_attributes($raw_attributes));

    return str_replace($raw_attributes, $safe_attributes, $tag);
  }
  855. // --------------------------------------------------------------------
  /**
   * Attribute Conversion
   *
   * Callback for xss_clean(): within the matched attribute, angle
   * brackets become entities and every backslash is doubled.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _convert_attribute($match)
  {
    $search = array('>', '<', '\\');
    $replacement = array('&gt;', '&lt;', '\\\\');

    return str_replace($search, $replacement, $match[0]);
  }
  867. // --------------------------------------------------------------------
  /**
   * Filter Attributes
   *
   * Keeps only the quoted name="value" attributes found in $str,
   * concatenated in order, with block comments removed from each.
   *
   * @used-by CI_Security::_js_img_removal()
   * @used-by CI_Security::_js_link_removal()
   * @param string $str
   * @return string
   */
  protected function _filter_attributes($str)
  {
    $found = array();
    if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
    {
      // No quoted attribute at all
      return '';
    }

    $filtered = '';
    foreach ($found[0] as $attribute)
    {
      $filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
    }

    return $filtered;
  }
  890. // --------------------------------------------------------------------
  /**
   * HTML Entity Decode Callback
   *
   * Decodes the entities in the matched text while leaving the '&'
   * separators of URL GET variables alone: each such '&' is swapped
   * for the XSS hash before decoding and swapped back afterwards.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _decode_entity($match)
  {
    $placeholder = $this->xss_hash();

    // Protect GET variables in URLs
    $protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $placeholder.'\\1=\\2', $match[0]);

    // Decode, then un-protect URL GET vars
    $decoded = $this->entity_decode($protected, $this->charset);

    return str_replace($placeholder, '&', $decoded);
  }
  910. // --------------------------------------------------------------------
  /**
   * Do Never Allowed
   *
   * Applies the literal replacements from $_never_allowed_str, then
   * replaces every match of each $_never_allowed_regex pattern with
   * '[removed]'.
   *
   * @used-by CI_Security::xss_clean()
   * @param string
   * @return string
   */
  protected function _do_never_allowed($str)
  {
    $literals = array_keys($this->_never_allowed_str);
    $substitutes = array_values($this->_never_allowed_str);
    $str = str_replace($literals, $substitutes, $str);

    foreach ($this->_never_allowed_regex as $fragment)
    {
      $pattern = '#'.$fragment.'#is';
      $str = preg_replace($pattern, '[removed]', $str);
    }

    return $str;
  }
  927. // --------------------------------------------------------------------
  /**
   * Set CSRF Hash and Cookie
   *
   * Makes sure $_csrf_hash is populated. A well-formed value in the
   * CSRF cookie (32 hex characters) is reused; otherwise a new hash
   * is generated from 16 random bytes, or from md5() when no random
   * bytes could be obtained.
   *
   * @return string
   */
  protected function _csrf_set_hash()
  {
    // Already set for this request
    if ($this->_csrf_hash !== NULL)
    {
      return $this->_csrf_hash;
    }

    // If the cookie exists we will use its value.
    // We don't necessarily want to regenerate it with
    // each page load since a page could contain embedded
    // sub-pages causing this feature to fail
    $cookie_name = $this->_csrf_cookie_name;
    if (isset($_COOKIE[$cookie_name])
      && is_string($_COOKIE[$cookie_name])
      && preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
    {
      $this->_csrf_hash = $_COOKIE[$cookie_name];
      return $this->_csrf_hash;
    }

    $bytes = $this->get_random_bytes(16);
    if ($bytes === FALSE)
    {
      $this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
    }
    else
    {
      $this->_csrf_hash = bin2hex($bytes);
    }

    return $this->_csrf_hash;
  }
  953. }