htmlpurifier.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. <?php
  2. /**
  3. * HTML Purifier Phorum Mod. Filter your HTML the Standards-Compliant Way!
  4. *
  5. * This Phorum mod enables users to post raw HTML into Phorum. But never
  6. * fear: with the help of HTML Purifier, this HTML will be beat into
  7. * de-XSSed and standards-compliant form, safe for general consumption.
  8. * It is not recommended, but possible to run this mod in parallel
  9. * with other formatters (in short, please DISABLE the BBcode mod).
  10. *
  11. * For help migrating from your previous markup language to pure HTML
  12. * please check the migrate.bbcode.php file.
  13. *
  14. * If you'd like to use this with a WYSIWYG editor, make sure that
  15. * editor sets $PHORUM['mod_htmlpurifier']['wysiwyg'] to true. Otherwise,
  16. * administrators who need to edit other people's comments may be at
  17. * risk for some nasty attacks.
  18. *
  19. * Tested with Phorum 5.2.11.
  20. */
  // Note: Cache data is base64 encoded because Phorum insists on flinging
  // it to the user and expecting it to come back unharmed, newlines and
  // all, which ain't happening. It's slower, it takes up more space, but
  // at least it won't get mutilated.
  /**
   * Purifies the body of every message in a Phorum data array.
   *
   * For each entry that has a 'body':
   *  - real messages (truthy id) flagged with meta 'htmlpurifier_light' are
   *    purified as-is and nothing is cached;
   *  - real messages with an up-to-date cache get the base64-decoded cached
   *    body;
   *  - real messages without meta 'body_cache_serial' are run through
   *    phorum_htmlpurifier_migrate() first;
   *  - everything else has Phorum's entity escaping reversed before being
   *    purified.
   * Freshly purified bodies of real messages are written back to the
   * database cache through phorum_db_update_message().
   *
   * @param array $data Messages keyed by message id; a falsy id denotes a
   *                    "fake" message (e.g. a preview) that is never cached.
   * @return array The same array with each 'body' replaced by purified HTML.
   */
  function phorum_htmlpurifier_format($data)
  {
      $PHORUM = $GLOBALS["PHORUM"];
      // Plain assignment: objects are handles in PHP 5+, and "=&" applied
      // to a function result raises "Only variables should be assigned by
      // reference" unless the callee is declared to return by reference.
      $purifier = HTMLPurifier::getInstance();
      $cache_serial = $PHORUM['mod_htmlpurifier']['body_cache_serial'];

      // Entity map used to reverse Phorum's escaping. It is applied with
      // strtr() so the text is scanned exactly once: a sequential
      // str_replace() would turn "&amp;quot;" into "&quot;" and then into
      // '"', corrupting attribute values that contain an escaped quote.
      $unescape = array(
          '&lt;'   => '<',
          '&gt;'   => '>',
          '&amp;'  => '&',
          '&quot;' => '"',
      );

      foreach ($data as $message_id => $message) {
          if (!isset($message['body'])) {
              continue;
          }

          if ($message_id) {
              // we're dealing with a real message, not a fake, so
              // there are a number of shortcuts that can be taken

              if (isset($message['meta']['htmlpurifier_light'])) {
                  // format hook was called outside of Phorum's normal
                  // functions, do the abridged purification
                  $data[$message_id]['body'] = $purifier->purify($message['body']);
                  continue;
              }

              if (!empty($PHORUM['args']['purge'])) {
                  // purge the cache; must happen before the cache check
                  unset($message['meta']['body_cache']);
              }

              if (
                  isset($message['meta']['body_cache']) &&
                  isset($message['meta']['body_cache_serial']) &&
                  $message['meta']['body_cache_serial'] == $cache_serial
              ) {
                  // cached version is present, bail out early
                  $data[$message_id]['body'] = base64_decode($message['meta']['body_cache']);
                  continue;
              }
          }

          // migration might edit this array, that's why it's defined
          // so early
          $updated_message = array();

          // create the $body variable
          if (
              $message_id && // message must be real to migrate
              !isset($message['meta']['body_cache_serial'])
          ) {
              // perform migration; the signature and edit message are taken
              // off first so that only the user's own text is migrated
              $fake_data = array();
              list($signature, $edit_message) = phorum_htmlpurifier_remove_sig_and_editmessage($message);
              $fake_data[$message_id] = $message;
              $fake_data = phorum_htmlpurifier_migrate($fake_data);
              $body = $fake_data[$message_id]['body'];
              $body = str_replace("<phorum break>\n", "\n", $body);
              $updated_message['body'] = $body; // save the migrated text
              $body .= $signature . $edit_message; // add the extras back in
          } else {
              // reverse Phorum's pre-processing
              $body = $message['body'];
              // order is important: strip the break markers first
              $body = str_replace("<phorum break>\n", "\n", $body);
              $body = strtr($body, $unescape);
              if (!$message_id && defined('PHORUM_CONTROL_CENTER')) {
                  // we're in control.php, so it was double-escaped
                  $body = strtr($body, $unescape);
              }
          }

          $body = $purifier->purify($body);

          // dynamically update the cache (MUST BE DONE HERE!)
          // this is inefficient because it's one db call per
          // cache miss, but once the cache is in place things are
          // a lot zippier.
          if ($message_id) { // make sure it's not a fake id
              $updated_message['meta'] = isset($message['meta']) ? $message['meta'] : array();
              $updated_message['meta']['body_cache'] = base64_encode($body);
              $updated_message['meta']['body_cache_serial'] = $cache_serial;
              phorum_db_update_message($message_id, $updated_message);
          }

          // must not get overloaded until after we cache it, otherwise
          // we'll inadvertently change the original text
          $data[$message_id]['body'] = $body;
      }

      return $data;
  }
  104. // -----------------------------------------------------------------------
  105. // This is fragile code, copied from read.php:596 (Phorum 5.2.6). Please
  106. // keep this code in-sync with Phorum
  /**
   * Builds the signature text that is appended to a message body.
   *
   * @param array $row Message array; reads $row['user']['signature'] and
   *                   $row['meta']['show_signature'].
   * @return string The trimmed signature preceded by two newlines, or the
   *                bare trimmed value when it is empty()-like, or '' when
   *                the signature is missing or not switched on.
   */
  function phorum_htmlpurifier_generate_sig($row)
  {
      $enabled = isset($row['user']['signature'], $row['meta']['show_signature'])
          && $row['meta']['show_signature'] == 1;

      if (!$enabled) {
          return '';
      }

      $sig = trim($row['user']['signature']);

      // an empty()-like signature is handed back without the leading gap
      return empty($sig) ? $sig : "\n\n" . $sig;
  }
  /**
   * Builds the "edited" notice that is appended to a message body.
   *
   * The %count%, %lastedit% and %lastuser% placeholders of the
   * $PHORUM["DATA"]["LANG"]["EditedMessage"] template are filled in from
   * the message's meta data.
   *
   * @param array $row Message array; reads meta 'edit_count', 'edit_date'
   *                   and 'edit_username'.
   * @return string The notice preceded by four newlines, or '' when the
   *                message has no positive edit count.
   */
  function phorum_htmlpurifier_generate_editmessage($row)
  {
      $PHORUM = $GLOBALS['PHORUM'];

      if (!isset($row['meta']['edit_count']) || !($row['meta']['edit_count'] > 0)) {
          return '';
      }

      $meta = $row['meta'];
      $template = $PHORUM["DATA"]["LANG"]["EditedMessage"];

      // array-form str_replace substitutes the pairs one after another,
      // in the listed order
      $placeholders = array("%count%", "%lastedit%", "%lastuser%");
      $values = array(
          $meta['edit_count'],
          phorum_date($PHORUM["short_date_time"], $meta['edit_date']),
          $meta['edit_username'],
      );

      return "\n\n\n\n" . str_replace($placeholders, $values, $template);
  }
  137. // End fragile code
  138. // -----------------------------------------------------------------------
  /**
   * Strips the signature and the edit message from a message body.
   *
   * @param array $row Message passed by reference; its 'body' is rewritten
   *                   with both extras removed.
   * @return array Two-element list: the signature and the edit message as
   *               generated (either may be an empty string).
   */
  function phorum_htmlpurifier_remove_sig_and_editmessage(&$row)
  {
      $extras = array(
          phorum_htmlpurifier_generate_sig($row),
          phorum_htmlpurifier_generate_editmessage($row),
      );

      // inside the body these extra bits have "<phorum break>" in front
      // of every newline, so that is the form we have to look for
      $strip = array();
      foreach ($extras as $extra) {
          if ($extra) {
              $strip[str_replace("\n", "<phorum break>\n", $extra)] = '';
          }
      }

      $row['body'] = strtr($row['body'], $strip);

      return $extras;
  }
  /**
   * Marks a message being posted as pure HTML (i.e. not in need of
   * migration) and drops any cached body it carries.
   *
   * @note The cache entry itself is not generated here; the original
   *       author defers that to the first read because data is missing
   *       at this point.
   *
   * @param array $message Message about to be stored.
   * @return array The message without meta 'body_cache' and with meta
   *               'body_cache_serial' set to the current serial.
   */
  function phorum_htmlpurifier_posting($message)
  {
      $current_serial = $GLOBALS["PHORUM"]['mod_htmlpurifier']['body_cache_serial'];

      // invalidate the cache
      unset($message['meta']['body_cache']);
      $message['meta']['body_cache_serial'] = $current_serial;

      return $message;
  }
  /**
   * Overload quoting mechanism to prevent default, mail-style quote from
   * happening.
   *
   * @param array $array Two-element list: [0] is escaped and used as the
   *                     blockquote's cite attribute, [1] is the quoted
   *                     text, which is purified before being wrapped.
   * @return string A <blockquote> element containing the purified text.
   */
  function phorum_htmlpurifier_quote($array)
  {
      // Plain assignment: "=&" on a function result raises "Only variables
      // should be assigned by reference" unless the callee returns by
      // reference, and buys nothing for an object handle.
      $purifier = HTMLPurifier::getInstance();
      $text = $purifier->purify($array[1]);
      $source = htmlspecialchars($array[0]);
      return "<blockquote cite=\"$source\">\n$text\n</blockquote>";
  }
  /**
   * Loads the HTML Purifier library and this mod's configuration, primes
   * the shared purifier instance, publishes the cache serial and loads the
   * migration code.
   *
   * Side effects:
   *  - sets $GLOBALS['PHORUM']['mod_htmlpurifier']['body_cache_serial'];
   *  - prints an error and terminates the request when migrate.php is
   *    missing from the mod directory;
   *  - declares a pass-through phorum_htmlpurifier_migrate() when
   *    migrate.php did not provide one.
   *
   * @credits <http://secretsauce.phorum.org/snippets/make_bbcode_last_formatter.php.txt>
   */
  function phorum_htmlpurifier_common()
  {
      $mod_dir = dirname(__FILE__);

      require_once($mod_dir . '/htmlpurifier/HTMLPurifier.auto.php');
      // *_once: init-config.php presumably declares
      // phorum_htmlpurifier_get_config() (it is called right below), so
      // including it a second time would be a fatal redeclaration.
      require_once($mod_dir . '/init-config.php');

      $config = phorum_htmlpurifier_get_config();
      HTMLPurifier::getInstance($config);

      // increment revision.txt if you want to invalidate the cache
      $GLOBALS['PHORUM']['mod_htmlpurifier']['body_cache_serial'] = $config->getSerial();

      // load migration
      $migrate_file = $mod_dir . '/migrate.php';
      if (!file_exists($migrate_file)) {
          echo '<strong>Error:</strong> No migration path specified for HTML Purifier, please check' . "\n"
              . '<tt>mods/htmlpurifier/migrate.bbcode.php</tt> for instructions on' . "\n"
              . 'how to migrate from your previous markup language.';
          exit;
      }
      include_once($migrate_file);

      if (!function_exists('phorum_htmlpurifier_migrate')) {
          // Dummy function
          function phorum_htmlpurifier_migrate($data) {return $data;}
      }
  }
  /**
   * Pre-emptively performs purification if it looks like a WYSIWYG editor
   * is being used.
   *
   * Only acts when $PHORUM['mod_htmlpurifier']['wysiwyg'] is non-empty and
   * the message has a non-empty body; otherwise the message is returned
   * unchanged.
   *
   * @param array $message Message about to be loaded into the editor.
   * @return array The message, with 'body' purified and re-escaped using
   *               $PHORUM['DATA']['CHARSET'] when WYSIWYG mode is on.
   */
  function phorum_htmlpurifier_before_editor($message)
  {
      if (empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
          return $message;
      }
      if (empty($message['body'])) {
          return $message;
      }

      // de-entity-ize contents; "&amp;" goes last so that "&amp;lt;" ends
      // up as "&lt;" rather than "<"
      // NOTE(review): "&quot;" is not decoded here, unlike in the format
      // hook - confirm whether the body can contain it at this point.
      $body = str_replace(array('&lt;','&gt;','&amp;'), array('<','>','&'), $message['body']);

      // Plain assignment: "=&" on a function result raises "Only variables
      // should be assigned by reference" unless the callee returns by
      // reference.
      $purifier = HTMLPurifier::getInstance();
      $body = $purifier->purify($body);

      // re-entity-ize contents
      $message['body'] = htmlspecialchars($body, ENT_QUOTES, $GLOBALS['PHORUM']['DATA']['CHARSET']);

      return $message;
  }
  /**
   * Prints the help text shown below the subject field of the editor.
   *
   * - WYSIWYG mode: prints a short "HTML has been scrubbed" notice when
   *   $PHORUM['DATA']['MODE'] is 'quote', 'edit' or 'moderation', and
   *   nothing otherwise.
   * - Plain mode: prints the HTML input help (escaping hint, optional
   *   auto-paragraph hint, list of allowed tags, CDATA example) unless
   *   $PHORUM['mod_htmlpurifier']['suppress_message'] is non-empty.
   *
   * Output is written directly; nothing is returned.
   */
  function phorum_htmlpurifier_editor_after_subject()
  {
      // don't show this message if it's a WYSIWYG editor, since it will
      // then be handled automatically
      if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
          $i = $GLOBALS['PHORUM']['DATA']['MODE'];
          if ($i == 'quote' || $i == 'edit' || $i == 'moderation') {
              ?>
  <div>
  <p>
  <strong>Notice:</strong> HTML has been scrubbed for your safety.
  If you would like to see the original, turn off WYSIWYG mode
  (consult your administrator for details.)
  </p>
  </div>
  <?php
          }
          return;
      }

      if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['suppress_message'])) {
          return;
      }

      ?><div class="htmlpurifier-help">
  <p>
  <strong>HTML input</strong> is enabled. Make sure you escape all HTML and
  angled brackets with <code>&amp;lt;</code> and <code>&amp;gt;</code>.
  </p><?php

      // Plain assignment: "=&" on a function result raises "Only variables
      // should be assigned by reference" unless the callee returns by
      // reference.
      $purifier = HTMLPurifier::getInstance();
      $config = $purifier->config;

      if ($config->get('AutoFormat.AutoParagraph')) {
          ?><p>
  <strong>Auto-paragraphing</strong> is enabled. Double
  newlines will be converted to paragraphs; for single
  newlines, use the <code>pre</code> tag.
  </p><?php
      }

      // list every element name known to the HTML definition, sorted
      $html_definition = $config->getDefinition('HTML');
      $allowed = array();
      foreach ($html_definition->info as $name => $x) {
          $allowed[] = "<code>$name</code>";
      }
      sort($allowed);
      $allowed_text = implode(', ', $allowed);

      // Every <p> below is closed exactly once and the <pre> blocks are
      // siblings of the paragraphs rather than children of them (a <pre>
      // is not allowed inside a <p>).
      ?><p><strong>Allowed tags:</strong> <?php
      echo $allowed_text;
      ?>.</p>
  <p>
  For inputting literal code such as HTML and PHP for display, use
  CDATA tags to auto-escape your angled brackets, and <code>pre</code>
  to preserve newlines:
  </p>
<pre>&lt;pre&gt;&lt;![CDATA[
<em>Place code here</em>
]]&gt;&lt;/pre&gt;</pre>
  <p>
  Power users, you can hide this notice with:
  </p>
<pre>.htmlpurifier-help {display:none;}</pre>
  </div><?php
  }
  283. // vim: et sw=4 sts=4