config-scanner.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #!/usr/bin/php
  2. <?php
  // Work from this script's own directory so the relative includes below resolve.
  chdir(dirname(__FILE__));
  require_once 'common.php';
  require_once '../library/HTMLPurifier.auto.php';

  // assertCli() is not defined in this file; presumably common.php provides it
  // and it refuses to continue outside the command line -- confirm.
  assertCli();

  // The scan records the line number carried by each token, which the
  // tokenizer only supplies from PHP 5.2.2 onwards.
  if (!version_compare(PHP_VERSION, '5.2.2', '>=')) {
      echo 'This script requires PHP 5.2.2 or later, for tokenizer line numbers.';
      exit(1);
  }
  11. /**
  12. * @file
  13. * Scans HTML Purifier source code for $config tokens and records the
  14. * directive being used; configdoc can use this info later.
  15. *
  16. * A summary count is printed to the console, and the collected usage data
  17. * is written to ../configdoc/usage.xml for our XSLT transform to use.
  18. */
  $FS = new FSTools();

  // All paths from here on are relative to the library directory.
  chdir(dirname(__FILE__) . '/../library/');

  // Build the list of library source files to scan.
  $files = array();
  foreach ($FS->globr('.', '*.php') as $path) {
      // Drop the leading './' from the globbed path.
      $relative = substr($path, 2);

      // Generated files live under standalone/; meta files have more than
      // one dot in their path. Neither kind is scanned.
      $is_generated = strncmp('standalone/', $relative, 11) === 0;
      $is_meta = substr_count($relative, '.') > 1;
      if ($is_generated || $is_meta) {
          continue;
      }

      $files[] = $relative;
  }
  /**
   * Moves the $i cursor forward to the next non-whitespace token.
   *
   * The cursor always advances by at least one position. When the end of the
   * token list is reached, the cursor stops on the first index that has no
   * token; no notice or warning is raised for reading past the end.
   *
   * @param array $tokens Token list; each element is either an array whose
   *                      first entry is the token id, or a plain string
   * @param int   $i      Cursor into $tokens, updated in place
   */
  function consumeWhitespace($tokens, &$i)
  {
      do {
          $i++;
          // isset() guards the read so running off the end of the list simply
          // stops the scan instead of touching an undefined offset.
      } while (isset($tokens[$i]) && is_array($tokens[$i]) && $tokens[$i][0] === T_WHITESPACE);
  }
  /**
   * Checks a token against an expected type and/or text. Supported call forms:
   *  - testToken($token, $type):         $token is an array token of integer
   *                                      type $type;
   *  - testToken($token, $text):         $token is exactly the string $text;
   *  - testToken($token, $type, $text):  $token is an array token of type
   *                                      $type whose text is exactly $text.
   *
   * @param array|string $token          Token to inspect
   * @param int|string   $value_or_token Expected token type, or expected
   *                                     literal string when $value is omitted
   * @param string|null  $value          Expected token text, if any
   * @return bool
   */
  function testToken($token, $value_or_token, $value = null)
  {
      // Two-argument form with a non-integer expectation: literal comparison.
      if ($value === null && !is_int($value_or_token)) {
          return $token === $value_or_token;
      }

      // Every remaining form needs an array token of the expected type.
      if (!is_array($token) || $token[0] !== $value_or_token) {
          return false;
      }

      // With no expected text the type match is enough; otherwise the token
      // text must match as well.
      return $value === null || $token[1] === $value;
  }
  // $counter:      ->get( calls whose first argument is a string literal
  // $full_counter: every ->get( call found on a config object
  // $tracker:      directive id => file name => list of line numbers
  $counter = 0;
  $full_counter = 0;
  $tracker = array();

  foreach ($files as $source) {
      $tokens = token_get_all(file_get_contents($source));
      // File names are recorded with forward slashes.
      $name = str_replace('\\', '/', $source);
      $total = count($tokens);

      for ($pos = 0; $pos < $total; $pos++) {
          // Step 1: the cursor must sit on $config or on $this->config.
          $is_config = testToken($tokens[$pos], T_VARIABLE, '$config');
          if (!$is_config && testToken($tokens[$pos], T_VARIABLE, '$this')) {
              consumeWhitespace($tokens, $pos);
              if (testToken($tokens[$pos], T_OBJECT_OPERATOR)) {
                  consumeWhitespace($tokens, $pos);
                  $is_config = testToken($tokens[$pos], T_STRING, 'config');
              }
          }
          if (!$is_config) {
              continue;
          }

          // Step 2: look ahead for the next "->", giving up at the first
          // ',', ')' or ';'. Other single-character tokens are skipped.
          $found_arrow = false;
          for ($pos++; $pos < $total; $pos++) {
              $tok = $tokens[$pos];
              if (is_string($tok)) {
                  if ($tok === ',' || $tok === ')' || $tok === ';') {
                      break;
                  }
                  continue;
              }
              if ($tok[0] === T_OBJECT_OPERATOR) {
                  $found_arrow = true;
                  break;
              }
          }
          if (!$found_arrow) {
              continue;
          }

          // Step 3: the operator must be followed by get( ; the line recorded
          // is the one the "->" token is on.
          $line = $tokens[$pos][2];
          consumeWhitespace($tokens, $pos);
          if (!testToken($tokens[$pos], T_STRING, 'get')) {
              continue;
          }
          consumeWhitespace($tokens, $pos);
          if (!testToken($tokens[$pos], '(')) {
              continue;
          }
          $full_counter++;

          // Step 4: only a string-literal first argument is recorded. Batch
          // and wildcard retrievals are counted above but not tracked.
          consumeWhitespace($tokens, $pos);
          if (testToken($tokens[$pos], T_CONSTANT_ENCAPSED_STRING)) {
              // Strip the surrounding quote characters from the literal.
              $id = substr($tokens[$pos][1], 1, -1);
              $counter++;
              $tracker[$id][$name][] = $line;
          }
      }
  }

  echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n";
  // Serialise $tracker (directive id => file name => line numbers) as
  // <usage><directive id><file name><line/>...</file></directive></usage>.
  echo "Generating XML... ";
  $xw = new XMLWriter();

  // The path is relative to the current working directory. Stop here if the
  // file cannot be opened, rather than reporting "done!" with nothing written.
  if (!$xw->openURI('../configdoc/usage.xml')) {
      echo "failed!\n";
      fwrite(STDERR, "Could not open ../configdoc/usage.xml for writing.\n");
      exit(1);
  }

  $xw->setIndent(true);
  $xw->startDocument('1.0', 'UTF-8');
  $xw->startElement('usage');
  foreach ($tracker as $id => $usages) {
      $xw->startElement('directive');
      $xw->writeAttribute('id', $id);
      foreach ($usages as $file => $lines) {
          $xw->startElement('file');
          $xw->writeAttribute('name', $file);
          foreach ($lines as $line) {
              $xw->writeElement('line', $line);
          }
          $xw->endElement(); // file
      }
      $xw->endElement(); // directive
  }
  $xw->endElement(); // usage
  $xw->flush();
  echo "done!\n";
  130. // vim: et sw=4 sts=4