utils_diff.class.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. <?php
  2. /* Copyright (C) 2016 Jean-François Ferry <hello@librethic.io>
  3. *
  4. * A class containing a diff implementation
  5. *
  6. * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
  7. * terms of the CC0 1.0 Universal legal code:
  8. *
  9. * http://creativecommons.org/publicdomain/zero/1.0/legalcode
  10. */
  11. /**
  12. * A class containing functions for computing diffs and formatting the output.
  13. */
  14. class Diff
  15. {
  16. // define the constants
  17. const UNMODIFIED = 0;
  18. const DELETED = 1;
  19. const INSERTED = 2;
  20. /* Returns the diff for two strings. The return value is an array, each of
  21. * whose values is an array containing two values: a line (or character, if
  22. * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
  23. * line or character is in both strings), DIFF::DELETED (the line or character
  24. * is only in the first string), and DIFF::INSERTED (the line or character is
  25. * only in the second string). The parameters are:
  26. *
  27. * $string1 - the first string
  28. * $string2 - the second string
  29. * $compareCharacters - true to compare characters, and false to compare
  30. * lines; this optional parameter defaults to false
  31. */
  32. public static function compare(
  33. $string1,
  34. $string2,
  35. $compareCharacters = false
  36. ) {
  37. // initialise the sequences and comparison start and end positions
  38. $start = 0;
  39. if ($compareCharacters) {
  40. $sequence1 = $string1;
  41. $sequence2 = $string2;
  42. $end1 = strlen($string1) - 1;
  43. $end2 = strlen($string2) - 1;
  44. } else {
  45. $sequence1 = preg_split('/\R/', $string1);
  46. $sequence2 = preg_split('/\R/', $string2);
  47. $end1 = count($sequence1) - 1;
  48. $end2 = count($sequence2) - 1;
  49. }
  50. // skip any common prefix
  51. while ($start <= $end1 && $start <= $end2
  52. && $sequence1[$start] == $sequence2[$start]) {
  53. $start++;
  54. }
  55. // skip any common suffix
  56. while ($end1 >= $start && $end2 >= $start
  57. && $sequence1[$end1] == $sequence2[$end2]) {
  58. $end1--;
  59. $end2--;
  60. }
  61. // compute the table of longest common subsequence lengths
  62. $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);
  63. // generate the partial diff
  64. $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);
  65. // generate the full diff
  66. $diff = array();
  67. for ($index = 0; $index < $start; $index++) {
  68. $diff[] = array($sequence1[$index], self::UNMODIFIED);
  69. }
  70. while (count($partialDiff) > 0) {
  71. $diff[] = array_pop($partialDiff);
  72. }
  73. $end2 = ($compareCharacters ? strlen($sequence1) : count($sequence1));
  74. for ($index = $end1 + 1; $index < $end2; $index++)
  75. {
  76. $diff[] = array($sequence1[$index], self::UNMODIFIED);
  77. }
  78. // return the diff
  79. return $diff;
  80. }
  81. /* Returns the diff for two files. The parameters are:
  82. *
  83. * $file1 - the path to the first file
  84. * $file2 - the path to the second file
  85. * $compareCharacters - true to compare characters, and false to compare
  86. * lines; this optional parameter defaults to false
  87. */
  88. public static function compareFiles(
  89. $file1,
  90. $file2,
  91. $compareCharacters = false
  92. ) {
  93. // return the diff of the files
  94. return self::compare(
  95. file_get_contents($file1),
  96. file_get_contents($file2),
  97. $compareCharacters
  98. );
  99. }
  100. /* Returns the table of longest common subsequence lengths for the specified
  101. * sequences. The parameters are:
  102. *
  103. * $sequence1 - the first sequence
  104. * $sequence2 - the second sequence
  105. * $start - the starting index
  106. * $end1 - the ending index for the first sequence
  107. * $end2 - the ending index for the second sequence
  108. */
  109. private static function computeTable(
  110. $sequence1,
  111. $sequence2,
  112. $start,
  113. $end1,
  114. $end2
  115. ) {
  116. // determine the lengths to be compared
  117. $length1 = $end1 - $start + 1;
  118. $length2 = $end2 - $start + 1;
  119. // initialise the table
  120. $table = array(array_fill(0, $length2 + 1, 0));
  121. // loop over the rows
  122. for ($index1 = 1; $index1 <= $length1; $index1++) {
  123. // create the new row
  124. $table[$index1] = array(0);
  125. // loop over the columns
  126. for ($index2 = 1; $index2 <= $length2; $index2++) {
  127. // store the longest common subsequence length
  128. if ($sequence1[$index1 + $start - 1]== $sequence2[$index2 + $start - 1]
  129. ) {
  130. $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
  131. } else {
  132. $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
  133. }
  134. }
  135. }
  136. // return the table
  137. return $table;
  138. }
  139. /* Returns the partial diff for the specificed sequences, in reverse order.
  140. * The parameters are:
  141. *
  142. * $table - the table returned by the computeTable function
  143. * $sequence1 - the first sequence
  144. * $sequence2 - the second sequence
  145. * $start - the starting index
  146. */
  147. private static function generatePartialDiff(
  148. $table,
  149. $sequence1,
  150. $sequence2,
  151. $start
  152. ) {
  153. // initialise the diff
  154. $diff = array();
  155. // initialise the indices
  156. $index1 = count($table) - 1;
  157. $index2 = count($table[0]) - 1;
  158. // loop until there are no items remaining in either sequence
  159. while ($index1 > 0 || $index2 > 0) {
  160. // check what has happened to the items at these indices
  161. if ($index1 > 0 && $index2 > 0
  162. && $sequence1[$index1 + $start - 1]== $sequence2[$index2 + $start - 1]
  163. ) {
  164. // update the diff and the indices
  165. $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
  166. $index1--;
  167. $index2--;
  168. } elseif ($index2 > 0
  169. && $table[$index1][$index2] == $table[$index1][$index2 - 1]
  170. ) {
  171. // update the diff and the indices
  172. $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
  173. $index2--;
  174. } else {
  175. // update the diff and the indices
  176. $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
  177. $index1--;
  178. }
  179. }
  180. // return the diff
  181. return $diff;
  182. }
  183. /* Returns a diff as a string, where unmodified lines are prefixed by ' ',
  184. * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
  185. * parameters are:
  186. *
  187. * $diff - the diff array
  188. * $separator - the separator between lines; this optional parameter defaults
  189. * to "\n"
  190. */
  191. public static function toString($diff, $separator = "\n")
  192. {
  193. // initialise the string
  194. $string = '';
  195. // loop over the lines in the diff
  196. foreach ($diff as $line) {
  197. // extend the string with the line
  198. switch ($line[1]) {
  199. case self::UNMODIFIED:
  200. $string .= ' ' . $line[0];
  201. break;
  202. case self::DELETED:
  203. $string .= '- ' . $line[0];
  204. break;
  205. case self::INSERTED:
  206. $string .= '+ ' . $line[0];
  207. break;
  208. }
  209. // extend the string with the separator
  210. $string .= $separator;
  211. }
  212. // return the string
  213. return $string;
  214. }
  215. /* Returns a diff as an HTML string, where unmodified lines are contained
  216. * within 'span' elements, deletions are contained within 'del' elements, and
  217. * insertions are contained within 'ins' elements. The parameters are:
  218. *
  219. * $diff - the diff array
  220. * $separator - the separator between lines; this optional parameter defaults
  221. * to '<br>'
  222. */
  223. public static function toHTML($diff, $separator = '<br>')
  224. {
  225. // initialise the HTML
  226. $html = '';
  227. // loop over the lines in the diff
  228. foreach ($diff as $line) {
  229. // extend the HTML with the line
  230. switch ($line[1]) {
  231. case self::UNMODIFIED:
  232. $element = 'span';
  233. break;
  234. case self::DELETED:
  235. $element = 'del';
  236. break;
  237. case self::INSERTED:
  238. $element = 'ins';
  239. break;
  240. }
  241. $html .=
  242. '<' . $element . '>'
  243. . htmlspecialchars($line[0])
  244. . '</' . $element . '>';
  245. // extend the HTML with the separator
  246. $html .= $separator;
  247. }
  248. // return the HTML
  249. return $html;
  250. }
  251. /* Returns a diff as an HTML table. The parameters are:
  252. *
  253. * $diff - the diff array
  254. * $indentation - indentation to add to every line of the generated HTML; this
  255. * optional parameter defaults to ''
  256. * $separator - the separator between lines; this optional parameter
  257. * defaults to '<br>'
  258. */
  259. public static function toTable($diff, $indentation = '', $separator = '<br>')
  260. {
  261. // initialise the HTML
  262. $html = $indentation . "<table class=\"diff\">\n";
  263. // loop over the lines in the diff
  264. $index = 0;
  265. while ($index < count($diff)) {
  266. // determine the line type
  267. switch ($diff[$index][1]) {
  268. // display the content on the left and right
  269. case self::UNMODIFIED:
  270. $leftCell = self::getCellContent(
  271. $diff,
  272. $indentation,
  273. $separator,
  274. $index,
  275. self::UNMODIFIED
  276. );
  277. $rightCell = $leftCell;
  278. break;
  279. // display the deleted on the left and inserted content on the right
  280. case self::DELETED:
  281. $leftCell = self::getCellContent(
  282. $diff,
  283. $indentation,
  284. $separator,
  285. $index,
  286. self::DELETED
  287. );
  288. $rightCell = self::getCellContent(
  289. $diff,
  290. $indentation,
  291. $separator,
  292. $index,
  293. self::INSERTED
  294. );
  295. break;
  296. // display the inserted content on the right
  297. case self::INSERTED:
  298. $leftCell = '';
  299. $rightCell = self::getCellContent(
  300. $diff,
  301. $indentation,
  302. $separator,
  303. $index,
  304. self::INSERTED
  305. );
  306. break;
  307. }
  308. // extend the HTML with the new row
  309. $html .=
  310. $indentation
  311. . " <tr>\n"
  312. . $indentation
  313. . ' <td class="diff'
  314. . ($leftCell == $rightCell
  315. ? 'Unmodified'
  316. : ($leftCell == '' ? 'Blank' : 'Deleted'))
  317. . '">'
  318. . $leftCell
  319. . "</td>\n"
  320. . $indentation
  321. . ' <td class="diff'
  322. . ($leftCell == $rightCell
  323. ? 'Unmodified'
  324. : ($rightCell == '' ? 'Blank' : 'Inserted'))
  325. . '">'
  326. . $rightCell
  327. . "</td>\n"
  328. . $indentation
  329. . " </tr>\n";
  330. }
  331. // return the HTML
  332. return $html . $indentation . "</table>\n";
  333. }
  334. /* Returns the content of the cell, for use in the toTable function. The
  335. * parameters are:
  336. *
  337. * $diff - the diff array
  338. * $indentation - indentation to add to every line of the generated HTML
  339. * $separator - the separator between lines
  340. * $index - the current index, passes by reference
  341. * $type - the type of line
  342. */
  343. private static function getCellContent($diff, $indentation, $separator, &$index, $type)
  344. {
  345. // initialise the HTML
  346. $html = '';
  347. // loop over the matching lines, adding them to the HTML
  348. while ($index < count($diff) && $diff[$index][1] == $type) {
  349. $html .=
  350. '<span>'
  351. . htmlspecialchars($diff[$index][0])
  352. . '</span>'
  353. . $separator;
  354. $index++;
  355. }
  356. // return the HTML
  357. return $html;
  358. }
  359. }