utils_diff.class.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php
  2. /* Copyright (C) 2016 Jean-François Ferry <hello@librethic.io>
  3. *
  4. * A class containing a diff implementation
  5. *
  6. * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
  7. * terms of the CC0 1.0 Universal legal code:
  8. *
  9. * http://creativecommons.org/publicdomain/zero/1.0/legalcode
  10. */
  11. /**
  12. * A class containing functions for computing diffs and formatting the output.
  13. */
  14. class Diff
  15. {
  16. // define the constants
  17. const UNMODIFIED = 0;
  18. const DELETED = 1;
  19. const INSERTED = 2;
  20. /**
  21. * Returns the diff for two strings. The return value is an array, each of
  22. * whose values is an array containing two values: a line (or character, if
  23. * $compareCharacters is true), and one of the constants DIFF::UNMODIFIED (the
  24. * line or character is in both strings), DIFF::DELETED (the line or character
  25. * is only in the first string), and DIFF::INSERTED (the line or character is
  26. * only in the second string). The parameters are:
  27. *
  28. * @param string $string1 First string
  29. * @param string $string2 Second string
  30. * @param string $compareCharacters true to compare characters, and false to compare lines; this optional parameter defaults to false
  31. * @return array Array of diff
  32. */
  33. public static function compare($string1, $string2, $compareCharacters = false)
  34. {
  35. // initialise the sequences and comparison start and end positions
  36. $start = 0;
  37. if ($compareCharacters) {
  38. $sequence1 = $string1;
  39. $sequence2 = $string2;
  40. $end1 = strlen($string1) - 1;
  41. $end2 = strlen($string2) - 1;
  42. } else {
  43. $sequence1 = preg_split('/\R/', $string1);
  44. $sequence2 = preg_split('/\R/', $string2);
  45. $end1 = count($sequence1) - 1;
  46. $end2 = count($sequence2) - 1;
  47. }
  48. // skip any common prefix
  49. while ($start <= $end1 && $start <= $end2
  50. && $sequence1[$start] == $sequence2[$start]) {
  51. $start++;
  52. }
  53. // skip any common suffix
  54. while ($end1 >= $start && $end2 >= $start
  55. && $sequence1[$end1] == $sequence2[$end2]) {
  56. $end1--;
  57. $end2--;
  58. }
  59. // compute the table of longest common subsequence lengths
  60. $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);
  61. // generate the partial diff
  62. $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);
  63. // generate the full diff
  64. $diff = array();
  65. for ($index = 0; $index < $start; $index++) {
  66. $diff[] = array($sequence1[$index], self::UNMODIFIED);
  67. }
  68. while (count($partialDiff) > 0) {
  69. $diff[] = array_pop($partialDiff);
  70. }
  71. $end2 = ($compareCharacters ? strlen($sequence1) : count($sequence1));
  72. for ($index = $end1 + 1; $index < $end2; $index++) {
  73. $diff[] = array($sequence1[$index], self::UNMODIFIED);
  74. }
  75. // return the diff
  76. return $diff;
  77. }
  78. /**
  79. * Returns the diff for two files. The parameters are:
  80. *
  81. * @param string $file1 Path to the first file
  82. * @param string $file2 Path to the second file
  83. * @param boolean $compareCharacters true to compare characters, and false to compare lines; this optional parameter defaults to false
  84. * @return array Array of diff
  85. */
  86. public static function compareFiles(
  87. $file1,
  88. $file2,
  89. $compareCharacters = false
  90. ) {
  91. // return the diff of the files
  92. return self::compare(
  93. file_get_contents($file1),
  94. file_get_contents($file2),
  95. $compareCharacters
  96. );
  97. }
  98. /**
  99. * Returns the table of longest common subsequence lengths for the specified sequences. The parameters are:
  100. *
  101. * @param string $sequence1 the first sequence
  102. * @param string $sequence2 the second sequence
  103. * @param string $start the starting index
  104. * @param string $end1 the ending index for the first sequence
  105. * @param string $end2 the ending index for the second sequence
  106. * @return array array of diff
  107. */
  108. private static function computeTable($sequence1, $sequence2, $start, $end1, $end2)
  109. {
  110. // determine the lengths to be compared
  111. $length1 = $end1 - $start + 1;
  112. $length2 = $end2 - $start + 1;
  113. // initialise the table
  114. $table = array(array_fill(0, $length2 + 1, 0));
  115. // loop over the rows
  116. for ($index1 = 1; $index1 <= $length1; $index1++) {
  117. // create the new row
  118. $table[$index1] = array(0);
  119. // loop over the columns
  120. for ($index2 = 1; $index2 <= $length2; $index2++) {
  121. // store the longest common subsequence length
  122. if ($sequence1[$index1 + $start - 1] == $sequence2[$index2 + $start - 1]
  123. ) {
  124. $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
  125. } else {
  126. $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
  127. }
  128. }
  129. }
  130. // return the table
  131. return $table;
  132. }
  133. /**
  134. * Returns the partial diff for the specificed sequences, in reverse order.
  135. * The parameters are:
  136. *
  137. * @param string $table the table returned by the computeTable function
  138. * @param string $sequence1 the first sequence
  139. * @param string $sequence2 the second sequence
  140. * @param string $start the starting index
  141. * @return array array of diff
  142. */
  143. private static function generatePartialDiff($table, $sequence1, $sequence2, $start)
  144. {
  145. // initialise the diff
  146. $diff = array();
  147. // initialise the indices
  148. $index1 = count($table) - 1;
  149. $index2 = count($table[0]) - 1;
  150. // loop until there are no items remaining in either sequence
  151. while ($index1 > 0 || $index2 > 0) {
  152. // check what has happened to the items at these indices
  153. if ($index1 > 0 && $index2 > 0
  154. && $sequence1[$index1 + $start - 1] == $sequence2[$index2 + $start - 1]
  155. ) {
  156. // update the diff and the indices
  157. $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
  158. $index1--;
  159. $index2--;
  160. } elseif ($index2 > 0
  161. && $table[$index1][$index2] == $table[$index1][$index2 - 1]
  162. ) {
  163. // update the diff and the indices
  164. $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
  165. $index2--;
  166. } else {
  167. // update the diff and the indices
  168. $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
  169. $index1--;
  170. }
  171. }
  172. // return the diff
  173. return $diff;
  174. }
  175. /**
  176. * Returns a diff as a string, where unmodified lines are prefixed by ' ',
  177. * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
  178. * parameters are:
  179. *
  180. * @param array $diff the diff array
  181. * @param string $separator the separator between lines; this optional parameter defaults to "\n"
  182. * @return string String
  183. */
  184. public static function toString($diff, $separator = "\n")
  185. {
  186. // initialise the string
  187. $string = '';
  188. // loop over the lines in the diff
  189. foreach ($diff as $line) {
  190. // extend the string with the line
  191. switch ($line[1]) {
  192. case self::UNMODIFIED:
  193. $string .= ' '.$line[0];
  194. break;
  195. case self::DELETED:
  196. $string .= '- '.$line[0];
  197. break;
  198. case self::INSERTED:
  199. $string .= '+ '.$line[0];
  200. break;
  201. }
  202. // extend the string with the separator
  203. $string .= $separator;
  204. }
  205. // return the string
  206. return $string;
  207. }
  208. /**
  209. * Returns a diff as an HTML string, where unmodified lines are contained
  210. * within 'span' elements, deletions are contained within 'del' elements, and
  211. * insertions are contained within 'ins' elements. The parameters are:
  212. *
  213. * @param string $diff the diff array
  214. * @param string $separator the separator between lines; this optional parameter defaults to '<br>'
  215. * @return string HTML string
  216. */
  217. public static function toHTML($diff, $separator = '<br>')
  218. {
  219. // initialise the HTML
  220. $html = '';
  221. // loop over the lines in the diff
  222. foreach ($diff as $line) {
  223. // extend the HTML with the line
  224. switch ($line[1]) {
  225. case self::UNMODIFIED:
  226. $element = 'span';
  227. break;
  228. case self::DELETED:
  229. $element = 'del';
  230. break;
  231. case self::INSERTED:
  232. $element = 'ins';
  233. break;
  234. }
  235. $html .=
  236. '<'.$element.'>'
  237. . htmlspecialchars($line[0])
  238. . '</'.$element.'>';
  239. // extend the HTML with the separator
  240. $html .= $separator;
  241. }
  242. // return the HTML
  243. return $html;
  244. }
  245. /**
  246. * Returns a diff as an HTML table. The parameters are:
  247. *
  248. * @param string $diff the diff array
  249. * @param string $indentation indentation to add to every line of the generated HTML; this optional parameter defaults to ''
  250. * @param string $separator the separator between lines; this optional parameter defaults to '<br>'
  251. * @return string HTML string
  252. */
  253. public static function toTable($diff, $indentation = '', $separator = '<br>')
  254. {
  255. // initialise the HTML
  256. $html = $indentation."<table class=\"diff\">\n";
  257. // loop over the lines in the diff
  258. $index = 0;
  259. while ($index < count($diff)) {
  260. // determine the line type
  261. switch ($diff[$index][1]) {
  262. // display the content on the left and right
  263. case self::UNMODIFIED:
  264. $leftCell = self::getCellContent(
  265. $diff,
  266. $indentation,
  267. $separator,
  268. $index,
  269. self::UNMODIFIED
  270. );
  271. $rightCell = $leftCell;
  272. break;
  273. // display the deleted on the left and inserted content on the right
  274. case self::DELETED:
  275. $leftCell = self::getCellContent(
  276. $diff,
  277. $indentation,
  278. $separator,
  279. $index,
  280. self::DELETED
  281. );
  282. $rightCell = self::getCellContent(
  283. $diff,
  284. $indentation,
  285. $separator,
  286. $index,
  287. self::INSERTED
  288. );
  289. break;
  290. // display the inserted content on the right
  291. case self::INSERTED:
  292. $leftCell = '';
  293. $rightCell = self::getCellContent(
  294. $diff,
  295. $indentation,
  296. $separator,
  297. $index,
  298. self::INSERTED
  299. );
  300. break;
  301. }
  302. // extend the HTML with the new row
  303. $html .=
  304. $indentation
  305. . " <tr>\n"
  306. . $indentation
  307. . ' <td class="diff'
  308. . ($leftCell == $rightCell
  309. ? 'Unmodified'
  310. : ($leftCell == '' ? 'Blank' : 'Deleted'))
  311. . '">'
  312. . $leftCell
  313. . "</td>\n"
  314. . $indentation
  315. . ' <td class="diff'
  316. . ($leftCell == $rightCell
  317. ? 'Unmodified'
  318. : ($rightCell == '' ? 'Blank' : 'Inserted'))
  319. . '">'
  320. . $rightCell
  321. . "</td>\n"
  322. . $indentation
  323. . " </tr>\n";
  324. }
  325. // return the HTML
  326. return $html.$indentation."</table>\n";
  327. }
  328. /**
  329. * Returns the content of the cell, for use in the toTable function. The
  330. * parameters are:
  331. *
  332. * @param string $diff the diff array
  333. * @param string $indentation indentation to add to every line of the generated HTML
  334. * @param string $separator the separator between lines
  335. * @param string $index the current index, passes by reference
  336. * @param string $type the type of line
  337. * @return string HTML string
  338. */
  339. private static function getCellContent($diff, $indentation, $separator, &$index, $type)
  340. {
  341. // initialise the HTML
  342. $html = '';
  343. // loop over the matching lines, adding them to the HTML
  344. while ($index < count($diff) && $diff[$index][1] == $type) {
  345. $html .=
  346. '<span>'
  347. . htmlspecialchars($diff[$index][0])
  348. . '</span>'
  349. . $separator;
  350. $index++;
  351. }
  352. // return the HTML
  353. return $html;
  354. }
  355. }