From 38c4bcad55ce1e8203e5507b6db2700120688aae Mon Sep 17 00:00:00 2001 From: Meik Sievertsen Date: Fri, 18 Sep 2009 18:18:54 +0000 Subject: [PATCH] Ok, after 20+ hours I think I fixed all grave issues with the updater - smaller memory footprint - better checks for already updated files - even fewer conflicts - fixed automatic conflict resolving after successful merges - no more conflicts for $Id$ changes - fixed skip_whitespace_changes bug where code blocks were added to diff_op_add whereby the previous or next diff_op_copy already had the change - correctly display merged files in diff view (previously it happened that the old file was used for comparison, although the new file was different/newer/merged) git-svn-id: file:///svn/phpbb/branches/phpBB-3_0_0@10163 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/includes/diff/diff.php | 321 +++++++++++++++++++++++++------ phpBB/includes/diff/engine.php | 17 +- phpBB/install/install_update.php | 208 +++++++++++--------- 3 files changed, 392 insertions(+), 154 deletions(-) diff --git a/phpBB/includes/diff/diff.php b/phpBB/includes/diff/diff.php index d8ee43feec..6bda3df43e 100644 --- a/phpBB/includes/diff/diff.php +++ b/phpBB/includes/diff/diff.php @@ -71,8 +71,10 @@ class diff { $count = 0; - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if (is_a($edit, 'diff_op_add') || is_a($edit, 'diff_op_change')) { $count += $edit->nfinal(); @@ -92,8 +94,10 @@ class diff { $count = 0; - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if (is_a($edit, 'diff_op_delete') || is_a($edit, 'diff_op_change')) { $count += $edit->norig(); @@ -128,8 +132,9 @@ class diff $rev->_edits = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; $rev->_edits[] = $edit->reverse(); } @@ -143,13 +148,36 @@ 
class diff */ function is_empty() { - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { - if (!is_a($edit, 'diff_op_copy')) + $edit = $this->_edits[$i]; + + // skip diff_op_copy + if (is_a($edit, 'diff_op_copy')) { + continue; + } + + if (is_a($edit, 'diff_op_delete') || is_a($edit, 'diff_op_add')) + { + $orig = $edit->orig; + $final = $edit->final; + + // We can simplify one case where the array is usually supposed to be empty... + if (sizeof($orig) == 1 && trim($orig[0]) === '') $orig = array(); + if (sizeof($final) == 1 && trim($final[0]) === '') $final = array(); + + if (!$orig && !$final) + { + continue; + } + return false; } + + return false; } + return true; } @@ -164,8 +192,10 @@ class diff { $lcs = 0; - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if (is_a($edit, 'diff_op_copy')) { $lcs += sizeof($edit->orig); @@ -185,8 +215,10 @@ class diff { $lines = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->orig) { array_splice($lines, sizeof($lines), 0, $edit->orig); @@ -206,8 +238,10 @@ class diff { $lines = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->final) { array_splice($lines, sizeof($lines), 0, $edit->final); @@ -258,8 +292,10 @@ class diff $prevtype = null; - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($prevtype == get_class($edit)) { trigger_error("[diff] Edit sequence is non-optimal", E_USER_ERROR); @@ -456,14 +492,14 @@ class diff3 extends diff * @param array $final1 The first version to compare to. * @param array $final2 The second version to compare to. 
*/ - function diff3(&$orig, &$final1, &$final2) + function diff3(&$orig, &$final1, &$final2, $preserve_cr = true) { $diff_engine = new diff_engine(); - $diff_1 = $diff_engine->diff($orig, $final1); - $diff_2 = $diff_engine->diff($orig, $final2); + $diff_1 = $diff_engine->diff($orig, $final1, $preserve_cr); + $diff_2 = $diff_engine->diff($orig, $final2, $preserve_cr); - unset($engine); + unset($diff_engine); $this->_edits = $this->_diff3($diff_1, $diff_2); } @@ -475,8 +511,10 @@ class diff3 extends diff { $conflicts = 0; - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->is_conflict()) { $conflicts++; @@ -506,8 +544,10 @@ class diff3 extends diff $lines = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->is_conflict()) { // Start conflict label @@ -544,8 +584,10 @@ class diff3 extends diff { $lines = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->is_conflict()) { $lines = array_merge($lines, $edit->final2); @@ -566,8 +608,10 @@ class diff3 extends diff { $lines = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->is_conflict()) { $lines = array_merge($lines, $edit->final1); @@ -588,8 +632,10 @@ class diff3 extends diff { $conflicts = array(); - foreach ($this->_edits as $edit) + for ($i = 0, $size = sizeof($this->_edits); $i < $size; $i++) { + $edit = $this->_edits[$i]; + if ($edit->is_conflict()) { $conflicts[] = array($edit->final1, $edit->final2); @@ -713,6 +759,9 @@ class diff3_op { if (!isset($this->_merged)) { + // Prepare the arrays before we compare them. 
;) + $this->solve_prepare(); + if ($this->final1 === $this->final2) { $this->_merged = &$this->final1; @@ -741,6 +790,95 @@ class diff3_op return ($this->merged() === false) ? true : false; } + /** + * Function to prepare the arrays for comparing - we want to skip over newline changes + * @author acydburn + */ + function solve_prepare() + { + // We can simplify one case where the array is usually supposed to be empty... + if (sizeof($this->orig) == 1 && trim($this->orig[0]) === '') $this->orig = array(); + if (sizeof($this->final1) == 1 && trim($this->final1[0]) === '') $this->final1 = array(); + if (sizeof($this->final2) == 1 && trim($this->final2[0]) === '') $this->final2 = array(); + + // Now we only can have the case where the only difference between arrays are newlines, so compare all cases + + // First, some strings we can compare... + $orig = $final1 = $final2 = ''; + + foreach ($this->orig as $null => $line) $orig .= trim($line); + foreach ($this->final1 as $null => $line) $final1 .= trim($line); + foreach ($this->final2 as $null => $line) $final2 .= trim($line); + + // final1 === final2 + if ($final1 === $final2) + { + // We preserve the part which will be used in the merge later + $this->final2 = $this->final1; + } + // final1 === orig + else if ($final1 === $orig) + { + // Here it does not really matter what we choose, but we will use the new code + $this->orig = $this->final1; + } + // final2 === orig + else if ($final2 === $orig) + { + // Here it does not really matter too (final1 will be used), but we will use the new code + $this->orig = $this->final2; + } + } + + /** + * Find code portions from $orig in $final1 and use $final2 as merged instance if provided + * @author acydburn + */ + function _compare_conflict_seq($orig, $final1, $final2 = false) + { + $result = array('merge_found' => false, 'merge' => array()); + + $_orig = &$this->$orig; + $_final1 = &$this->$final1; + + // Ok, we basically search for $orig in $final1 + $compare_seq = 
sizeof($_orig); + + // Go through the conflict code + for ($i = 0, $j = 0, $size = sizeof($_final1); $i < $size; $i++, $j = $i) + { + $line = $_final1[$i]; + $skip = 0; + + for ($x = 0; $x < $compare_seq; $x++) + { + // Try to skip all matching lines + if (trim($line) === trim($_orig[$x])) + { + $line = (++$j < $size) ? $_final1[$j] : $line; + $skip++; + } + } + + if ($skip === $compare_seq) + { + $result['merge_found'] = true; + + if ($final2 !== false) + { + $result['merge'] = array_merge($result['merge'], $this->$final2); + } + $i += ($skip - 1); + } + else if ($final2 !== false) + { + $result['merge'][] = $line; + } + } + + return $result; + } + /** * Tries to solve conflicts aggressively based on typical "assumptions" * @author acydburn @@ -753,45 +891,75 @@ class diff3_op // IF orig is found "as is" in final1 we replace the code directly in final1 and populate this as final2/merge if (sizeof($this->orig) && sizeof($this->final2)) { - // Ok, we basically search for $this->orig in $this->final1 and replace it with $this->final2 - $compare_seq = sizeof($this->orig); + $result = $this->_compare_conflict_seq('orig', 'final1', 'final2'); - // Search for matching code block - $merge = array(); - $merge_found = false; - - // Go through the conflict code - for ($i = 0, $j = 0, $size = sizeof($this->final1); $i < $size; $i++, $j = $i) + if ($result['merge_found']) { - $line = $this->final1[$i]; - $skip = 0; + $this->final2 = $result['merge']; + $this->_merged = &$this->final2; + return; + } - for ($x = 0; $x < $compare_seq; $x++) - { - // Try to skip all matching lines - if (trim($line) === trim($this->orig[$x])) - { - $line = (++$j < $size) ? $this->final1[$j] : $line; - $skip++; - } - } + $result = $this->_compare_conflict_seq('final2', 'final1'); - if ($skip === $compare_seq) + if ($result['merge_found']) + { + $this->_merged = &$this->final1; + return; + } + + // Try to solve $Id$ issues. 
;) + if (sizeof($this->orig) == 1 && sizeof($this->final1) == 1 && sizeof($this->final2) == 1) + { + $match = '#^' . preg_quote('* @version $Id: ', '#') . '[a-z\._\- ]+[0-9]+ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9\:Z]+ [a-z0-9_\- ]+\$$#'; + + if (preg_match($match, $this->orig[0]) && preg_match($match, $this->final1[0]) && preg_match($match, $this->final2[0])) { - $merge_found = true; - $merge = array_merge($merge, $this->final2); - $i += ($skip - 1); - } - else - { - $merge[] = $line; + $this->_merged = &$this->final2; + return; } } - if ($merge_found) + $second_run = false; + + // Try to solve issues where the only reason why the above did not work is a newline being removed in the final1 code but exist in the orig/final2 code + if (trim($this->orig[0]) === '' && trim($this->final2[0]) === '') { - $this->final2 = $merge; - $this->_merged = &$this->final2; + unset($this->orig[0], $this->final2[0]); + $this->orig = array_values($this->orig); + $this->final2 = array_values($this->final2); + + $second_run = true; + } + + // The same is true for a line at the end. ;) + if (sizeof($this->orig) && sizeof($this->final2) && sizeof($this->orig) === sizeof($this->final2) && trim($this->orig[sizeof($this->orig)-1]) === '' && trim($this->final2[sizeof($this->final2)-1]) === '') + { + unset($this->orig[sizeof($this->orig)-1], $this->final2[sizeof($this->final2)-1]); + $this->orig = array_values($this->orig); + $this->final2 = array_values($this->final2); + + $second_run = true; + } + + if ($second_run) + { + $result = $this->_compare_conflict_seq('orig', 'final1', 'final2'); + + if ($result['merge_found']) + { + $this->final2 = $result['merge']; + $this->_merged = &$this->final2; + return; + } + + $result = $this->_compare_conflict_seq('final2', 'final1'); + + if ($result['merge_found']) + { + $this->_merged = &$this->final1; + return; + } } return; @@ -800,8 +968,28 @@ class diff3_op // CASE TWO: Added lines from orig to final2 but final1 had added lines too. Just merge them. 
if (!sizeof($this->orig) && $this->final1 !== $this->final2 && sizeof($this->final1) && sizeof($this->final2)) { - $this->final2 = array_merge($this->final1, $this->final2); - $this->_merged = &$this->final2; + $result = $this->_compare_conflict_seq('final2', 'final1'); + + if ($result['merge_found']) + { + $this->final2 = $this->final1; + $this->_merged = &$this->final1; + } + else + { + $result = $this->_compare_conflict_seq('final1', 'final2'); + + if (!$result['merge_found']) + { + $this->final2 = array_merge($this->final1, $this->final2); + $this->_merged = &$this->final2; + } + else + { + $this->final2 = $this->final1; + $this->_merged = &$this->final1; + } + } return; } @@ -809,22 +997,43 @@ class diff3_op // CASE THREE: Removed lines (orig has the to-remove line(s), but final1 has additional lines which does not need to be removed). Just remove orig from final1 and then use final1 as final2/merge if (!sizeof($this->final2) && sizeof($this->orig) && sizeof($this->final1) && $this->orig !== $this->final1) { - $merged = $this->final1; + // First of all, try to find the code in orig in final1. 
;) + $compare_seq = sizeof($this->orig); + $begin = -1; + $j = $end = 0; foreach ($this->final1 as $i => $line) { - foreach ($this->orig as $j => $old_line) + if (trim($line) === trim($this->orig[$j])) { - if (trim($line) === trim($old_line)) + if ($begin === -1) { - unset($merged[$i]); + $begin = $i; } + + if (isset($this->orig[$j+1])) + { + $j++; + } + } + + if ($begin !== -1) + { + $end++; } } - if (sizeof($merged)) + if ($begin !== -1 && $begin + ($compare_seq - 1) == $end) { - $this->final2 = array_values($merged); + foreach ($this->final1 as $i => $line) + { + if ($i < $begin || $i > $end) + { + $merged[] = $line; + } + } + + $this->final2 = $merged; $this->_merged = &$this->final2; } diff --git a/phpBB/includes/diff/engine.php b/phpBB/includes/diff/engine.php index da7b4344a3..982149457d 100644 --- a/phpBB/includes/diff/engine.php +++ b/phpBB/includes/diff/engine.php @@ -145,8 +145,21 @@ class diff_engine $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv)); // Merge edits when possible. - $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); - $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); + if ($this->skip_whitespace_changes) + { + $from_lines_clean = array_map('trim', $from_lines); + $to_lines_clean = array_map('trim', $to_lines); + + $this->_shift_boundaries($from_lines_clean, $this->xchanged, $this->ychanged); + $this->_shift_boundaries($to_lines_clean, $this->ychanged, $this->xchanged); + + unset($from_lines_clean, $to_lines_clean); + } + else + { + $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); + $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); + } // Compute the edit operations. 
$edits = array(); diff --git a/phpBB/install/install_update.php b/phpBB/install/install_update.php index ad72652e97..3c16a2e82b 100644 --- a/phpBB/install/install_update.php +++ b/phpBB/install/install_update.php @@ -687,7 +687,7 @@ class install_update extends module default: $diff = $this->return_diff($this->old_location . $original_filename, $phpbb_root_path . $file_struct['filename'], $this->new_location . $original_filename); - $contents = implode("\n", $diff->merged_new_output()); + $contents = implode("\n", $diff->merged_output()); unset($diff); break; } @@ -1104,24 +1104,6 @@ class install_update extends module break; -/* - $diff = $this->return_diff($this->old_location . $original_file, $phpbb_root_path . $file, $this->new_location . $original_file); - - $tmp = array( - 'file1' => array(), - 'file2' => ($option == MERGE_NEW_FILE) ? implode("\n", $diff->merged_new_output()) : implode("\n", $diff->merged_orig_output()), - ); - - $diff = new diff($tmp['file1'], $tmp['file2']); - - unset($tmp); - - $template->assign_var('S_DIFF_NEW_FILE', true); - $diff_mode = 'inline'; - $this->page_title = 'VIEWING_FILE_CONTENTS'; - - break; -*/ // Merge differences and use new phpBB code for conflicted blocks case MERGE_NEW_FILE: case MERGE_MOD_FILE: @@ -1175,6 +1157,7 @@ class install_update extends module default: $diff = $this->return_diff($this->old_location . $original_file, $phpbb_root_path . $original_file, $this->new_location . $file); + $diff = $this->return_diff($phpbb_root_path . 
$file, $diff->merged_output()); break; } break; @@ -1362,6 +1345,9 @@ class install_update extends module $update_ary['original'] = $original_file; } + // we only want to know if the files are successfully merged and newlines could result in errors (duplicate addition of lines and such things) + // Therefore we check for empty diffs with two methods, preserving newlines and not preserving them (which mostly works best, therefore the first option) + // On a successfull update the new location file exists but the old one does not exist. // Check for this circumstance, the new file need to be up-to-date with the current file then... if (!file_exists($this->old_location . $original_file) && file_exists($this->new_location . $original_file) && file_exists($phpbb_root_path . $file)) @@ -1401,104 +1387,134 @@ class install_update extends module trigger_error($user->lang['INCOMPLETE_UPDATE_FILES'], E_USER_ERROR); } - $tmp = array( - 'file1' => file_get_contents($this->old_location . $original_file), - 'file2' => file_get_contents($phpbb_root_path . $file), - ); + $preserve_cr_ary = array(false, true); - // We need to diff the contents here to make sure the file is really the one we expect - $diff = new diff($tmp['file1'], $tmp['file2'], false); - $empty_1 = $diff->is_empty(); - - unset($tmp, $diff); - - $tmp = array( - 'file1' => file_get_contents($this->new_location . $original_file), - 'file2' => file_get_contents($phpbb_root_path . $file), - ); - - // We need to diff the contents here to make sure the file is really the one we expect - $diff = new diff($tmp['file1'], $tmp['file2'], false); - $empty_2 = $diff->is_empty(); - - unset($tmp, $diff); - - // If the file is not modified we are finished here... - if ($empty_1) + foreach ($preserve_cr_ary as $preserve_cr) { - // Further check if it is already up to date - it could happen that non-modified files - // slip through + $tmp = array( + 'file1' => file_get_contents($this->old_location . 
$original_file), + 'file2' => file_get_contents($phpbb_root_path . $file), + ); + + // We need to diff the contents here to make sure the file is really the one we expect + $diff = new diff($tmp['file1'], $tmp['file2'], $preserve_cr); + $empty_1 = $diff->is_empty(); + + unset($tmp, $diff); + + $tmp = array( + 'file1' => file_get_contents($this->new_location . $original_file), + 'file2' => file_get_contents($phpbb_root_path . $file), + ); + + $diff = new diff($tmp['file1'], $tmp['file2'], $preserve_cr); + $empty_2 = $diff->is_empty(); + + unset($tmp, $diff); + + // If the file is not modified we are finished here... + if ($empty_1) + { + // Further check if it is already up to date - it could happen that non-modified files + // slip through + if ($empty_2) + { + $update_list['up_to_date'][] = $update_ary; + return; + } + + $update_list['not_modified'][] = $update_ary; + return; + } + + // If the file had been modified then we need to check if it is already up to date + + // if there are no differences we have an up-to-date file... if ($empty_2) { $update_list['up_to_date'][] = $update_ary; return; } - - $update_list['not_modified'][] = $update_ary; - return; } - // If the file had been modified then we need to check if it is already up to date + $conflicts = false; - // if there are no differences we have an up-to-date file... - if ($empty_2) + foreach ($preserve_cr_ary as $preserve_cr) { - $update_list['up_to_date'][] = $update_ary; - return; - } - - // if the file is modified we try to make sure a merge succeed - $tmp = array( - 'file1' => file_get_contents($this->old_location . $original_file), - 'file2' => file_get_contents($phpbb_root_path . $file), - 'file3' => file_get_contents($this->new_location . $original_file), - ); - - $diff = new diff3($tmp['file1'], $tmp['file2'], $tmp['file3'], false); - - unset($tmp); - - if ($diff->get_num_conflicts()) - { - $update_ary['conflicts'] = $diff->get_num_conflicts(); - - // There is one special case... 
users having merged with a conflicting file... we need to check this + // if the file is modified we try to make sure a merge succeed $tmp = array( - 'file1' => file_get_contents($phpbb_root_path . $file), - 'file2' => implode("\n", $diff->merged_orig_output()), + 'orig' => file_get_contents($this->old_location . $original_file), + 'final1' => file_get_contents($phpbb_root_path . $file), + 'final2' => file_get_contents($this->new_location . $original_file), ); - $diff = new diff($tmp['file1'], $tmp['file2'], false); - $empty = $diff->is_empty(); + $diff = new diff3($tmp['orig'], $tmp['final1'], $tmp['final2'], $preserve_cr); + unset($tmp); - if ($empty) + if (!$diff->get_num_conflicts()) { - unset($update_ary['conflicts']); - unset($diff); - $update_list['up_to_date'][] = $update_ary; - return; + $tmp = array( + 'file1' => file_get_contents($phpbb_root_path . $file), + 'file2' => implode("\n", $diff->merged_output()), + ); + + // now compare the merged output with the original file to see if the modified file is up to date + $diff2 = new diff($tmp['file1'], $tmp['file2'], $preserve_cr); + $empty = $diff2->is_empty(); + + unset($diff, $diff2); + + if ($empty) + { + $update_list['up_to_date'][] = $update_ary; + return; + } } + else + { + // There is one special case... users having merged with a conflicting file... we need to check this + $tmp = array( + 'file1' => file_get_contents($phpbb_root_path . $file), + 'file2' => implode("\n", $diff->merged_new_output()), + ); - $update_list['conflict'][] = $update_ary; - unset($diff); + $diff2 = new diff($tmp['file1'], $tmp['file2'], $preserve_cr); + $empty = $diff2->is_empty(); - return; + if (!$empty) + { + unset($tmp, $diff2); + + // We check if the user merged with his output + $tmp = array( + 'file1' => file_get_contents($phpbb_root_path . 
$file), + 'file2' => implode("\n", $diff->merged_orig_output()), + ); + + $diff2 = new diff($tmp['file1'], $tmp['file2'], $preserve_cr); + $empty = $diff2->is_empty(); + } + + if (!$empty) + { + $conflicts = $diff->get_num_conflicts(); + } + + unset($diff, $diff2); + + if ($empty) + { + // A conflict got resolved... + $update_list['up_to_date'][] = $update_ary; + return; + } + } } - $tmp = array( - 'file1' => file_get_contents($phpbb_root_path . $file), - 'file2' => implode("\n", $diff->merged_new_output()), - ); - - // now compare the merged output with the original file to see if the modified file is up to date - $diff = new diff($tmp['file1'], $tmp['file2'], false); - $empty = $diff->is_empty(); - - if ($empty) + if ($conflicts !== false) { - unset($diff); - - $update_list['up_to_date'][] = $update_ary; + $update_ary['conflicts'] = $conflicts; + $update_list['conflict'][] = $update_ary; return; } @@ -1650,7 +1666,7 @@ class install_update extends module /** * Wrapper for returning a diff object */ - function &return_diff() + function return_diff() { $args = func_get_args(); $three_way_diff = (func_num_args() > 2) ? true : false;