%
/**
 * Read a text file and group its lines into paragraphs.
 *
 * Each line has "\r" and "\n" removed, tabs turned into single spaces and
 * is then trimmed. Consecutive non-blank lines are joined into one
 * paragraph, every line followed by a single space (so each paragraph ends
 * with a trailing space). One or more blank lines end the current paragraph.
 *
 * @param string $fname Path of the file to read.
 * @return array Paragraph strings under keys 0..n-1 plus the key "count"
 *               holding n. An unreadable or empty file yields
 *               array("count" => 0).
 */
function getparas($fname){
    $paras = array();
    $count = 0;
    $current = "";

    $data = file($fname);
    if ($data === false) {
        // Unreadable file: behave like an empty one, as before.
        $data = array();
    }

    // Iterate over every line. The previous "!empty()" loop guard stopped
    // early on a final line consisting only of "0", silently dropping it.
    foreach ($data as $line) {
        $line = str_replace(array("\r", "\n"), "", $line);
        $line = str_replace("\t", " ", $line);
        $line = trim($line);

        if ($line == "") {
            // Blank line: close the paragraph in progress, if there is one.
            if ($current != "") {
                $paras[$count] = $current;
                $count++;
                $current = "";
            }
        }
        else {
            $current = $current . $line . " ";
        }
    }

    // The file may end without a trailing blank line.
    if ($current != "") {
        $paras[$count] = $current;
        $count++;
    }

    $paras["count"] = $count;
    return $paras;
}
/**
 * Split a paragraph into space-separated words, keeping only the words
 * whose length is at least $thresh.
 *
 * @param string $data   Text to split on single spaces.
 * @param int    $thresh Minimum length (in bytes) for a word to be kept.
 * @return array Kept words under keys 0..n-1 plus the key "count" holding n.
 */
function getwords($data, $thresh){
    $words = array();
    $count = 0;

    // Visit every token. The previous "!empty()" loop guard stopped at the
    // first empty token (produced by consecutive spaces) and at a literal
    // "0" word, discarding everything after it.
    foreach (explode(" ", (string) $data) as $part) {
        if ($part === "") {
            // Gap between consecutive spaces, not a word.
            continue;
        }
        if (strlen($part) >= $thresh) {
            $words[$count] = trim($part);
            $count++;
        }
    }

    $words["count"] = $count;
    return $words;
}
// Longest-common-subsequence comparison of two token lists.
//
// $words1, $words2: arrays holding tokens under keys 0..n-1 and their
//                   number under the key "count".
// $flag:            when true, the full table is kept in $lc and, if the
//                   lists differ, an edit script is produced.
//
// Returns an array with:
//   "total" - length of the common prefix plus the LCS length of the rest.
//   "same"  - true unless $flag is set and "total" differs from the longer
//             list's count (so it is always true when $flag is false).
//   "out"   - only present when $flag is set and "same" is false: entries
//             "+word" (taken from $words2), "-word" (taken from $words1)
//             and "0word" (common), plus a "count" key. Entries are stored
//             last-token-first, so reading from index count-1 down to 0
//             yields the tokens in forward order.
function lcs($words1, $words2, $flag){
$a = $words1["count"];
$b = $words2["count"];
// $c = smaller of the two counts, $max = larger of the two counts.
$c = $a;
$max = $a;
if ($b < $a) $c = $b;
if ($b > $a) $max = $b;
// Skip the common prefix; $hh ends up as its length.
// NOTE(review): the element comparison is evaluated before the bound check,
// so index $c is read on both arrays when the whole prefix matches.
$ii = 0;
while($words1[$ii] == $words2[$ii] and $ii < $c)$ii++;
$hh = $ii;
// The table is only computed when the shorter list is not fully consumed
// by the common prefix.
if($hh < $c){
for($ii = $hh; $ii < $b; $ii++) $lcb[0][$ii] = 0;
// $lcb holds two rolling rows: $k1 indexes the row being written,
// $k0 the previously written one. They are swapped at the top of each pass.
// NOTE(review): on the first pass $k1 becomes 0, so the zero-filled row 0 is
// overwritten and row 1 (not yet assigned) is read as the previous row.
$k0 = 0;
$k1 = 1;
for($ii = $hh; $ii < $a; $ii++){
if($k0 == 0){
$k0 = 1;
$k1 = 0;
}
else {
$k0 = 0;
$k1 = 1;
}
for($jj = $hh; $jj < $b; $jj++){
// Tokens match: extend the diagonal value by one.
// For $jj == $hh the cell at column $hh - 1 has never been assigned.
if ($words1[$ii] == $words2[$jj]) {
$lcb[$k1][$jj] = $lcb[$k0][$jj-1] + 1;
}
// Otherwise carry over the larger of "above" and "left".
elseif ($lcb[$k0][$jj] >= $lcb[$k1][$jj-1]) {
$lcb[$k1][$jj] = $lcb[$k0][$jj];
}
else {
$lcb[$k1][$jj] = $lcb[$k1][$jj-1];
}
// Keep every cell when a backtrace may be needed.
if($flag) $lc[$ii][$jj] = $lcb[$k1][$jj];
}
}
// Last column of the last written row.
$total = $lcb[$k1][$b-1];
}
// NOTE(review): when the block above is skipped, $total has not been
// assigned before this addition.
$total = $total + $hh;
$same = true;
if($flag and $total <> $max){
$same = false;
// Sentinel value -1 along column $hh - 1 and row $hh - 1 of the table.
for($ii = 0; $ii < $a; $ii++) $lc[$ii][$hh - 1] = -1;
for($ii = 0; $ii < $b; $ii++) $lc[$hh - 1][$ii] = -1;
// Backtrace from the last token of each list towards index $hh.
$ii = $a - 1;
$jj = $b - 1;
$kk = 0;
while (($ii >= $hh or $jj >= $hh)) {
// Both cursors sit on the first token after the common prefix:
// emit it as an addition and a removal, then step both cursors back.
if($ii == $hh and $jj == $hh){
$out[$kk] = "+" . $words2[$jj];
$kk++;
$out[$kk] = "-" . $words1[$ii];
$kk++;
$ii--;
$jj--;
}
else{
// Same value as the cell to the left: token of $words2 is an addition.
if($lc[$ii][$jj] == $lc[$ii][$jj-1]){
$out[$kk] = "+" . $words2[$jj];
$jj--;
$kk++;
}
// Same value as the cell above: token of $words1 is a removal.
elseif($lc[$ii][$jj] == $lc[$ii-1][$jj]){
$out[$kk] = "-" . $words1[$ii];
$ii--;
$kk++;
}
// Otherwise the token is common to both lists.
else{
$out[$kk] = "0" . $words1[$ii];
$ii--;
$jj--;
$kk++;
}
}
}
// Append the common prefix (in reverse, like the rest of the script).
for ($ii = $hh - 1; $ii >= 0; $ii--){
$out[$kk] = "0" . $words1[$ii];
$kk++;
}
$out["count"] = $kk;
$result["out"] = $out;
}
$result["total"] = $total;
$result["same"] = $same;
return $result;
}
// ---- Setup: load both files and choose the rendering strings ----
// $src_file and $trg_file are not assigned in this part of the script;
// presumably the including page or request supplies them (TODO confirm).
$start = time();
//$args = explode(" ", $arg);
$args[0] = $src_file;
$args[1] = $trg_file;
$paras1 = getparas($args[0]);
$paras2 = getparas($args[1]);

// Base rendering strings for added/removed words and for the per-paragraph
// output cases (moved-from, moved-to, removed, everything else).
$base_plus = "";
$base_minus = "";
$base_table_plus = "
";
$base_table_minus_p = " | ";
$base_table_minus_x = " | ";
$base_table_else = " | ";

// Active rendering strings; these are the ones used further down.
$a_plus = $base_plus;
$a_minus = $base_minus;
$a_table_plus = $base_table_plus;
$a_table_minus_p = $base_table_minus_p;
$a_table_minus_x = $base_table_minus_x;
$a_table_else = $base_table_else;
$a_plus_font = "";
$a_minus_font = "";

// Keep the file with more paragraphs in $paras1. When the inputs are
// swapped, the meaning of "added" and "removed" flips, so the rendering
// strings are swapped as well.
if($paras1["count"] < $paras2["count"]){
    $paras3 = $paras1;
    $paras1 = $paras2;
    $paras2 = $paras3;
    $a_plus = $base_minus;
    $a_minus = $base_plus;
    $a_table_plus = $base_table_minus_p;
    $a_table_minus_p = $base_table_plus;
    $a_table_minus_x = $base_table_else;
    // Was $base_table_table_minus_x, a variable that does not exist, which
    // left $a_table_else empty whenever the inputs were swapped.
    $a_table_else = $base_table_minus_x;
    $a_plus_font = "";
    $a_minus_font = "";
}
// ---- Paragraph matching ----
// Give every paragraph a token: "1p<i>" for paragraph i of $paras1 and
// "2p<i>" for paragraph i of $paras2. The loop below rewrites tokens:
//   $ptok2[j] = "1p<i>"  when paragraph j of $paras2 matched paragraph i of $paras1
//   $ptok1[i] = "1x<i>"  when paragraph i of $paras1 found no match
//   $ptok1[i] = "1xz"    when paragraph i yielded no words at all
for($ii = 0; $ii < $paras1["count"]; $ii++) $ptok1[$ii] = "1p" . $ii;
$ptok1["count"] = $paras1["count"];
for($ii = 0; $ii < $paras2["count"]; $ii++) $ptok2[$ii] = "2p" . $ii;
$ptok2["count"] = $paras2["count"];
//
// Tuning Parameters
// $thresh: initial minimum word length used for the first comparison pass
// $cutoff: similarity cutoff (halved into $cutoff2 below)
// $ymin:   word count at or below which a paragraph uses minimum length 2,
//          and the number of long words aimed for otherwise
// $tmin:   lowest minimum word length the relaxation loop goes down to
$thresh = 11;
$cutoff = .33;
$ymin = 8;
$tmin = 4;
//
//
// The similarity ratios below divide by the sum of both word counts,
// presumably the reason the cutoff is halved here.
$cutoff2 = $cutoff/2;
// $targ is the index in $paras2 around which the search for a match starts.
$targ = -1;
// NOTE(review): the bound is "<=", so index $paras1["count"] is visited too;
// no paragraph was stored there by the loops above.
for($ii = 0; $ii <= $paras1["count"]; $ii++){
$test = $thresh;
$nfound = true;
// $words0: every word of the paragraph; $words1: only words of length >= $test.
$words0 = getwords($paras1[$ii], 1);
$wrds = $words0["count"];
if($wrds <= $ymin) $test = 2;
$words1 = getwords($paras1[$ii], $test);
$y1 = $words1["count"];
// For longer paragraphs, lower the length threshold until at least $ymin
// words qualify or the threshold reaches $tmin.
if($wrds > $ymin){
while($test > $tmin and $y1 < $ymin){
$test--;
$words1 = getwords($paras1[$ii], $test);
$y1 = $words1["count"];
}
}
//echo " " . $ii . " " . $targ . " " . $test . " " . $y1 . " " . (time() - $xxx); flush();
$xxx = time();
if($y1 > 0){
$d = 0;
$continue = true;
$targ++;
// Probe candidates of $paras2 in the order $targ, $targ-1, $targ+1,
// $targ-2, $targ+2, ... ($jj is the current probe, $jjx the next one).
while ($continue){
$jj = $targ + $d;
if($d >= 0) $d = -$d - 1;
else $d = -$d;
$jjx = $targ + $d;
// $jj0 / $jj1 = lower / upper of the current and next probe positions.
if($d >= 0) {
$jj0 = $jj;
$jj1 = $jjx;
}
else {
$jj0 = $jjx;
$jj1 = $jj;
}
// Stop once the probes have run past both ends of $paras2.
if($jj0 < 0 and $jj1 > $paras2["count"]) $continue = false;
else {
// Only consider in-range paragraphs whose token still starts with "2",
// i.e. ones that have not been matched yet.
if($jj >= 0 and $jj <= $paras2["count"] and substr($ptok2[$jj],0,1) == "2"){
// First pass: compare only the words of length >= $test.
$words2 = getwords($paras2[$jj], $test);
$lcs0 = lcs($words1, $words2, false);
$x = $lcs0["total"];
$y2 = $words2["count"];
if($y2 > 0){
$z = $x/($y1 + $y2);
if($z > $cutoff2) {
// Second pass: confirm with every word and request the edit script.
$words2 = getwords($paras2[$jj], 1);
$lcs = lcs($words0, $words2, true);
$x = $lcs["total"];
$y2 = $words2["count"];
$z = $x/($words0["count"] + $y2);
if($z > $cutoff2){
// The paragraphs differ: rebuild $paras1[$ii] from the edit script,
// wrapping "+" and "-" words in the active rendering strings.
if($lcs["same"] == false){
$paras1[$ii] = "";
$out = $lcs["out"];
$kmax = $out["count"];
// The script is stored last-word-first, hence the descending index.
for ($aa = $kmax - 1; $aa >= 0; $aa--){
$temp = substr($out[$aa],1) . " ";
// $test is reused here for the entry's type character; it is reset to
// $thresh at the top of the next outer iteration.
$test = substr($out[$aa],0,1);
if($test=="+")$temp = $a_plus . $temp . "" . $a_plus_font;
if($test=="-")$temp = $a_minus . $temp . "" . $a_minus_font;
$paras1[$ii] = $paras1[$ii] . $temp;
}
}
// Record the match and continue the next search from this position.
$ptok2[$jj] = "1p" . $ii;
$targ = $jj;
$continue = false;
$nfound = false;
}
}
}
}
}
}
if($nfound) $ptok1[$ii] = "1x" . $ii;
//if($nfound) echo "x";
}
else $ptok1[$ii] = "1xz";
}
// ---- Output ----
//for ($ii = 0; $ii<= $ptok1["count"]; $ii++) echo $ptok1[$ii] . " ";
//echo " ";
//for ($ii = 0; $ii<= $ptok2["count"]; $ii++) echo $ptok2[$ii] . " ";
//echo " ";
// Align the two paragraph token lists to find kept, moved, removed and
// added paragraphs.
$lcs = lcs($ptok1, $ptok2, true);
if ($lcs["same"]) {
    // Identical token lists: lcs() supplies no "out" script in this case,
    // which previously caused nothing to be printed. Every paragraph is
    // matched in place, so print them in order as unchanged ones.
    for ($ii = 0; $ii < $ptok1["count"]; $ii++) echo $paras1[$ii] . "";
}
else {
    $out = $lcs["out"];
    $kmax = $out["count"];
    // The script is stored last-entry-first, hence the descending index.
    for ($aa = $kmax - 1; $aa >= 0; $aa--){
        // Entry layout: type ("0", "+", "-"), source list ("1" or "2"),
        // match flag ("p" or "x"), then the paragraph index (or "z").
        $type = substr($out[$aa],0,1);
        $nfound = substr($out[$aa],2,1);
        $src = substr($out[$aa],1,1);
        $ii = substr($out[$aa],3);
        if($src == "1") {
            if($type == "0") echo $paras1[$ii] . "";
            //moved from another location
            if($type == "+") echo " " . $a_table_plus . " " . $paras1[$ii] . " | ";
            //moved to another location
            if($type == "-" and $nfound == "p") echo "" . $a_table_minus_p . " " . $paras1[$ii] . " | ";
            //removed
            if($type == "-" and $nfound == "x" and $ii != "z") echo "" . $a_table_minus_x . " " . $paras1[$ii] . " | ";
        }
        // Token still belongs to the second list: paragraph only found there.
        else echo "" . $a_table_else . " " . $paras2[$ii] . " |
";
    }
}
echo " took: " . ((time() - $start)/60) . " minutes";
%>
|