Changeset 1021


Ignore:
Timestamp:
03/01/09 17:42:38 (3 years ago)
Author:
gcosmin
Message:

Optimized longest common subsequence algorithm with O(N + M) memory and wrote it in C++
http://reviewboard.infoarena.ro/r/70/

Location:
trunk
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/common/diff.php

    r1018 r1021  
    8888 
    8989// compute longest common subsequence using dynamic programming 
    90 // FIXME: both time and memory complexity can be reduced 
    9190function lcs($a, $b) { 
    92     $N = mb_strlen($a); 
    93     $M = mb_strlen($b); 
    94     $C = array(array(2), array($M+1)); 
     91    $descriptorspec = array( 
     92        0 => array("pipe", "r"), 
     93        1 => array("pipe", "w"), 
     94        2 => array("pipe", "w"), 
     95    ); 
    9596 
    96     for ($i = 0; $i < 2; ++$i) 
    97         for ($j = 0; $j <= $M; ++$j) 
    98             $C[$i][$j] = ""; 
    99      
    100     for ($i = 1; $i <= $N; ++$i) { 
    101         for ($j = 1; $j <= $M; ++$j) { 
    102             if (mb_substr($a, $i-1, 1) == mb_substr($b, $j-1, 1)) { 
    103                 $C[$i%2][$j] = $C[($i-1)%2][$j-1].mb_substr($a, $i-1, 1); 
    104             } else if (mb_strlen($C[($i-1)%2][$j]) > mb_strlen($C[$i%2][$j-1])) { 
    105                 $C[$i%2][$j] = $C[($i-1)%2][$j]; 
    106             } else { 
    107                 $C[$i%2][$j] = $C[$i%2][$j-1]; 
    108             } 
    109         } 
     97    // run lcs process 
     98    $process = proc_open("iconv -f utf8 -t utf32 | " . IA_ROOT_DIR. 
     99        "/common/lcs" . " | iconv -f utf32 -t utf8", $descriptorspec, $pipes); 
     100    log_assert(is_resource($process), "Could not create process."); 
     101 
     102    // feed script to pipe 
     103    list($lcs_in, $lcs_out, $lcs_err) = $pipes; 
     104 
     105    fwrite($lcs_in, $a."\n"); 
     106    fwrite($lcs_in, $b."\n"); 
     107    fclose($lcs_in); 
     108 
     109    $result = fread($lcs_out, 10000); 
     110    fclose($lcs_out); 
     111 
     112    // check for errors 
     113    $errors = fread($lcs_err, 1000); 
     114    if ($errors) { 
     115        log_error($errors); 
    110116    } 
    111     return $C[$N%2][$M]; 
     117    fclose($lcs_err); 
     118 
     119    // clean-up 
     120    proc_close($process); 
     121 
     122    $result = trim($result, "\n"); 
     123 
     124    return $result; 
    112125} 
    113126 
  • trunk/scripts/setup

    r907 r1021  
    8383        chgrp($dstfile, getmygid()); 
    8484    } 
     85} 
     86 
     87// Compiles longest common substrig written in c++ 
     88function compile_lcs($IA_ROOT_DIR) 
     89{ 
     90    $command = "g++ " . $IA_ROOT_DIR . "common/lcs.cpp -O2 -static -o " . 
     91        $IA_ROOT_DIR . "common/lcs"; 
     92 
     93    $last_line = system($command, $ret_val); 
     94 
     95    if ($ret_val) { // check for errors 
     96        print("\nWARNING!!! There has been a problem when trying to compile a " . 
     97            "cpp source! Check your gcc/g++ installation then rerun " . 
     98            "the script.\nThis will not affect the setup except for " . 
     99            "comparing revisons, but you should take a look at this!\n\n"); 
     100        return 0; 
     101    } 
     102 
     103    return 1; 
    85104} 
    86105 
     
    128147        $config_vars['IA_URL_PREFIX'], true, true); 
    129148 
     149// Compile needed files 
     150compile_lcs($config_vars['IA_ROOT_DIR']); 
     151 
    130152// Database configuration here. 
    131153while (true) { 
     
    246268print("FIXME: forum is not completely functional\n"); 
    247269print("FIXME: eval won't work, but it doesn't matter.\n"); 
     270 
     271?> 
Note: See TracChangeset for help on using the changeset viewer.