2011-11-03 1 views
3

J'essaie de créer une classe qui utilise la fonction de distance de Levenshtein pour comparer le texte d'un document donné à celui de tous les autres documents d'un répertoire.

Computing Levenshtein Distance

J'ai l'idée de base en tête, mais je ne sais pas comment la coder en PHP. Je viens du monde C#, je vais donc fournir autant de détails que possible.

// NOTE(review): this class is a pseudo-code sketch, not runnable PHP. The
// `new Array()` property default, the `for each ... in` loops, the `.add()`
// call and the unnamed function below all have to be translated to real PHP
// syntax before the file will parse.
class ComputeLevenshtein 
{ 
    public $filePathList = new Array(); //The array that stores the absolute path of all documents within a specified directory 
    public $directory; // Directory whose documents are compared; left unassigned by the constructor below 
    public $filePath; //This is the document that will be compared for each document in a directory 

    // Intended to initialise $directory; the assignment is still a placeholder.
    public function __construct() { 
     $this->directory = //; 
     /* I'm stuck here, once a user registers, a separate directory is 
      named after the user. I need to be able to read the username 
      from the Session Variable once the user logs in. 
      I'll just have to pass it in as a parameter. 
      Do I have to create a session wrapper? 
      If it's too complex, 
      then I'll just start off with a static directory */ 
    } 

     // Intended to collect the path of every document in $directory into
     // $this->filePathList. As written it only appends to the list and has
     // no return statement.
     function computeFilePathList($directory) 
     { 
      for each file in Directory 
      { 
      // NOTE(review): assigned as $filepath but read back as $filePath on the
      // next line; PHP variable names are case-sensitive.
      $filepath = file.FilePath(); //store the filepath in a variable 
      $this->filePathList.add($filePath) //add the filepath to the array 
      } 

     } 

     // NOTE(review): the function name is missing here.
     function ($docFilePath) // returns the Levenshtein Distance 
     { 

      for each path in filePathList 
      { 
       // The reference document is re-read on every iteration although
       // $docFilePath does not change inside the loop.
       $input= readDoc($docFilePath); 
       // $lev is overwritten on each pass, so only the distance to the last
       // path in the list survives the loop.
       $lev = levenshtein($input, readDoc($path)); 
      } 

      return $lev; 
     } 

    // Placeholder for the document reader; $text is never assigned in this
    // sketch.
    function readDoc($docFilePath) // Returns the raw text of that doc 
    { 
     //I have the code for reading the doc in a separate function 
     return $text; 
    } 
} 
+4

Y a-t-il une raison pour laquelle vous réimplémentez quelque chose que PHP propose déjà ? [php levenshtein](http://php.net/manual/fr/function.levenshtein.php) – birryree

+3

Je ne réimplémente pas levenshtein, je l'utilise simplement pour comparer le texte brut d'un document à celui des autres documents d'un répertoire. – user478636

+0

Écrire un wrapper de session est, à mon humble avis, la voie à suivre. Vous pouvez le faire en créant une classe nommée User. – greg0ire

Répondre

1

Que diriez-vous de ceci :

/**
 * Holds a pair of strings and exposes them, plus their Levenshtein distance,
 * as read-only magic properties:
 *
 *   - input   the first string
 *   - compare the string it is checked against
 *   - dist    levenshtein(input, compare), computed on first access and
 *             stored for later reads
 *
 * Any other property name reads as null.
 */
class Levenshtein
{
    /** @var array Backing store for the magic properties. */
    private $_p = array();

    /**
     * @param string $input   Text to measure.
     * @param string $compare Text to measure against.
     */
    public function __construct($input, $compare)
    {
        $this->_p = array(
            'input'   => $input,
            'compare' => $compare,
        );
    }

    /**
     * Magic reader for input, compare and dist.
     *
     * @param string $property Requested property name.
     * @return mixed Stored value, the freshly computed distance, or null for
     *               an unknown name.
     */
    public function __get($property)
    {
        // Fill in the distance the first time it is requested.
        if ($property === 'dist' && !array_key_exists('dist', $this->_p)) {
            $first = $this->_p['input'];
            $second = $this->_p['compare'];
            $this->_p['dist'] = levenshtein($first, $second);
        }

        return array_key_exists($property, $this->_p)
            ? $this->_p[$property]
            : null;
    }
}

/**
 * Compares one reference file against the entries of a directory.
 *
 * Each comparison is exposed as a magic property named after the directory
 * entry, e.g. $dirLev->{'notes.txt'}, and is built on first access and then
 * cached. The value is a Levenshtein object wrapping both file contents.
 */
class DirectoryLevenshtein
{
    /** @var string Directory path, always terminated with '/'. */
    private $_directory;

    /** @var string|null Path of the reference file; null until one is set. */
    private $_filePath;

    /** @var array Levenshtein objects already built, keyed by entry name. */
    private $_distances = array();

    /**
     * @param string      $directoryPath Directory whose entries are compared.
     * @param string|null $filePath      Optional reference file.
     *
     * @throws Exception When $directoryPath is not a directory, or when
     *                   $filePath is given but is not readable.
     */
    public function __construct($directoryPath, $filePath = null)
    {
        if (!is_dir($directoryPath)) {
            throw new Exception("Path '$directoryPath' does not exist");
        }

        // Normalise so entry names can simply be appended later.
        if (substr($directoryPath, -1) !== '/') {
            $directoryPath .= '/';
        }

        $this->_directory = $directoryPath;

        if ($filePath !== null && !$this->setFilePath($filePath)) {
            throw new Exception("File '$filePath' is not readable");
        }
    }

    /**
     * Returns the comparison between the reference file and one entry.
     *
     * @param string $file Entry name relative to the directory.
     * @return Levenshtein|null Null when no reference file is set, when the
     *                          entry is not a readable regular file, or when
     *                          either file cannot be read.
     */
    public function __get($file)
    {
        if (array_key_exists($file, $this->_distances)) {
            return $this->_distances[$file];
        }

        $comparePath = $this->_directory . $file;

        // is_readable() is also true for directories, so is_file() is needed
        // to keep file_get_contents() away from sub-directories.
        if (empty($this->_filePath)
            || !is_file($comparePath)
            || !is_readable($comparePath)
        ) {
            return null;
        }

        $input = file_get_contents($this->_filePath);
        $compare = file_get_contents($comparePath);

        // A failed read returns false; comparing against it would silently
        // produce a meaningless distance.
        if ($input === false || $compare === false) {
            return null;
        }

        $this->_distances[$file] = new Levenshtein($input, $compare);

        return $this->_distances[$file];
    }

    /**
     * Lists the entries of the directory without the '.' and '..' entries.
     *
     * @return array Entry names in scandir() order, re-indexed from 0; an
     *               empty array when the directory cannot be listed.
     */
    public function getDirectoryContents()
    {
        $entries = scandir($this->_directory);

        if ($entries === false) {
            return array();
        }

        // '.' and '..' are not guaranteed to come first: names beginning with
        // a byte below '.', such as '-a.txt', sort ahead of them. Filter by
        // value instead of shifting from the front.
        return array_values(array_diff($entries, array('.', '..')));
    }

    /**
     * Sets the reference file. It can only be set once.
     *
     * @param string $filePath Path of the reference file.
     * @return bool True when stored; false when a reference file is already
     *              set or $filePath is not readable.
     */
    public function setFilePath($filePath)
    {
        if (empty($this->_filePath) && is_readable($filePath)) {
            $this->_filePath = $filePath;
            return true;
        }

        return false;
    }
}

Pour l'utiliser, faites quelque chose comme ceci :

// A session wrapper could supply the user name instead of reading $_SESSION directly.
$userDir = '/path/to/user/dirs/' . $_SESSION['user'];
// File that every entry of the directory is compared with.
$filePath = '/path/to/file.txt';

$dirLev = new DirectoryLevenshtein($userDir, $filePath);

// Entries of the user's directory.
$files = $dirLev->getDirectoryContents();

// Print one "name: distance" line per comparable entry.
foreach ($files as $file) {
    // Variable property access: the entry name held in $file (not the
    // literal name "file") is what reaches DirectoryLevenshtein::__get().
    $result = $dirLev->$file;

    // __get() gives null for entries it cannot compare; skip those.
    if ($result === null) {
        continue;
    }

    echo "$file: {$result->dist}\n";
}
Questions connexes