2010-07-27 4 views
0

Lucene — problème de recherche : j'ai construit un index sur mes lignes de base de données (chaque ligne comme un document), qui sont de type Unicode dans MySQL (c'est-à-dire charset : utf8 et collation : utf8_bin). Mais quand je recherche un mot, anglais ou non, il ne me donne aucune réponse. Il dit :

0 total des documents correspondants

Mon code est le code de démonstration de Lucene pour la recherche, sauf que j'ai remplacé les noms de champs par les noms des colonnes que j'ai insérées. Quoi qu'il en soit, il imprime ce message avant d'atteindre cette partie du code. J'ai aussi changé le codage de lecture de la requête en UTF-8.

J'ai vérifié la lecture de la partie base de données. C'est bon.

Quel est le problème?

Si cela peut aider, voici mon code d'insertion :

/**
 * Reads every row of the {@code users} table and adds one Lucene document per row.
 *
 * <p>Fields: {@code id} (stored, not analyzed), {@code name} (not analyzed),
 * {@code description} and {@code text} (analyzed). Null columns are indexed as
 * empty strings, since {@code Field} rejects a null value.
 *
 * @param writer open IndexWriter that receives the documents; not closed here
 * @param conn   open JDBC connection; the Statement/ResultSet created from it
 *               are closed before returning, the connection itself is not
 * @throws SQLException          on any database error
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           on any index I/O error
 */
static void indexDocs(IndexWriter writer, Connection conn) throws SQLException, CorruptIndexException, IOException {
    String sql = "select id, name, description, text from users";
    Statement stmt = conn.createStatement();
    try {
        ResultSet rs = stmt.executeQuery(sql);
        try {
            while (rs.next()) {
                Document d = new Document();
                d.add(new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Guard every nullable column: Field's constructor throws on a null value.
                String name = rs.getString("name");
                d.add(new Field("name", name == null ? "" : name, Field.Store.NO, Field.Index.NOT_ANALYZED));
                String description = rs.getString("description");
                d.add(new Field("description", description == null ? "" : description, Field.Store.NO, Field.Index.ANALYZED));
                String text = rs.getString("text");
                d.add(new Field("text", text == null ? "" : text, Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(d);
            }
        } finally {
            // Close JDBC resources even when an exception escapes the loop.
            rs.close();
        }
    } finally {
        stmt.close();
    }
}

Aussi ceci est mon code de recherche:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/** Simple command-line based search demo. */ 
/** Simple command-line based search demo. */
public class Search {

    /** Use the norms from one field for all fields. Norms are read into memory,
     * using a byte of memory per document per searched field. This can cause
     * search of large collections with a large number of fields to run out of
     * memory. If all of the fields contain only a single token, then the norms
     * are all identical, then single norm vector may be shared. */
    private static class OneNormsReader extends FilterIndexReader {

        // Field whose norms are served regardless of the field actually requested.
        private String field;

        public OneNormsReader(IndexReader in, String field) {
            super(in);
            this.field = field;
        }

        @Override
        public byte[] norms(String field) throws IOException {
            // Deliberately ignore the requested field name: always answer with
            // the norms of the field chosen at construction time.
            return in.norms(this.field);
        }
    }

    // Utility class: no instances.
    private Search() {
    }

    /**
     * Command-line entry point. Parses the demo's options, opens the index
     * read-only, then reads queries (from stdin or from a UTF-8 query file)
     * and runs them in paging or streaming mode.
     *
     * <p>NOTE(review): the default search field is "contents"; an index built
     * with {@code indexDocs} above has no such field, so searches must either
     * pass {@code -field text} (or another indexed column) or qualify terms
     * like {@code text:word} — otherwise every query matches 0 documents.
     */
    public static void main(String[] args) throws Exception {
        String usage =
                "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
        usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
        if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
            System.out.println(usage);
            System.exit(0);
        }

        String index = "index";
        String field = "contents";
        String queries = null;
        int repeat = 0;
        boolean raw = false;
        String normsField = null;
        boolean paging = true;
        int hitsPerPage = 10;

        // Each value-taking option consumes args[i + 1] and advances i past it.
        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i])) {
                index = args[i + 1];
                i++;
            } else if ("-field".equals(args[i])) {
                field = args[i + 1];
                i++;
            } else if ("-queries".equals(args[i])) {
                queries = args[i + 1];
                i++;
            } else if ("-repeat".equals(args[i])) {
                repeat = Integer.parseInt(args[i + 1]);
                i++;
            } else if ("-raw".equals(args[i])) {
                raw = true;
            } else if ("-norms".equals(args[i])) {
                normsField = args[i + 1];
                i++;
            } else if ("-paging".equals(args[i])) {
                if (args[i + 1].equals("false")) {
                    paging = false;
                } else {
                    hitsPerPage = Integer.parseInt(args[i + 1]);
                    if (hitsPerPage == 0) {
                        paging = false;
                    }
                }
                i++;
            }
        }

        IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only searching, so read-only=true

        if (normsField != null) {
            reader = new OneNormsReader(reader, normsField);
        }

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

        BufferedReader in = null;
        if (queries != null) {
            // BUG FIX: FileReader used the platform default charset; decode the
            // query file as UTF-8 explicitly, consistent with the stdin path.
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
        while (true) {
            if (queries == null) // prompt the user
            {
                System.out.println("Enter query: ");
            }

            String line = in.readLine();
            // BUG FIX: check for end-of-stream BEFORE touching the line (the old
            // code dereferenced it first and NPE'd at EOF), and drop the
            // new String(line.getBytes("8859_1"), "UTF8") round-trip — the
            // reader above already decodes UTF-8, so re-decoding corrupted
            // every non-ASCII query. Also removed the impossible
            // line.length() == -1 test (length() is never negative).
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));


            if (repeat > 0) {       // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            if (paging) {
                doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
            } else {
                doStreamingSearch(searcher, query);
            }
        }
        reader.close();
    }

    /**
     * This method uses a custom HitCollector implementation which simply prints out
     * the docId and score of every matching document.
     *
     * This simulates the streaming search use case, where all hits are supposed to
     * be processed, regardless of their relevance.
     */
    public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
        Collector streamingHitCollector = new Collector() {

            private Scorer scorer;
            private int docBase;

            // simply print docId and score of every matching document
            @Override
            public void collect(int doc) throws IOException {
                // BUG FIX: "doc=" + doc + docBase concatenated the two numbers
                // as strings; the segment-relative id must be ADDED to docBase
                // to obtain the global document id.
                System.out.println("doc=" + (docBase + doc) + " score=" + scorer.score());
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void setNextReader(IndexReader reader, int docBase)
                    throws IOException {
                this.docBase = docBase;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }
        };

        searcher.search(query, streamingHitCollector);
    }

    /**
     * This demonstrates a typical paging search scenario, where the search engine presents
     * pages of size n to the user. The user can then go to the next page if interested in
     * the next hits.
     *
     * When the query is executed for the first time, then only enough results are collected
     * to fill 5 result pages. If the user wants to page beyond this limit, then the query
     * is executed another time and all hits are collected.
     *
     */
    public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {

        // Collect enough docs to show 5 pages
        TopScoreDocCollector collector = TopScoreDocCollector.create(
                5 * hitsPerPage, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int numTotalHits = collector.getTotalHits();
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected.");
                System.out.println("Collect more (y/n) ?");
                String line = in.readLine();
                // BUG FIX: readLine() returns null at end-of-stream; treat it
                // like a refusal instead of throwing NullPointerException.
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }

                // Re-run the query collecting every hit this time.
                collector = TopScoreDocCollector.create(numTotalHits, false);
                searcher.search(query, collector);
                hits = collector.topDocs().scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) {        // output raw format
                    System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String id = doc.get("id");
                if (id != null) {
                    System.out.println((i + 1) + ". " + id);
                    // name/description/text are indexed with Field.Store.NO in
                    // indexDocs, so these are typically null and not printed.
                    String name = doc.get("name");
                    if (name != null) {
                        System.out.println(" name: " + name);
                    }
                    String description = doc.get("description");
                    if (description != null) {
                        System.out.println(" description: " + description);
                    }
                    String text = doc.get("text");
                    if (text != null) {
                        System.out.println(" text: " + text);
                    }
                } else {
                    System.out.println((i + 1) + ". " + "No path for this document");
                }

            }

            if (!interactive) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.println("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // BUG FIX: null (end-of-stream) quits instead of NPE-ing.
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }
                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        // BUG FIX: arbitrary input used to escape as an uncaught
                        // NumberFormatException; re-prompt instead.
                        int page;
                        try {
                            page = Integer.parseInt(line);
                        } catch (NumberFormatException e) {
                            System.out.println("Unrecognized input: " + line);
                            continue;
                        }
                        if ((page - 1) * hitsPerPage < numTotalHits) {
                            start = (page - 1) * hitsPerPage;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    }
                }
                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }

        }

    }
}

Merci.

Répondre

0

J'ai trouvé la solution : je dois spécifier la colonne dans laquelle je veux rechercher. Par exemple, pour chercher dans le champ « text », je dois écrire : « text:MonMot ».

Questions connexes