2013-10-10 4 views
-6

c'est mon programme stemmer porteur .... j'ai eu la sortie dans le tableau char dans la fonction stem() .... et j'ai essayé de convertir cela en chaîne ... mais cela prend la valeur la plus ancienne et non la nouvelle. C'est-à-dire par exemple "regarder" est étouffé pour donner "look" ..... "look" est dans char array (b [c] dans stem()) mais lors de la conversion en chaîne prend la valeur "recherche"
fichier de package;comment convertir un tableau char en chaîne en java

import java.io.BufferedReader; 
import java.io.BufferedWriter; 
import java.io.FileReader; 
import java.io.FileWriter; 
import java.util.StringTokenizer; 
import java.util.Vector; 

/** 
* 
* @author sky 
*/ 

public class stemmer { 



    public static String line1,line,element,sentence,str; 
    private char[] b; 
    private int i,  /* offset into b */ 
       i_end, /* offset to end of stemmed word */ 
       j, k; 
    private static final int INC = 50; 
        /* unit of size whereby b is increased */ 

    public stemmer() 
    { 
     //b = new char[INC]; 
     i = 0; 
     i_end = 0; 
    } 

    /** 
    * Add a character to the word being stemmed. When you are finished 
    * adding characters, you can call stem(void) to stem the word. 
    */ 

    public void add(char ch) 
    { 
     System.out.println("in add() function"); 
     if (i == b.length) 
     { 
      char[] new_b = new char[i+INC]; 
     for (int c = 0; c < i; c++) 
     new_b[c] = b[c]; 
     b = new_b; 
     } 
     b[i++] = ch; 

    } 



    /** Adds wLen characters to the word being stemmed contained in a portion 
    * of a char[] array. This is like repeated calls of add(char ch), but 
    * faster. 
    */ 

    public void add(char[] w, int wLen) 
    { if (i+wLen >= b.length) 
     { 
     char[] new_b = new char[i+wLen+INC]; 
     for (int c = 0; c < i; c++) 
      new_b[c] = b[c]; 
      b = new_b; 
     } 
     for (int c = 0; c < wLen; c++) 
      b[i++] = w[c]; 
    } 
public void addstring(String s1) 
{ 
    b=new char[s1.length()]; 
    for(int k=0;k<s1.length();k++) 
    { 
     b[k] = s1.charAt(k); 
    //System.out.println(b[k]); 
    } 
    i=s1.length(); 
} 
    /** 
    * After a word has been stemmed, it can be retrieved by toString(), 
    * or a reference to the internal buffer can be retrieved by getResultBuffer 
    * and getResultLength (which is generally more efficient.) 
    */ 
    public String toString() { return new String(b,0,i_end); } 

    /** 
    * Returns the length of the word resulting from the stemming process. 
    */ 
    public int getResultLength() { return i_end; } 

    /** 
    * Returns a reference to a character buffer containing the results of 
    * the stemming process. You also need to consult getResultLength() 
    * to determine the length of the result. 
    */ 
    public char[] getResultBuffer() { return b; } 

    /* cons(i) is true <=> b[i] is a consonant. */ 

    private final boolean cons(int i) 
    { switch (b[i]) 
     { case 'a': case 'e': case 'i': case 'o': case 'u': return false; 
     case 'y': return (i==0) ? true : !cons(i-1); 
     default: return true; 
     } 
    } 

    /* m() measures the number of consonant sequences between 0 and j. if c is 
     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary 
     presence, 

     <c><v>  gives 0 
     <c>vc<v>  gives 1 
     <c>vcvc<v> gives 2 
     <c>vcvcvc<v> gives 3 
     .... 
    */ 

    private final int m() 
    { int n = 0; 
     int i = 0; 
     while(true) 
     { if (i > j) return n; 
     if (! cons(i)) break; i++; 
     } 
     i++; 
     while(true) 
     { while(true) 
     { if (i > j) return n; 
       if (cons(i)) break; 
       i++; 
     } 
     i++; 
     n++; 
     while(true) 
     { if (i > j) return n; 
      if (! cons(i)) break; 
      i++; 
     } 
     i++; 
     } 
    } 

    /* vowelinstem() is true <=> 0,...j contains a vowel */ 

    private final boolean vowelinstem() 
    { int i; for (i = 0; i <= j; i++) if (! cons(i)) return true; 
     return false; 
    } 

    /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ 

    private final boolean doublec(int j) 
    { if (j < 1) return false; 
     if (b[j] != b[j-1]) return false; 
     return cons(j); 
    } 

    /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant 
     and also if the second c is not w,x or y. this is used when trying to 
     restore an e at the end of a short word. e.g. 

     cav(e), lov(e), hop(e), crim(e), but 
     snow, box, tray. 

    */ 

    private final boolean cvc(int i) 
    { if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false; 
     { int ch = b[i]; 
     if (ch == 'w' || ch == 'x' || ch == 'y') return false; 
     } 
     return true; 
    } 

    private final boolean ends(String s) 
    { 

     int l = s.length(); 
    int o = k-l+1; 
     if (o < 0) 
      return false; 
     for (int i = 0; i < l; i++) 
      if (b[o+i] != s.charAt(i)) 
      return false; 
     j = k-l; 
     return true; 
    } 

    /* setto(s) sets (j+1),...k to the characters in the string s, readjusting 
     k. */ 

    private final void setto(String s) 
    { int l = s.length(); 
     int o = j+1; 
     for (int i = 0; i < l; i++) 
      b[o+i] = s.charAt(i); 
     k = j+l; 
    } 

    /* r(s) is used further down. */ 

    private final void r(String s) { if (m() > 0) setto(s); } 

    /* step1() gets rid of plurals and -ed or -ing. e.g. 

      caresses -> caress 
      ponies -> poni 
      ties  -> ti 
      caress -> caress 
      cats  -> cat 

      feed  -> feed 
      agreed -> agree 
      disabled -> disable 

      matting -> mat 
      mating -> mate 
      meeting -> meet 
      milling -> mill 
      messing -> mess 

      meetings -> meet 

    */ 

    private final void step1() 
    { 

     if (b[k] == 's') 
     { if (ends("sses")) k -= 2; else 
     if (ends("ies")) setto("i"); else 
     if (b[k-1] != 's') k--; 
     } 
     if (ends("eed")) { if (m() > 0) k--; } else 
     if ((ends("ed") || ends("ing")) && vowelinstem()) 
     { k = j; 
     if (ends("at")) setto("ate"); else 
     if (ends("bl")) setto("ble"); else 
     if (ends("iz")) setto("ize"); else 
     if (doublec(k)) 
     { k--; 
      { int ch = b[k]; 
       if (ch == 'l' || ch == 's' || ch == 'z') k++; 
      } 
     } 
     else if (m() == 1 && cvc(k)) setto("e"); 
    } 

    } 

    /* step2() turns terminal y to i when there is another vowel in the stem. */ 

    private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; } 

    /* step3() maps double suffices to single ones. so -ization (= -ize plus 
     -ation) maps to -ize etc. note that the string before the suffix must give 
     m() > 0. */ 

    private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1]) 
    { 
     case 'a': if (ends("ational")) { r("ate"); break; } 
       if (ends("tional")) { r("tion"); break; } 
       break; 
     case 'c': if (ends("enci")) { r("ence"); break; } 
       if (ends("anci")) { r("ance"); break; } 
       break; 
     case 'e': if (ends("izer")) { r("ize"); break; } 
       break; 
     case 'l': if (ends("bli")) { r("ble"); break; } 
       if (ends("alli")) { r("al"); break; } 
       if (ends("entli")) { r("ent"); break; } 
       if (ends("eli")) { r("e"); break; } 
       if (ends("ousli")) { r("ous"); break; } 
       break; 
     case 'o': if (ends("ization")) { r("ize"); break; } 
       if (ends("ation")) { r("ate"); break; } 
       if (ends("ator")) { r("ate"); break; } 
       break; 
     case 's': if (ends("alism")) { r("al"); break; } 
       if (ends("iveness")) { r("ive"); break; } 
       if (ends("fulness")) { r("ful"); break; } 
       if (ends("ousness")) { r("ous"); break; } 
       break; 
     case 't': if (ends("aliti")) { r("al"); break; } 
       if (ends("iviti")) { r("ive"); break; } 
       if (ends("biliti")) { r("ble"); break; } 
       break; 
     case 'g': if (ends("logi")) { r("log"); break; } 
    } } 

    /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */ 

    private final void step4() { switch (b[k]) 
    { 
     case 'e': if (ends("icate")) { r("ic"); break; } 
       if (ends("ative")) { r(""); break; } 
       if (ends("alize")) { r("al"); break; } 
       break; 
     case 'i': if (ends("iciti")) { r("ic"); break; } 
       break; 
     case 'l': if (ends("ical")) { r("ic"); break; } 
       if (ends("ful")) { r(""); break; } 
       break; 
     case 's': if (ends("ness")) { r(""); break; } 
       break; 
    } } 

    /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */ 

    private final void step5() 
    { if (k == 0) return; /* for Bug 1 */ switch (b[k-1]) 
     { case 'a': if (ends("al")) break; return; 
      case 'c': if (ends("ance")) break; 
        if (ends("ence")) break; return; 
      case 'e': if (ends("er")) break; return; 
      case 'i': if (ends("ic")) break; return; 
      case 'l': if (ends("able")) break; 
        if (ends("ible")) break; return; 
      case 'n': if (ends("ant")) break; 
        if (ends("ement")) break; 
        if (ends("ment")) break; 
        /* element etc. not stripped before the m */ 
        if (ends("ent")) break; return; 
      case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break; 
            /* j >= 0 fixes Bug 2 */ 
        if (ends("ou")) break; return; 
        /* takes care of -ous */ 
      case 's': if (ends("ism")) break; return; 
      case 't': if (ends("ate")) break; 
        if (ends("iti")) break; return; 
      case 'u': if (ends("ous")) break; return; 
      case 'v': if (ends("ive")) break; return; 
      case 'z': if (ends("ize")) break; return; 
      default: return; 
     } 
     if (m() > 1) k = j; 
    } 

    /* step6() removes a final -e if m() > 1. */ 

    private final void step6() 
    { j = k; 
     if (b[k] == 'e') 
     { int a = m(); 
     if (a > 1 || a == 1 && !cvc(k-1)) k--; 
     } 
     if (b[k] == 'l' && doublec(k) && m() > 1) k--; 
    } 

    /** Stem the word placed into the Stemmer buffer through calls to add(). 
    * Returns true if the stemming process resulted in a word different 
    * from the input. You can retrieve the result with 
    * getResultLength()/getResultBuffer() or toString(). 
    */ 
    public void stem() 

    { 
     //System.out.println(i); 
     k = i - 1; 
     if (k > 1) 
     { 
      step1(); 
      step2(); 
      step3(); 
      step4(); 
      step5(); 
      step6(); 
     } 
     for(int c=0;c<=k;c++) 
     { 
     // System.out.print(b[c]); 

     } 
     str=new String(b); 
//System.out.println(str); 
     sentence+=str+" "; 
    System.out.println(sentence); 

     i_end = k+1; i = 0; 
    } 
    public static void main(String[] args) 
    { 
     stemmer s = new stemmer(); 

     try 
     { 
    BufferedReader br = new BufferedReader(new FileReader("D:/output.txt")); 
    BufferedWriter output = new BufferedWriter(new FileWriter("D:/output1.txt")); 
    String separator = System.getProperty("line.separator"); 
     while ((line = br.readLine()) != null) 
      { 
        StringTokenizer st2 = new StringTokenizer(line, "\n"); 

        while (st2.hasMoreElements()) 
        { 
         String line1 = (String) st2.nextElement(); 
         System.out.println(line1); 
         StringTokenizer st3 = new StringTokenizer(line1, " "); 
         // String sentence= new String(); 
        while (st3.hasMoreTokens()) 
        { 
         element=st3.nextToken(); 
         s.addstring(element); 
         s.stem(); 
         } 
    output.append(separator+sentence); 
    sentence=""; 
        } 
      } 
     output.close(); 
     } 

    //System.out.println(element); 


//  s.addstring(element); 
//  s.stem(); 
//  s.addstring("walks"); 
//  s.stem(); 
     //System.out.println("Output " +s.b); 


     catch(Exception e) 
     { 

     } 
} 
} 
+3

Cette question est très confuse. Nous n'avons aucune idée de ce que «je» est, ou de ce que font les différentes méthodes d'étape, ou de ce que la sortie attendue est. S'il vous plaît lire http://tinyurl.com/so-list –

+0

Avec plus d'infos, il sera plus facile de répondre – JulianG

+0

S'il vous plaît écrire un SSCCE: http://sscce.org/ –

Répondre

4

Pour convertir char tableau à String utilisation déclaration suivante:

String str=new String(char[]); 

Voir la docs.

+3

Lors de la fourniture de code, il est préférable que ce code devrait fonctionner, par exemple 'char [] arr = {'a', 'b'}; String str = new Chaîne (arr); – Dukeling

Questions connexes