2010-09-11 4 views
1

Pour un projet de classe, j'essaye d'écrire un lecteur RSS simple pour ma classe Java. J'essaie de marcher dans l'arbre DOM juste pour avoir l'expérience de le faire, même si je sais qu'il existe de meilleurs moyens et outils. J'ai un objet ReaderObject qui obtient le titre de base, le lien, la description et une liste pour contenir les objets RSSItem qui ont le titre, le lien, la description, la date de publication et le guid des variables d'instance. J'espérais avec cette information, je pourrais analyser et réafficher d'une manière gentille. Je suis coincé avec la partie RSSItem puisque mon texte est vide là. Je ne sais pas si c'est une bonne façon de le faire, et si je comprends bien ...Aide à la construction d'un lecteur RSS en Java

Une autre question est quand vous obtenezChildNodes, puis avec une boucle for, vous obtenez chaque élément, pourquoi faire Je dois obtenir FirstFirst à ce stade. Je l'ai eu de l'exemple du livre, mais je ne sais pas pourquoi.

Voici mon code:

Code: 
import java.io.*; 
import java.util.*; 
import javax.xml.parsers.*; 
import org.w3c.dom.*; 
import org.xml.sax.*; 


public class RSSReader { 
    public static void main(String[] args) { 
     File f = new File("testrss.xml"); 
     if (f.isFile()) { 
      System.out.println("is File"); 
      RSSReader xml = new RSSReader(f); 
     } 
    } 

    public RSSReader(File xmlFile) { 
     try { 
      obj = new ReaderObject(); 

      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 
      DocumentBuilder builder = factory.newDocumentBuilder(); 
      Document doc = builder.parse(xmlFile); // Document extends Node 

      List<Node> nodeList = new ArrayList<Node>(); 
      nodeList.add(doc); 

      while (nodeList.size() > 0) 
      { 
      Node node = nodeList.get(0); 

//   if(node instanceof Document) 
//     System.out.println("Document Node"); 

      // Get entries in the xml file 
      if (node.hasChildNodes()) { 
       NodeList nl = node.getChildNodes(); 
       for(int i = 0; i < nl.getLength(); i++) { 
        if (nl.item(i) instanceof Element) { 
         Element childElement = (Element) nl.item(i); 
         nodeList.add(childElement); 
         //nodeList.add(nl.item(i)); 
        } 
       } 
      } 

      if (node instanceof Element) { 
       // Print out the element tag name 
       System.out.println("Element Node: " + ((Element)node).getTagName()); 

       // Print out the attributes of the element 
       if (node.hasAttributes()) { 
        NamedNodeMap attrMap = node.getAttributes(); 
        for (int i = 0; i < attrMap.getLength(); i++) { 
         Attr attribute = (Attr) attrMap.item(i); 
         System.out.print("\tAttribute Key: " + attribute.getName() + " Value: " + attribute.getValue()); 
        } 
        System.out.println(); 
       } 

       // Get children of node 
       if (node.hasChildNodes()) { 
        NodeList childrenList = node.getChildNodes(); 
        for (int j = 0; j < childrenList.getLength(); j++) { 
         Node child = childrenList.item(j); 
         Element childElement; 
         Text textNode; 
         if (child instanceof Element) { 
          childElement = (Element) child; 
          textNode = (Text) childElement.getFirstChild(); 
          String text = textNode.getData().trim(); 
          if (childElement.getTagName().toLowerCase().equals("title")) { 

           obj.setTitle(text); 
           System.out.println("Title: " + obj.getTitle()); 
          } 
          else if (childElement.getTagName().toLowerCase().equals("link")) { 
           obj.setLink(text); 
           System.out.println("Link: " + obj.getLink()); 
          } 
          else if (childElement.getTagName().toLowerCase().equals("description")) { 
           obj.setDescription(text); 
           System.out.println("Description: " + obj.getDescription()); 
          } 
          else if (childElement.getTagName().toLowerCase().equals("item")) { 
           RSSItem item = new RSSItem(); 
           System.out.println("item text: " + text); // STUCK HERE 
           item.setTitle(text); 
           System.out.println("RSS Item title: " + item.getTitle()); 
          } 
         } 
        } 
       } 
      } 

      nodeList.remove(0); 
      } 
     } 
     catch (IOException e) { 
      e.printStackTrace(); 
     } 
     catch (SAXException e) { 
      e.printStackTrace(); 
     } 
     catch (IllegalArgumentException e) { 
      e.printStackTrace(); 
     } 
     catch (ParserConfigurationException e) { 
      e.printStackTrace(); 
     } 
    } 
    private ReaderObject obj; 
} 
class ReaderObject { 
    public ReaderObject() { 
     this.title = ""; 
     this.link = ""; 
     this.description = ""; 
    }  

    public ReaderObject(String title, String link, String description) { 

     this.title = title; 
     this.link = link; 
     this.description = description; 
    } 

    public void setTitle(String title) { 
     this.title = title; 
    } 

    public void setLink(String link) { 
     this.link = link; 
    } 

    public void setDescription(String description) { 
     this.description = description; 
    } 

    public String getTitle() { 
     return title; 
    } 

    public String getLink() { 
     return link; 
    } 

    public String getDescription() { 
     return description; 
    } 

    private String title; 
    private String link; 
    private String description; 
    private List<RSSItem> items = new ArrayList<RSSItem>(); 
} 

class RSSItem { 

    public RSSItem() { 

     this.title = ""; 
     this.link = ""; 
     this.description = ""; 
     this.pubDate = ""; 
     this.guid = ""; 
    }  

    public RSSItem(String title, String link, String description, String item, String pubDate, String guid) { 

     this.title = title; 
     this.link = link; 
     this.description = description; 
     this.pubDate = pubDate; 
     this.guid = guid; 
    } 

    public void setTitle(String title) { 
     this.title = title; 
    } 

    public void setLink(String link) { 
     this.link = link; 
    } 

    public void setDescription(String description) { 
     this.description = description; 
    } 

    public void setPubDate(String pubDate) { 
     this.pubDate = pubDate; 
    } 

    public void setGuid(String guid) { 
     this.guid = guid; 
    } 

    public String getTitle() { 
     return title; 
    } 
    private String title; 
    private String link; 
    private String description; 
    private String pubDate; 
    private String guid; 
} 

Output: 
is File 
Element Node: rss 
    Attribute Key: version Value: 2.0 
Element Node: channel 
Title: Liftoff News 
Link: http://liftoff.msfc.nasa.gov/ 
Description: Liftoff to Space Exploration. 
item text: 
RSS Item title: 
item text: 
RSS Item title: 
item text: 
RSS Item title: 
item text: 
RSS Item title: 
Element Node: title 
Element Node: link 
Element Node: description 
Element Node: language 
Element Node: pubDate 
Element Node: lastBuildDate 
Element Node: docs 
Element Node: generator 
Element Node: managingEditor 
Element Node: webMaster 
Element Node: item 
Title: Star City 
Link: http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp 
Description: How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. 
Element Node: item 
Description: Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. 
Element Node: item 
Title: The Engine That Does More 
Link: http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp 
Description: Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. 
Element Node: item 
Title: Astronauts' Dirty Laundry 
Link: http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp 
Description: Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. 
Element Node: title 
Element Node: link 
Element Node: description 
Element Node: pubDate 
Element Node: guid 
Element Node: description 
Element Node: pubDate 
Element Node: guid 
Element Node: title 
Element Node: link 
Element Node: description 
Element Node: pubDate 
Element Node: guid 
Element Node: title 
Element Node: link 
Element Node: description 
Element Node: pubDate 
Element Node: guid 

XML Code: 
    <?xml version="1.0"?> 
    <rss version="2.0"> 
    <channel> 
    <title>Liftoff News</title> 
    <link>http://liftoff.msfc.nasa.gov/</link> 
    <description>Liftoff to Space Exploration.</description> 
    <language>en-us</language> 
    <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate> 
    <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate> 
    <docs>http://blogs.law.harvard.edu/tech/rss</docs> 
    <generator>Weblog Editor 2.0</generator> 
    <managingEditor>[email protected]</managingEditor> 
     <webMaster>[email protected]</webMaster> 
     <item> 
     <title>Star City</title> 
     <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link> 
     <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description> 
     <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate> 
     <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid> 
     </item> 
     <item> 
     <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description> 
      <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate> 
      <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid> 
      </item> <item> <title>The Engine That Does More</title> 
      <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link> 
      <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description> 
      <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate> 
      <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid> 
      </item> <item> <title>Astronauts' Dirty Laundry</title> 
      <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link> 
      <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description> <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate> 
      <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid> 
      </item> 
       </channel> 
       </rss> 
+0

Si mon université avait donné des devoirs comme ça, je n'aurais pas abandonné (ils nous ont donné des trucs ennuyeux). –

Répondre

0

Le problème avec votre approche est que vous examinez chaque élément sans tenir compte de sa hiérarchie. Par exemple, lorsque vous voyez un élément <title>, il peut s'agir du titre de la chaîne ou du titre de l'article.

Je pense que la meilleure façon de traiter avec un arbre DOM est un algorithme de haut en bas:

import java.io.File; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.List; 
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.ParserConfigurationException; 
import org.w3c.dom.Document; 
import org.w3c.dom.Element; 
import org.w3c.dom.Node; 
import org.w3c.dom.NodeList; 
import org.w3c.dom.Text; 
import org.xml.sax.SAXException; 

public class RSSReader { 

    public static void main(String[] args) { 
     try { 
      File f = new File("testrss.xml"); 
      if (f.isFile()) { 
       System.out.println("is File"); 
       List<RSSChannel> channels = read(f); 
       for (RSSChannel channel: channels) { 
        System.out.println("Channel: "); 
        System.out.println(" title: " + channel.getTitle()); 
        System.out.println(" link: " + channel.getLink()); 
        System.out.println(" description: " + channel.getDescription()); 
        for (RSSItem item: channel.getItems()) { 
         System.out.println(" Item: "); 
         System.out.println("  title: " + item.getTitle()); 
         System.out.println("  link: " + item.getLink()); 
         System.out.println("  description: " + item.getDescription()); 
         System.out.println("  pubDate: " + item.getPubDate()); 
         System.out.println("  guid: " + item.getGuid()); 
        } 
       } 
      } 
     } catch (Exception e) { 
      e.printStackTrace(); 
     } 
    } 

    public static List<RSSChannel> read(File xmlFile) 
      throws ParserConfigurationException, SAXException, IOException { 
     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 
     DocumentBuilder builder = factory.newDocumentBuilder(); 
     Document doc = builder.parse(xmlFile); // Document extends Node 

     Element root = doc.getDocumentElement(); 
     if (!root.getTagName().equalsIgnoreCase("rss")) { 
      throw new IOException("Invalid RSS document"); 
     } 

     return readChannels(root.getChildNodes()); 
    } 

    private static List<RSSChannel> readChannels(NodeList nodes) { 
     List<RSSChannel> result = new ArrayList<RSSChannel>(); 
     for (int i = 0; i < nodes.getLength(); ++i) { 
      Node node = nodes.item(i); 
      if (node instanceof Element) { 
       Element elem = (Element)node; 
       if (elem.getTagName().equalsIgnoreCase("channel")) { 
        result.add(readChannel(elem.getChildNodes())); 
       } 
      } 
     } 
     return result; 
    } 

    private static RSSChannel readChannel(NodeList nodes) { 
     RSSChannel channel = new RSSChannel(); 
     for (int i = 0; i < nodes.getLength(); ++i) { 
      Node node = nodes.item(i); 
      if (node instanceof Element) { 
       Element elem = (Element)node; 
       if (elem.getTagName().equalsIgnoreCase("title")) { 
        channel.setTitle(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("link")) { 
        channel.setLink(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("description")) { 
        channel.setDescription(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("item")) { 
        channel.addItem(readItem(elem.getChildNodes())); 
       } 
      } 
     } 
     return channel; 
    } 

    private static RSSItem readItem(NodeList nodes) { 
     RSSItem item = new RSSItem(); 
     for (int i = 0; i < nodes.getLength(); ++i) { 
      Node node = nodes.item(i); 
      if (node instanceof Element) { 
       Element elem = (Element)node; 
       if (elem.getTagName().equalsIgnoreCase("title")) { 
        item.setTitle(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("link")) { 
        item.setLink(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("description")) { 
        item.setDescription(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("pubDate")) { 
        item.setPubDate(getText(elem)); 
       } else if (elem.getTagName().equalsIgnoreCase("guid")) { 
        item.setGuid(getText(elem)); 
       } 
      } 
     } 
     return item; 
    } 

    private static String getText(Element elm) { 
     Node node = elm.getFirstChild(); 
     if (!(node instanceof Text)) { 
      return null; 
     } 
     Text text = (Text)node; 
     return text.getData().trim(); 
    } 

    public static class RSSChannel { 

     public RSSChannel() { 
      this.title = ""; 
      this.link = ""; 
      this.description = ""; 
     } 

     public RSSChannel(String title, String link, String description) { 

      this.title = title; 
      this.link = link; 
      this.description = description; 
     } 

     public void setTitle(String title) { 
      this.title = title; 
     } 

     public void setLink(String link) { 
      this.link = link; 
     } 

     public void setDescription(String description) { 
      this.description = description; 
     } 

     public String getTitle() { 
      return title; 
     } 

     public String getLink() { 
      return link; 
     } 

     public String getDescription() { 
      return description; 
     } 

     public List<RSSItem> getItems() { 
      return new ArrayList<RSSItem>(items); 
     } 

     public void addItem(RSSItem item) { 
      items.add(item); 
     } 

     private String title; 
     private String link; 
     private String description; 
     private List<RSSItem> items = new ArrayList<RSSItem>(); 
    } 

    public static class RSSItem { 

     public RSSItem() { 

      this.title = ""; 
      this.link = ""; 
      this.description = ""; 
      this.pubDate = ""; 
      this.guid = ""; 
     } 

     public RSSItem(String title, String link, String description, String item, String pubDate, String guid) { 

      this.title = title; 
      this.link = link; 
      this.description = description; 
      this.pubDate = pubDate; 
      this.guid = guid; 
     } 

     public void setTitle(String title) { 
      this.title = title; 
     } 

     public void setLink(String link) { 
      this.link = link; 
     } 

     public void setDescription(String description) { 
      this.description = description; 
     } 

     public void setPubDate(String pubDate) { 
      this.pubDate = pubDate; 
     } 

     public void setGuid(String guid) { 
      this.guid = guid; 
     } 

     public String getTitle() { 
      return title; 
     } 

     public String getDescription() { 
      return description; 
     } 

     public String getGuid() { 
      return guid; 
     } 

     public String getLink() { 
      return link; 
     } 

     public String getPubDate() { 
      return pubDate; 
     } 

     private String title; 
     private String link; 
     private String description; 
     private String pubDate; 
     private String guid; 
    } 
} 
1

Pour RSS vous pouvez utiliser une API plus spécifique - Rome. Et here est un article sur la façon de l'utiliser.

Et le getFirstChild() ci-dessus est nécessaire, car votre Element ne contient pas le texte - il contient un nœud Text, qui à son tour a le texte.