mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-05-09 23:56:04 +02:00
Issue 75: [Sitemaps] more robust parsing of XML elements (jnioche, kkrugler)
This commit is contained in:
parent
40731c3304
commit
20861baf47
|
@ -1,6 +1,7 @@
|
|||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 0.6-SNAPSHOT (dd/mm/yyyy)
|
||||
- Issue 75: [Sitemaps] more robust parsing of XML elements (jnioche, kkrugler)
|
||||
- Issue 76: maven-java-formatter-plugin (jnioche)
|
||||
- Issue 73: Switch groupID in pom from com.google.code.crawler-commons to crawler-commons (jnioche)
|
||||
- Issue 71: Upgrade to Tika 1.8 (jnioche)
|
||||
|
|
|
@ -581,14 +581,12 @@ public class SiteMapParser {
|
|||
/**
 * Looks up elements named {@code elementName} via
 * {@code elem.getElementsByTagName(...)} and returns text read from the
 * first match.
 *
 * NOTE(review): this text is a rendered diff hunk whose +/- markers are
 * missing, so statements from before and after the change appear together.
 * Which of the two value-returning statements belongs to which version
 * cannot be confirmed from here.
 *
 * @param elem element on which the tag-name lookup is performed
 * @param elementName tag name passed to {@code getElementsByTagName}
 * @return {@code null} when the lookup yields no list or no first item;
 *         otherwise a string taken from the first matching element
 */
private String getElementValue(Element elem, String elementName) {
|
||||
|
||||
NodeList list = elem.getElementsByTagName(elementName);
|
||||
if (list == null)
|
||||
return null;
|
||||
// Only the first match is used; any further matches are ignored.
Element e = (Element) list.item(0);
|
||||
if (e != null) {
|
||||
// Reads only the first child node of the match and trims its value.
// NOTE(review): per the DOM API, getNodeValue() is null for element nodes,
// so .trim() would throw if the first child is itself an element -
// presumably the case this change targets; confirm against the real diff.
NodeList children = e.getChildNodes();
|
||||
if (children.item(0) != null) {
|
||||
return ((Node) children.item(0)).getNodeValue().trim();
|
||||
}
|
||||
// Returns the text content of the matched element as reported by the DOM
// (not trimmed here).
return e.getTextContent();
|
||||
}
|
||||
|
||||
// No first item in the list: nothing to return.
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -254,7 +254,7 @@ public class SiteMapParserTest {
|
|||
.append("</url>").append("<url>").append(" <loc>http://www.example.com/catalog?item=73&desc=vacation_new_zealand</loc>").append(" <lastmod>2004-12-23</lastmod>")
|
||||
.append(" <changefreq>weekly</changefreq>").append("</url>").append("<url>").append(" <loc>http://www.example.com/catalog?item=74&desc=vacation_newfoundland</loc>")
|
||||
.append(" <lastmod>2004-12-23T18:00:15+00:00</lastmod>").append(" <priority>0.3</priority>").append("</url>").append("<url>")
|
||||
.append(" <loc>http://www.example.com/catalog?item=83&desc=vacation_usa</loc>").append(" <lastmod>2004-11-23</lastmod>").append("</url>").append("</urlset>");
|
||||
.append(" <loc><url><![CDATA[http://www.example.com/catalog?item=83&desc=vacation_usa]]></url></loc>").append(" <lastmod>2004-11-23</lastmod>").append("</url>").append("</urlset>");
|
||||
|
||||
return scontent.toString().getBytes();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue