1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-09 23:56:04 +02:00

Issue 75: [Sitemaps] more robust parsing of XML elements (jnioche, kkrugler)

This commit is contained in:
Julien Nioche 2015-05-22 11:08:21 +01:00
parent 40731c3304
commit 20861baf47
3 changed files with 5 additions and 6 deletions

View File

@ -1,6 +1,7 @@
Crawler-Commons Change Log
Current Development 0.6-SNAPSHOT (dd/mm/yyyy)
- Issue 75: [Sitemaps] more robust parsing of XML elements (jnioche, kkrugler)
- Issue 76: maven-java-formatter-plugin (jnioche)
- Issue 73: Switch groupID in pom from com.google.code.crawler-commons to crawler-commons (jnioche)
- Issue 71: Upgrade to Tika 1.8 (jnioche)

View File

@ -581,14 +581,12 @@ public class SiteMapParser {
/**
 * Returns the text of the first element named {@code elementName} found
 * beneath {@code elem}.
 *
 * <p>The value is read with {@link Node#getTextContent()}, so it is found
 * whether it is stored as plain text, inside a CDATA section, or wrapped in
 * further child elements (e.g.
 * {@code <loc><url><![CDATA[http://...]]></url></loc>}). Reading only the
 * first child node's value would throw a {@link NullPointerException} when
 * that child is an element, because its node value is {@code null}.
 *
 * @param elem
 *            the element whose descendants are searched
 * @param elementName
 *            the tag name to look for
 * @return the matched element's text with leading and trailing whitespace
 *         removed (possibly empty), or {@code null} if no such element
 *         exists
 */
private String getElementValue(Element elem, String elementName) {
    NodeList list = elem.getElementsByTagName(elementName);
    if (list == null) {
        return null;
    }

    // item(0) yields null when the list is empty, i.e. the tag is absent.
    Node first = list.item(0);
    if (first == null) {
        return null;
    }

    // Trim consistently so that indentation or line breaks around the value
    // in the XML source never leak into the result.
    String text = first.getTextContent();
    return text == null ? null : text.trim();
}

View File

@ -254,7 +254,7 @@ public class SiteMapParserTest {
.append("</url>").append("<url>").append(" <loc>http://www.example.com/catalog?item=73&amp;desc=vacation_new_zealand</loc>").append(" <lastmod>2004-12-23</lastmod>")
.append(" <changefreq>weekly</changefreq>").append("</url>").append("<url>").append(" <loc>http://www.example.com/catalog?item=74&amp;desc=vacation_newfoundland</loc>")
.append(" <lastmod>2004-12-23T18:00:15+00:00</lastmod>").append(" <priority>0.3</priority>").append("</url>").append("<url>")
.append(" <loc>http://www.example.com/catalog?item=83&amp;desc=vacation_usa</loc>").append(" <lastmod>2004-11-23</lastmod>").append("</url>").append("</urlset>");
.append(" <loc><url><![CDATA[http://www.example.com/catalog?item=83&amp;desc=vacation_usa]]></url></loc>").append(" <lastmod>2004-11-23</lastmod>").append("</url>").append("</urlset>");
return scontent.toString().getBytes();
}