1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-22 19:06:03 +02:00

Sitemap extension support

- add extension support to SiteMapTester
- list extension attributes in SiteMapURL.toString()
- update change log
This commit is contained in:
Sebastian Nagel 2018-09-28 12:14:02 +02:00
parent 66745b29a1
commit 862af9416f
3 changed files with 21 additions and 1 deletion

View File

@ -1,6 +1,7 @@
Crawler-Commons Change Log
Current Development 0.11-SNAPSHOT (yyyy-mm-dd)
- [Sitemaps] Add support for sitemap extensions (tuxnco, sebastian-nagel) #35, #36, #149, #162
- [Sitemaps] Use the Java 8 date and time API (java.time.*) to parse dates in sitemaps (sebastian-nagel) #217
- [Robots] Fix for handling URLs with query parameters but no path (kkrugler) #215

View File

@ -44,6 +44,8 @@ public class SiteMapTester {
LOG.error("Java properties:");
LOG.error(" sitemap.strictNamespace");
LOG.error(" if true sitemaps are required to use the standard namespace URI");
LOG.error(" sitemap.extensions");
LOG.error(" if true enable sitemap extension parsing");
} else {
URL url = new URL(args[0]);
String mt = (args.length > 1) ? args[1] : null;
@ -64,6 +66,11 @@ public class SiteMapTester {
boolean strictNamespace = new Boolean(System.getProperty("sitemap.strictNamespace"));
saxParser.setStrictNamespace(strictNamespace);
boolean enableExtensions = new Boolean(System.getProperty("sitemap.extensions"));
if (enableExtensions) {
saxParser.enableExtensions();
}
AbstractSiteMap sm = null;
// guesses the mimetype
if (mt == null || mt.equals("")) {
@ -80,7 +87,11 @@ public class SiteMapTester {
} else {
Collection<SiteMapURL> links = ((SiteMap) sm).getSiteMapUrls();
for (SiteMapURL smu : links) {
LOG.info(smu.getUrl().toString());
if (enableExtensions) {
LOG.info(smu.toString());
} else {
LOG.info(smu.getUrl().toString());
}
}
}
}

View File

@ -25,6 +25,7 @@ import java.time.ZonedDateTime;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import crawlercommons.sitemaps.extension.Extension;
@ -371,6 +372,13 @@ public class SiteMapURL {
sb.append(", lastMod = ").append((lastModified == null) ? "null" : SiteMap.W3C_FULLDATE_FORMATTER_UTC.format(lastModified.toInstant()));
sb.append(", changeFreq = ").append(changeFreq);
sb.append(", priority = ").append(priority);
if (attributes != null) {
for (Entry<Extension, ExtensionMetadata[]> e : attributes.entrySet()) {
for (ExtensionMetadata m : e.getValue()) {
sb.append(", ").append(m.toString());
}
}
}
return sb.toString();
}