mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-05-22 19:06:03 +02:00
Sitemap extension support
- add extension support to SiteMapTester
- list extension attributes in SiteMapURL.toString()
- update change log
This commit is contained in:
parent
66745b29a1
commit
862af9416f
|
@ -1,6 +1,7 @@
|
|||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 0.11-SNAPSHOT (yyyy-mm-dd)
|
||||
- [Sitemaps] Add support for sitemap extensions (tuxnco, sebastian-nagel) #35, #36, #149, #162
|
||||
- [Sitemaps] Use the Java 8 date and time API (java.time.*) to parse dates in sitemaps (sebastian-nagel) #217
|
||||
- [Robots] Fix for handling URLs with query parameters but no path (kkrugler) #215
|
||||
|
||||
|
|
|
@ -44,6 +44,8 @@ public class SiteMapTester {
|
|||
LOG.error("Java properties:");
|
||||
LOG.error(" sitemap.strictNamespace");
|
||||
LOG.error(" if true sitemaps are required to use the standard namespace URI");
|
||||
LOG.error(" sitemap.extensions");
|
||||
LOG.error(" if true enable sitemap extension parsing");
|
||||
} else {
|
||||
URL url = new URL(args[0]);
|
||||
String mt = (args.length > 1) ? args[1] : null;
|
||||
|
@ -64,6 +66,11 @@ public class SiteMapTester {
|
|||
boolean strictNamespace = new Boolean(System.getProperty("sitemap.strictNamespace"));
|
||||
saxParser.setStrictNamespace(strictNamespace);
|
||||
|
||||
boolean enableExtensions = new Boolean(System.getProperty("sitemap.extensions"));
|
||||
if (enableExtensions) {
|
||||
saxParser.enableExtensions();
|
||||
}
|
||||
|
||||
AbstractSiteMap sm = null;
|
||||
// guesses the mimetype
|
||||
if (mt == null || mt.equals("")) {
|
||||
|
@ -80,7 +87,11 @@ public class SiteMapTester {
|
|||
} else {
|
||||
Collection<SiteMapURL> links = ((SiteMap) sm).getSiteMapUrls();
|
||||
for (SiteMapURL smu : links) {
|
||||
LOG.info(smu.getUrl().toString());
|
||||
if (enableExtensions) {
|
||||
LOG.info(smu.toString());
|
||||
} else {
|
||||
LOG.info(smu.getUrl().toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.time.ZonedDateTime;
|
|||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import crawlercommons.sitemaps.extension.Extension;
|
||||
|
@ -371,6 +372,13 @@ public class SiteMapURL {
|
|||
sb.append(", lastMod = ").append((lastModified == null) ? "null" : SiteMap.W3C_FULLDATE_FORMATTER_UTC.format(lastModified.toInstant()));
|
||||
sb.append(", changeFreq = ").append(changeFreq);
|
||||
sb.append(", priority = ").append(priority);
|
||||
if (attributes != null) {
|
||||
for (Entry<Extension, ExtensionMetadata[]> e : attributes.entrySet()) {
|
||||
for (ExtensionMetadata m : e.getValue()) {
|
||||
sb.append(", ").append(m.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue