From cb21c29a5704a3a97b044cc8d9766db7abacea81 Mon Sep 17 00:00:00 2001 From: Avi Hayun Date: Wed, 29 Apr 2020 20:00:43 +0300 Subject: [PATCH] Upgrade the toString() method of the Base/Simple RobotRules #264 --- CHANGES.txt | 8 +++++--- .../java/crawlercommons/robots/BaseRobotRules.java | 10 ++++++---- .../java/crawlercommons/robots/SimpleRobotRules.java | 11 +++++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b70d59e..351957f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,15 +1,17 @@ Crawler-Commons Change Log Current Development 1.1-SNAPSHOT (yyyy-mm-dd) +- [Robots] Upgrade the toString() method of the Base/Simple RobotRules (Avi Hayun) #264 +- Upgrade GitIgnore (Avi Hayun) #260 - [Robots] Deduplicate sitemap links (sebastian-nagel) #261 - EffectiveTldFinder to log loading of public suffix list (sebastian-nagel) #284 - SiteMapParser getPublicationDate in VideoAttributes may throw NPE (panthony, sebastian-nagel) #283 - SimpleRobotRulesParser: Trim log messages (jnioche, sebastian-nagel) #281 - SimpleRobotRulesParser: counter _numWarnings not thread-safe (sebastian-nagel, kkrugler) #278 - ParameterizedTest not executed by mvn builds (sebastian-nagel) #273 -- [BasicNormalizer] Empty path before query to be normalized to `/` (Chaiavi, sebastian-nagel) #247 -- EffectiveTldFinder to validate returned domain names for length restrictions (sebastian-nagel, Chaiavi) #251 -- Upgrade unit tests to use JUnit v5.x and parameterized tests (Chaiavi) #249, #253, #255 +- [BasicNormalizer] Empty path before query to be normalized to `/` (Avi Hayun, sebastian-nagel) #247 +- EffectiveTldFinder to validate returned domain names for length restrictions (sebastian-nagel, Avi Hayun) #251 +- Upgrade unit tests to use JUnit v5.x and parameterized tests (Avi Hayun) #249, #253, #255 - [Robots] Robots parser to always handle absolute sitemap URL even without valid base URL (pr3mar, kkrugler, sebastian-nagel) #240 Release 1.0 
(2019-03-19) diff --git a/src/main/java/crawlercommons/robots/BaseRobotRules.java b/src/main/java/crawlercommons/robots/BaseRobotRules.java index bf49757..00fdb8d 100644 --- a/src/main/java/crawlercommons/robots/BaseRobotRules.java +++ b/src/main/java/crawlercommons/robots/BaseRobotRules.java @@ -41,7 +41,7 @@ public abstract class BaseRobotRules implements Serializable { private LinkedHashSet _sitemaps; public BaseRobotRules() { - _sitemaps = new LinkedHashSet(); + _sitemaps = new LinkedHashSet<>(); } public long getCrawlDelay() { @@ -116,13 +116,15 @@ public abstract class BaseRobotRules implements Serializable { sb.append(" - crawl delay: ").append(delay).append('\n'); } - int nSitemaps = getSitemaps().size(); + List sitemaps = getSitemaps(); + int nSitemaps = sitemaps.size(); if (nSitemaps == 0) { sb.append(" - no sitemap URLs\n"); } else { sb.append(" - number of sitemap URLs: ").append(nSitemaps).append('\n'); - if (nSitemaps <= 10) { - sb.append(String.join("\n", getSitemaps())).append("\n\n"); + int numOfSitemapsToShow = Math.min(nSitemaps, 10); + for (int i = 0; i < numOfSitemapsToShow; i++) { + sb.append(sitemaps.get(i)).append("\n"); } } diff --git a/src/main/java/crawlercommons/robots/SimpleRobotRules.java b/src/main/java/crawlercommons/robots/SimpleRobotRules.java index ab4ca3c..e15e428 100644 --- a/src/main/java/crawlercommons/robots/SimpleRobotRules.java +++ b/src/main/java/crawlercommons/robots/SimpleRobotRules.java @@ -129,7 +129,7 @@ public class SimpleRobotRules extends BaseRobotRules { super(); _mode = mode; - _rules = new ArrayList(); + _rules = new ArrayList<>(); } public void clearRules() { @@ -366,11 +366,10 @@ public class SimpleRobotRules extends BaseRobotRules { sb.append('\n'); } else { sb.append(" - number of rules: ").append(nRules).append('\n'); - if (nRules <= 10) { - for (int i = 0; i < nRules; i++) { - RobotRule r = _rules.get(i); - sb.append(r._allow ? 
" A" : " Disa").append("llow: ").append(r._prefix).append('\n'); - } + int numOfRulesToShow = Math.min(nRules, 10); + for (int i = 0; i < numOfRulesToShow; i++) { + RobotRule r = _rules.get(i); + sb.append(r._allow ? " A" : " Disa").append("llow: ").append(r._prefix).append('\n'); } } return sb.toString();