mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-09-20 08:42:15 +02:00
[BasicNormalizer] Empty path to be normalized to / even if there is an (empty) query,
fixes #247
This commit is contained in:
parent
2979aaf4aa
commit
7419e96d74
@@ -1,7 +1,7 @@
|
||||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 1.1-SNAPSHOT (yyyy-mm-dd)
|
||||
- [BasicNormalizer] Trailing question mark in url query test should be added (Chaiavi, sebastian-nagel) #247
|
||||
- [BasicNormalizer] Empty path before query to be normalized to `/` (Chaiavi, sebastian-nagel) #247
|
||||
- EffectiveTldFinder to validate returned domain names for length restrictions (sebastian-nagel, Chaiavi) #251
|
||||
- Upgrade unit tests to use JUnit v5.x and parameterized tests (Chaiavi) #249, #253, #255
|
||||
- [Robots] Robots parser to always handle absolute sitemap URL even without valid base URL (pr3mar, kkrugler, sebastian-nagel) #240
|
||||
|
@@ -130,6 +130,9 @@ public class BasicURLNormalizer extends URLFilter {
|
||||
if (file == null || "".equals(file)) { // add a slash
|
||||
file = "/";
|
||||
changed = true;
|
||||
} else if (!file.startsWith("/")) {
|
||||
file = "/" + file;
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if (url.getRef() != null) { // remove the ref
|
||||
@@ -196,6 +199,8 @@ public class BasicURLNormalizer extends URLFilter {
|
||||
// if path is empty return a single slash
|
||||
if (file.isEmpty()) {
|
||||
file = "/";
|
||||
} else if (!file.startsWith("/")) {
|
||||
file = "/" + file;
|
||||
}
|
||||
|
||||
return file;
|
||||
|
@@ -130,3 +130,5 @@ http:///////, http:/
|
||||
|
||||
# empty path with trailing question mark (empty query) #247
|
||||
http://example.com?,http://example.com/?
|
||||
http://example.com?a=1,http://example.com/?a=1
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user