mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-09-23 17:33:23 +02:00
Increase size limit of sitemaps (10MB -> 50MB), fixes #132
This commit is contained in:
parent
9a49088ec8
commit
49b3097083
@ -1,6 +1,7 @@
|
||||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 0.8-SNAPSHOT
|
||||
- Increase sitemap size limit to 50MB (Chaiavi) #132
|
||||
- Remove dependencies on system-specific locale (sebastian-nagel) #137
|
||||
- BasicURLNormalizer: NPE for URLs without authority (sebastian-nagel) #136
|
||||
- BasicURLNormalizer to strip empty port (sebastian-nagel) #133
|
||||
|
@ -59,9 +59,12 @@ public class SiteMapParser {
|
||||
private static final int MAX_URLS = 50000;
|
||||
|
||||
/**
|
||||
* Sitemap docs must be limited to 10MB (10,485,760 bytes)
|
||||
* Sitemaps (including sitemap index files) "must be no larger than
|
||||
* 50MB (52,428,800 bytes)" as specified in the
|
||||
* <a href="https://www.sitemaps.org/protocol.html#index">Sitemaps XML
|
||||
* format</a> (before Nov. 2016 the limit was 10MB).
|
||||
*/
|
||||
public static final int MAX_BYTES_ALLOWED = 10485760;
|
||||
public static final int MAX_BYTES_ALLOWED = 52428800;
|
||||
|
||||
/* Tika's MediaType components */
|
||||
private static final Tika TIKA = new Tika();
|
||||
|
Loading…
Reference in New Issue
Block a user