mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-05-31 13:16:04 +02:00
Merge pull request #140 from sebastian-nagel/cc-132-sitemap-limit-50mb
Increase size limit of sitemaps (10MB -> 50MB), fixes #132
This commit is contained in:
commit
375aae3d99
|
@ -1,6 +1,7 @@
|
|||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 0.8-SNAPSHOT
|
||||
- Increase sitemap size limit to 50MB (Chaiavi) #132
|
||||
- Remove dependencies on system-specific locale (sebastian-nagel) #137
|
||||
- BasicURLNormalizer: NPE for URLs without authority (sebastian-nagel) #136
|
||||
- BasicURLNormalizer to strip empty port (sebastian-nagel) #133
|
||||
|
|
|
@ -59,9 +59,12 @@ public class SiteMapParser {
|
|||
private static final int MAX_URLS = 50000;
|
||||
|
||||
/**
|
||||
* Sitemap docs must be limited to 10MB (10,485,760 bytes)
|
||||
* Sitemaps (including sitemap index files) "must be no larger than
|
||||
* 50MB (52,428,800 bytes)" as specified in the
|
||||
* <a href="https://www.sitemaps.org/protocol.html#index">Sitemaps XML
|
||||
* format</a> (before Nov. 2016 the limit was 10MB).
|
||||
*/
|
||||
public static final int MAX_BYTES_ALLOWED = 10485760;
|
||||
public static final int MAX_BYTES_ALLOWED = 52428800;
|
||||
|
||||
/* Tika's MediaType components */
|
||||
private static final Tika TIKA = new Tika();
|
||||
|
|
Loading…
Reference in New Issue