1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-10 16:06:04 +02:00

[Sitemaps] Unit tests depend on system timezone, fixes #238 (#239)

- fix unit test to format dates in time zone UTC
- improve documentation of `convertToZonedDateTime`:
  add note that UTC is assumed if no time zone is contained in
  date string
This commit is contained in:
Sebastian Nagel 2019-03-19 16:00:04 +01:00 committed by Julien Nioche
parent 9a5236c484
commit e8b598b2e8
3 changed files with 17 additions and 14 deletions

View File

@ -1,6 +1,7 @@
Crawler-Commons Change Log
Current Development 0.11-SNAPSHOT (yyyy-mm-dd)
- [Sitemaps] Unit tests depend on system timezone (kkrugler, sebastian-nagel) #238
- EffectiveTldFinder: upgrade public suffix list (sebastian-nagel) #219
- [Sitemaps] Detection and parsing of XML sitemaps fails with whitespace before XML declaration (sebastian-nagel, jnioche) #144
- [Sitemaps] XMLHandler needs to append text in characters() vs. immediately processing (kkrugler, sebastian-nagel) #226

View File

@ -146,13 +146,13 @@ public abstract class AbstractSiteMap {
}
/**
* Convert the given date (given in an acceptable DateFormat), null if the
* date is not in the correct format.
* Convert the given date (given in an acceptable DateFormat), return null
* if the date is not in the correct format.
*
* <p>
* Dates must follow the <a href="https://www.w3.org/TR/NOTE-datetime">W3C
* Datetime format</a> which is similar to <a
* href="https://en.wikipedia.org/wiki/ISO_8601">ISO-8601</a> but allows
* Datetime format</a> which is similar to
* <a href="https://en.wikipedia.org/wiki/ISO_8601">ISO-8601</a> but allows
* dates with different precisions:
* </p>
*
@ -171,10 +171,13 @@ public abstract class AbstractSiteMap {
* YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
* </pre>
*
* Note: Greenwich time (UTC) is assumed if the date string does not specify
* a time zone.
*
* @param date
* - the date to be parsed
* @return the zoned date time equivalent to the date string or NULL parsing
* failed
* @return the zoned date time equivalent to the date string or NULL if
* parsing failed
*/
public static ZonedDateTime convertToZonedDateTime(String date) {

View File

@ -23,7 +23,6 @@ import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
@ -36,14 +35,14 @@ public class AbstractSiteMapTest {
assertNull(AbstractSiteMap.convertToDate("blah"));
assertNull(AbstractSiteMap.convertToDate(null));
SimpleDateFormat isoFormatNoTimezone = new SimpleDateFormat("yyyyMMdd", Locale.ROOT);
SimpleDateFormat isoFormatShortDate = new SimpleDateFormat("yyyyMMdd", Locale.ROOT);
isoFormatShortDate.setTimeZone(TimeZone.getTimeZone("UTC"));
// For formats where there's no time zone information, the time zone is
// undefined, so we can only check on the year/month/day portion of the
// result.
assertEquals("20140101", isoFormatNoTimezone.format(AbstractSiteMap.convertToDate("2014")));
assertEquals("20140601", isoFormatNoTimezone.format(AbstractSiteMap.convertToDate("2014-06")));
assertEquals("20140603", isoFormatNoTimezone.format(AbstractSiteMap.convertToDate("2014-06-03")));
// For short dates we only check on the year/month/day portion of the result.
// Time zone UTC is assumed because short dates do not contain a time zone.
assertEquals("20140101", isoFormatShortDate.format(AbstractSiteMap.convertToDate("2014")));
assertEquals("20140601", isoFormatShortDate.format(AbstractSiteMap.convertToDate("2014-06")));
assertEquals("20140603", isoFormatShortDate.format(AbstractSiteMap.convertToDate("2014-06-03")));
SimpleDateFormat isoFormat = new SimpleDateFormat("yyyyMMdd'T'HHmmss", Locale.ROOT);
isoFormat.setTimeZone(TimeZone.getTimeZone("UTC"));