1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-22 02:56:03 +02:00

Adding asMap to ExtensionMetadata Interface (#288)

* added abstract method to extension metadata

* implemented asmap in image/link/mobile/news attributes

* implemented asmap in videoattributes

* adding video attributes unit test

* added news attributes unit tests

* unit test for link attributes

* unit tests for image and mobile attributes

* added constants to news and link attributes
fixing a small issue in NewsAttributes.toString

* using constants instead of strings in more attributes

* cleaned up the imports

* decreasing the visibility of LinkAttributes.PARAMS_PREFIX
adding a comment explaining its usage

* added related issue to the changelog

* reverting the change to NewsAttributes.equals, which caused a unit test failure
This commit is contained in:
Evan Halley 2020-06-15 10:55:20 -04:00 committed by GitHub
parent 2d727b2cfa
commit c04e3f17e7
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 494 additions and 1 deletions

View File

@ -13,6 +13,7 @@ Current Development 1.1-SNAPSHOT (yyyy-mm-dd)
- EffectiveTldFinder to validate returned domain names for length restrictions (sebastian-nagel, Avi Hayun) #251
- Upgrade unit tests to use JUnit v5.x and parameterized tests (Avi Hayun) #249, #253, #255
- [Robots] Robots parser to always handle absolute sitemap URL even without valid base URL (pr3mar, kkrugler, sebastian-nagel) #240
- Adding asMap() to ExtensionMetadata Interface #288
Release 1.0 (2019-03-19)
- [Sitemaps] Unit tests depend on system timezone (kkrugler, sebastian-nagel) #238

View File

@ -18,6 +18,8 @@ package crawlercommons.sitemaps.extension;
import crawlercommons.sitemaps.SiteMapURL;
import java.util.Map;
/**
* Container for attributes of a {@link SiteMapURL} defined by a sitemap
* extension.
@ -26,6 +28,8 @@ public abstract class ExtensionMetadata {
public abstract boolean equals(Object other);
/**
 * Returns the attributes of this extension as a map from attribute name to
 * the attribute's value(s), each value rendered as a string.
 *
 * @return map of attribute names to their string values
 */
public abstract Map<String, String[]> asMap();
/**
 * Validity check of the attributes. This base implementation
 * unconditionally returns {@code true}.
 * NOTE(review): subclasses presumably override this to enforce their
 * required attributes - confirm against the subclasses.
 *
 * @return {@code true}
 */
public boolean isValid() {
return true;
}

View File

@ -17,6 +17,9 @@
package crawlercommons.sitemaps.extension;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
@ -24,6 +27,13 @@ import java.util.Objects;
* indexing, as per http://www.google.com/schemas/sitemap-image/1.1
*/
public class ImageAttributes extends ExtensionMetadata {
/** Key of the image location in the map returned by {@link #asMap()}. */
public static final String LOC = "loc";
/** Key of the image caption in the map returned by {@link #asMap()}. */
public static final String CAPTION = "caption";
/** Key of the geographic location in the map returned by {@link #asMap()}. */
public static final String GEO_LOCATION = "geo_location";
/** Key of the image title in the map returned by {@link #asMap()}. */
public static final String TITLE = "title";
/** Key of the license URL in the map returned by {@link #asMap()}. */
public static final String LICENSE = "license";
/**
* Image location attribute found under image/loc (required)
*/
@ -128,4 +138,29 @@ public class ImageAttributes extends ExtensionMetadata {
&& Objects.equals(license, that.license);
}
/**
 * Exports the image attributes that are set (non-null) as an unmodifiable
 * map. Every value is the attribute's string form wrapped in a one-element
 * array; unset attributes have no entry.
 *
 * @return unmodifiable map of attribute names to values
 */
@Override
public Map<String, String[]> asMap() {
    final Map<String, String[]> attributes = new HashMap<>();
    putIfPresent(attributes, LOC, loc);
    putIfPresent(attributes, CAPTION, caption);
    putIfPresent(attributes, GEO_LOCATION, geoLocation);
    putIfPresent(attributes, TITLE, title);
    putIfPresent(attributes, LICENSE, license);
    return Collections.unmodifiableMap(attributes);
}

/**
 * Stores the string form of {@code value} as a one-element array under
 * {@code key}; does nothing when {@code value} is null.
 */
private static void putIfPresent(Map<String, String[]> target, String key, Object value) {
    if (value == null) {
        return;
    }
    target.put(key, new String[] { value.toString() });
}
}

View File

@ -17,6 +17,8 @@
package crawlercommons.sitemaps.extension;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
@ -32,6 +34,14 @@ import java.util.Objects;
* you to check for mistakes.</blockquote>
*/
public class LinkAttributes extends ExtensionMetadata {
/** Key of the link's href in the map returned by {@link #asMap()}. */
public static final String HREF = "href";
/**
 * Format string used by {@link #asMap()} to build the map key of a link
 * parameter: the parameter name replaces {@code %s}, so that e.g. the
 * parameter {@code rel} is stored under {@code params.rel}.
 */
private static final String PARAMS_PREFIX = "params.%s";
/**
* Link's href attribute
*/
@ -96,4 +106,20 @@ public class LinkAttributes extends ExtensionMetadata {
&& Objects.equals(params, that.params);
}
/**
 * Exports the link's href (if set) and all of its parameters (if any) as an
 * unmodifiable map. Values are one-element arrays; parameter keys are built
 * from the parameter name via {@code PARAMS_PREFIX}.
 *
 * @return unmodifiable map of attribute names to values
 */
@Override
public Map<String, String[]> asMap() {
    final Map<String, String[]> result = new HashMap<>();
    if (href != null) {
        result.put(HREF, new String[] { href.toString() });
    }
    if (params != null) {
        // each parameter is exported under its formatted name, e.g. "params.rel"
        params.forEach((paramName, paramValue) -> result.put(String.format(PARAMS_PREFIX, paramName), new String[] { paramValue }));
    }
    return Collections.unmodifiableMap(result);
}
}

View File

@ -16,6 +16,9 @@
package crawlercommons.sitemaps.extension;
import java.util.Collections;
import java.util.Map;
/**
* Google mobile sitemap attributes, see
* http://www.google.de/schemas/sitemap-mobile/1.0/ and
@ -27,7 +30,7 @@ public class MobileAttributes extends ExtensionMetadata {
/**
 * Returns a constant, human-readable description.
 */
@Override
public String toString() {
return "Mobile content avaiblabe: yes";
return "Mobile content available: yes";
}
@Override
@ -41,4 +44,9 @@ public class MobileAttributes extends ExtensionMetadata {
return true;
}
/**
 * Always yields an empty map: no attribute values are exported here.
 *
 * @return the immutable empty map
 */
@Override
public Map<String, String[]> asMap() {
    final Map<String, String[]> none = Collections.emptyMap();
    return none;
}
}

View File

@ -16,15 +16,27 @@
package crawlercommons.sitemaps.extension;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
* Data model for Google's extension to the sitemap protocol regarding news
* indexing, as per http://www.google.com/schemas/sitemap-news/0.9
*/
public class NewsAttributes extends ExtensionMetadata {
/** Key of the publication name in the map returned by {@link #asMap()}. */
public static final String NAME = "name";
/** Key of the language in the map returned by {@link #asMap()}. */
public static final String LANGUAGE = "language";
/** Key of the genres in the map returned by {@link #asMap()}. */
public static final String GENRES = "genres";
/** Key of the publication date in the map returned by {@link #asMap()}. */
public static final String PUBLICATION_DATE = "publication_date";
/** Key of the article title in the map returned by {@link #asMap()}. */
public static final String TITLE = "title";
/** Key of the keywords in the map returned by {@link #asMap()}. */
public static final String KEYWORDS = "keywords";
/** Key of the stock tickers in the map returned by {@link #asMap()}. */
public static final String STOCK_TICKERS = "stock_tickers";
/** Genres that can be assigned to a news article. */
public static enum NewsGenre {
    Blog,
    OpEd,
    Opinion,
    PressRelease,
    Satire,
    UserGenerated
}
@ -185,4 +197,42 @@ public class NewsAttributes extends ExtensionMetadata {
}
return sb.toString();
}
/**
 * Exports the news attributes that are set (non-null) as an unmodifiable
 * map; unset attributes have no entry.
 * <p>
 * Single-valued attributes are wrapped in a one-element array. Genres are
 * exported by their enum constant names. The keyword and stock ticker arrays
 * are stored as-is (not copied).
 *
 * @return unmodifiable map of attribute names to values
 */
@Override
public Map<String, String[]> asMap() {
    Map<String, String[]> map = new HashMap<>();

    if (name != null) {
        map.put(NAME, new String[] { name });
    }

    if (title != null) {
        map.put(TITLE, new String[] { title });
    }

    if (language != null) {
        map.put(LANGUAGE, new String[] { language });
    }

    if (publicationDate != null) {
        map.put(PUBLICATION_DATE, new String[] { publicationDate.toString() });
    }

    if (keywords != null) {
        map.put(KEYWORDS, keywords);
    }

    if (genres != null) {
        String[] genresStrArr = Arrays.stream(genres)
                .map(Enum::name)
                .toArray(String[]::new);
        map.put(GENRES, genresStrArr);
    }

    if (stockTickers != null) {
        map.put(STOCK_TICKERS, stockTickers);
    }

    // Hand out a read-only view, consistent with the asMap() implementations
    // of the image, link and video attributes.
    return Collections.unmodifiableMap(map);
}
}

View File

@ -19,8 +19,11 @@ package crawlercommons.sitemaps.extension;
import java.net.URL;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
/**
@ -29,6 +32,30 @@ import java.util.Objects;
*/
public class VideoAttributes extends ExtensionMetadata {
/*
 * Keys of the map returned by asMap(): each constant names the video
 * attribute whose value(s) asMap() stores under it.
 */
public static final String THUMBNAIL_LOC = "thumbnail_loc";
public static final String TITLE = "title";
public static final String DESCRIPTION = "description";
public static final String CONTENT_LOC = "content_loc";
public static final String PLAYER_LOC = "player_loc";
public static final String EXPIRATION_DATE = "expiration_date";
public static final String RATING = "rating";
public static final String VIEW_COUNT = "view_count";
public static final String PUBLICATION_DATE = "publication_date";
public static final String FAMILY_FRIENDLY = "family_friendly";
public static final String TAGS = "tags";
public static final String CATEGORY = "category";
public static final String RESTRICTED_COUNTRIES = "restricted_countries";
public static final String ALLOWED_COUNTRIES = "allowed_countries";
public static final String GALLERY_LOC = "gallery_loc";
public static final String GALLERY_TITLE = "gallery_title";
public static final String PRICES = "prices";
public static final String REQUIRES_SUBSCRIPTION = "requires_subscription";
public static final String UPLOADER = "uploader";
public static final String UPLOADER_INFO = "uploader_info";
public static final String ALLOWED_PLATFORMS = "allowed_platforms";
public static final String RESTRICTED_PLATFORMS = "restricted_platforms";
public static final String IS_LIVE = "is_live";
/**
* Video thumbnail URL found under video/thumbnail_loc (required)
*/
@ -551,4 +578,104 @@ public class VideoAttributes extends ExtensionMetadata {
return thumbnailLoc != null && title != null && title.length() <= 100 && description != null && description.length() <= 2048 && (contentLoc != null || playerLoc != null);
}
/**
 * Exports the video attributes that are set (non-null) as an unmodifiable
 * map; unset attributes have no entry.
 * <p>
 * Single-valued attributes are stored as their string form in a one-element
 * array, multi-valued attributes are stored as their array, and prices are
 * converted element by element via {@code toString()}.
 * <p>
 * NOTE(review): no entry is produced for the video duration, although the
 * unit test sets one via {@code setDuration} - confirm whether a duration
 * key should be added.
 *
 * @return unmodifiable map of attribute names to values
 */
@Override
public Map<String, String[]> asMap() {
    final Map<String, String[]> attributes = new HashMap<>();

    putSingle(attributes, THUMBNAIL_LOC, thumbnailLoc);
    putSingle(attributes, TITLE, title);
    putSingle(attributes, DESCRIPTION, description);
    putSingle(attributes, CONTENT_LOC, contentLoc);
    putSingle(attributes, PLAYER_LOC, playerLoc);
    putSingle(attributes, EXPIRATION_DATE, expirationDate);
    putSingle(attributes, RATING, rating);
    putSingle(attributes, VIEW_COUNT, viewCount);
    putSingle(attributes, PUBLICATION_DATE, publicationDate);
    putSingle(attributes, FAMILY_FRIENDLY, familyFriendly);
    putMulti(attributes, TAGS, tags);
    putSingle(attributes, CATEGORY, category);
    putMulti(attributes, RESTRICTED_COUNTRIES, restrictedCountries);
    putMulti(attributes, ALLOWED_COUNTRIES, allowedCountries);
    putSingle(attributes, GALLERY_LOC, galleryLoc);
    putSingle(attributes, GALLERY_TITLE, galleryTitle);
    if (prices != null) {
        attributes.put(PRICES, Arrays.stream(prices).map(VideoPrice::toString).toArray(String[]::new));
    }
    putSingle(attributes, REQUIRES_SUBSCRIPTION, requiresSubscription);
    putSingle(attributes, UPLOADER, uploader);
    putSingle(attributes, UPLOADER_INFO, uploaderInfo);
    putMulti(attributes, ALLOWED_PLATFORMS, allowedPlatforms);
    putMulti(attributes, RESTRICTED_PLATFORMS, restrictedPlatforms);
    putSingle(attributes, IS_LIVE, isLive);

    return Collections.unmodifiableMap(attributes);
}

/**
 * Stores the string form of {@code value} as a one-element array under
 * {@code key}; does nothing when {@code value} is null.
 */
private static void putSingle(Map<String, String[]> target, String key, Object value) {
    if (value == null) {
        return;
    }
    target.put(key, new String[] { value.toString() });
}

/**
 * Stores {@code values} itself (no copy) under {@code key}; does nothing
 * when {@code values} is null.
 */
private static void putMulti(Map<String, String[]> target, String key, String[] values) {
    if (values == null) {
        return;
    }
    target.put(key, values);
}
}

View File

@ -0,0 +1,41 @@
package crawlercommons.sitemaps.extension;
import org.junit.jupiter.api.Test;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
/** Tests for {@link ImageAttributes#asMap()}. */
public class ImageAttributesTest {

    /** With every attribute set, each one is found as first value under its key. */
    @Test
    public void testImageAttributesAsMap() throws MalformedURLException {
        final URL location = new URL("http://example.com/image.jpg");
        final URL license = new URL("http://example.com/license");

        ImageAttributes image = new ImageAttributes(location);
        image.setCaption("caption");
        image.setGeoLocation("kalamazoo");
        image.setTitle("Title");
        image.setLicense(license);

        Map<String, String[]> exported = image.asMap();

        assertEquals(image.getLoc().toString(), exported.get(ImageAttributes.LOC)[0]);
        assertEquals(image.getCaption(), exported.get(ImageAttributes.CAPTION)[0]);
        assertEquals(image.getGeoLocation(), exported.get(ImageAttributes.GEO_LOCATION)[0]);
        assertEquals(image.getTitle(), exported.get(ImageAttributes.TITLE)[0]);
        assertEquals(image.getLicense().toString(), exported.get(ImageAttributes.LICENSE)[0]);
    }

    /** With nothing set, none of the keys is present. */
    @Test
    public void testNullImageAttributesAsMap() {
        Map<String, String[]> exported = new ImageAttributes(null).asMap();

        assertNull(exported.get(ImageAttributes.LOC));
        assertNull(exported.get(ImageAttributes.CAPTION));
        assertNull(exported.get(ImageAttributes.GEO_LOCATION));
        assertNull(exported.get(ImageAttributes.TITLE));
        assertNull(exported.get(ImageAttributes.LICENSE));
    }
}

View File

@ -0,0 +1,36 @@
package crawlercommons.sitemaps.extension;
import org.junit.jupiter.api.Test;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.*;
/** Tests for {@link LinkAttributes#asMap()}. */
public class LinkAttributesTest {

    /** The href and every parameter (under its "params."-prefixed key) are exported. */
    @Test
    public void testLinkAttributesAsMap() throws MalformedURLException {
        LinkAttributes attributes = new LinkAttributes(new URL("http://www.example.com/deutsch/"));
        // A plain HashMap rather than double-brace initialization, which would
        // create an anonymous HashMap subclass holding a reference to this test instance.
        HashMap<String, String> params = new HashMap<>();
        params.put("rel", "alternate");
        params.put("hreflang", "de");
        attributes.setParams(params);

        Map<String, String[]> map = attributes.asMap();

        assertEquals(attributes.getHref().toString(), map.get(LinkAttributes.HREF)[0]);
        assertEquals(attributes.getParams().get("rel"), map.get("params.rel")[0]);
        assertEquals(attributes.getParams().get("hreflang"), map.get("params.hreflang")[0]);
    }

    /** Without an href the corresponding key is absent. */
    @Test
    public void testNullLinkAttributesAsMap() {
        LinkAttributes attributes = new LinkAttributes(null);
        Map<String, String[]> map = attributes.asMap();

        assertNull(map.get(LinkAttributes.HREF));
    }
}

View File

@ -0,0 +1,19 @@
package crawlercommons.sitemaps.extension;
import org.junit.jupiter.api.Test;
import java.net.MalformedURLException;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
/** Tests for {@link MobileAttributes#asMap()}. */
public class MobileAttributesTest {

    /** The exported map contains no entries. */
    @Test
    public void testMobileAttributesAsMap() throws MalformedURLException {
        final Map<String, String[]> exported = new MobileAttributes().asMap();

        assertEquals(0, exported.size());
    }
}

View File

@ -0,0 +1,49 @@
package crawlercommons.sitemaps.extension;
import org.junit.jupiter.api.Test;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
/** Tests for {@link NewsAttributes#asMap()}. */
public class NewsAttributesTest {

    /** With every attribute set, each one is exported under its key. */
    @Test
    public void testNewsAttributesAsMap() {
        final ZonedDateTime published = ZonedDateTime.parse("2008-11-23T00:00:00+00:00");
        NewsAttributes news = new NewsAttributes("The Example Times", "en", published, "Companies A, B in Merger Talks");
        news.setKeywords(new String[] { "business", "merger", "acquisition", "A", "B" });
        news.setGenres(new NewsAttributes.NewsGenre[] { NewsAttributes.NewsGenre.PressRelease, NewsAttributes.NewsGenre.Blog });
        news.setStockTickers(new String[] { "NASDAQ:A", "NASDAQ:B" });

        Map<String, String[]> exported = news.asMap();

        // single-valued attributes
        assertEquals(news.getName(), exported.get(NewsAttributes.NAME)[0]);
        assertEquals(news.getTitle(), exported.get(NewsAttributes.TITLE)[0]);
        assertEquals(news.getLanguage(), exported.get(NewsAttributes.LANGUAGE)[0]);
        assertEquals(news.getPublicationDateTime().toString(), exported.get(NewsAttributes.PUBLICATION_DATE)[0]);

        // multi-valued attributes
        assertArrayEquals(news.getKeywords(), exported.get(NewsAttributes.KEYWORDS));
        assertArrayEquals(news.getStockTickers(), exported.get(NewsAttributes.STOCK_TICKERS));
        String[] expectedGenres = Arrays.stream(news.getGenres())
                .map(NewsAttributes.NewsGenre::toString)
                .toArray(String[]::new);
        assertArrayEquals(expectedGenres, exported.get(NewsAttributes.GENRES));
    }

    /** With nothing set, none of the keys is present. */
    @Test
    public void testNullNewsAttributesAsMap() {
        Map<String, String[]> exported = new NewsAttributes(null, null, null, null).asMap();

        assertNull(exported.get(NewsAttributes.NAME));
        assertNull(exported.get(NewsAttributes.TITLE));
        assertNull(exported.get(NewsAttributes.LANGUAGE));
        assertNull(exported.get(NewsAttributes.PUBLICATION_DATE));
        assertNull(exported.get(NewsAttributes.KEYWORDS));
        assertNull(exported.get(NewsAttributes.STOCK_TICKERS));
        assertNull(exported.get(NewsAttributes.GENRES));
    }
}

View File

@ -0,0 +1,97 @@
package crawlercommons.sitemaps.extension;
import org.junit.jupiter.api.Test;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
public class VideoAttributesTest {
@Test
public void testVideoAttributesAsMap() throws MalformedURLException {
VideoAttributes attributes = new VideoAttributes(new URL("http://www.example.com/thumbs/123.jpg"), "Grilling steaks for summer",
"Alkis shows you how to get perfectly done steaks every time", new URL("http://www.example.com/video123.flv"), new URL("http://www.example.com/videoplayer.swf?video=123"));
attributes.setDuration(600);
ZonedDateTime dt = ZonedDateTime.parse("2009-11-05T19:20:30+08:00");
attributes.setExpirationDate(dt);
dt = ZonedDateTime.parse("2007-11-05T19:20:30+08:00");
attributes.setPublicationDate(dt);
attributes.setCategory("music");
attributes.setRating(4.2f);
attributes.setViewCount(12345);
attributes.setFamilyFriendly(true);
attributes.setTags(new String[] { "sample_tag1", "sample_tag2" });
attributes.setAllowedCountries(new String[] { "IE", "GB", "US", "CA" });
attributes.setGalleryLoc(new URL("http://cooking.example.com"));
attributes.setGalleryTitle("Cooking Videos");
attributes.setPrices(new VideoAttributes.VideoPrice[] { new VideoAttributes.VideoPrice("EUR", 1.99f, VideoAttributes.VideoPriceType.own) });
attributes.setRequiresSubscription(true);
attributes.setUploader("GrillyMcGrillerson");
attributes.setUploaderInfo(new URL("http://www.example.com/users/grillymcgrillerson"));
attributes.setLive(false);
Map<String, String[]> map = attributes.asMap();
assertEquals(attributes.getThumbnailLoc().toString(), map.get(VideoAttributes.THUMBNAIL_LOC)[0]);
assertEquals(attributes.getTitle(), map.get(VideoAttributes.TITLE)[0]);
assertEquals(attributes.getDescription(), map.get(VideoAttributes.DESCRIPTION)[0]);
assertEquals(attributes.getContentLoc().toString(), map.get(VideoAttributes.CONTENT_LOC)[0]);
assertEquals(attributes.getPlayerLoc().toString(), map.get(VideoAttributes.PLAYER_LOC)[0]);
assertEquals(attributes.getExpirationDateTime().toString(), map.get(VideoAttributes.EXPIRATION_DATE)[0]);
assertEquals(attributes.getRating().toString(), map.get(VideoAttributes.RATING)[0]);
assertEquals(attributes.getViewCount().toString(), map.get(VideoAttributes.VIEW_COUNT)[0]);
assertEquals(attributes.getPublicationDateTime().toString(), map.get(VideoAttributes.PUBLICATION_DATE)[0]);
assertEquals(attributes.getFamilyFriendly().toString(), map.get(VideoAttributes.FAMILY_FRIENDLY)[0]);
assertArrayEquals(attributes.getTags(), map.get(VideoAttributes.TAGS));
assertEquals(attributes.getCategory(), map.get(VideoAttributes.CATEGORY)[0]);
assertArrayEquals(attributes.getRestrictedCountries(), map.get(VideoAttributes.RESTRICTED_COUNTRIES));
assertArrayEquals(attributes.getAllowedCountries(), map.get(VideoAttributes.ALLOWED_COUNTRIES));
assertEquals(attributes.getGalleryLoc().toString(), map.get(VideoAttributes.GALLERY_LOC)[0]);
assertEquals(attributes.getGalleryTitle(), map.get(VideoAttributes.GALLERY_TITLE)[0]);
assertArrayEquals(Arrays.stream(attributes.getPrices())
.map(VideoAttributes.VideoPrice::toString)
.toArray(String[]::new), map.get(VideoAttributes.PRICES));
assertEquals(attributes.getRequiresSubscription().toString(), map.get(VideoAttributes.REQUIRES_SUBSCRIPTION)[0]);
assertEquals(attributes.getUploader(), map.get(VideoAttributes.UPLOADER)[0]);
assertEquals(attributes.getUploaderInfo().toString(), map.get(VideoAttributes.UPLOADER_INFO)[0]);
assertArrayEquals(attributes.getAllowedPlatforms(), map.get(VideoAttributes.ALLOWED_PLATFORMS));
assertArrayEquals(attributes.getRestrictedPlatforms(), map.get(VideoAttributes.RESTRICTED_PLATFORMS));
assertEquals(attributes.getLive().toString(), map.get(VideoAttributes.IS_LIVE)[0]);
}
@Test
public void testNullVideoAttributesAsMap() {
VideoAttributes attributes = new VideoAttributes(null, null, null, null, null);
Map<String, String[]> map = attributes.asMap();
assertNull(map.get(VideoAttributes.THUMBNAIL_LOC));
assertNull(map.get(VideoAttributes.TITLE));
assertNull(map.get(VideoAttributes.DESCRIPTION));
assertNull(map.get(VideoAttributes.CONTENT_LOC));
assertNull(map.get(VideoAttributes.PLAYER_LOC));
assertNull(map.get(VideoAttributes.EXPIRATION_DATE));
assertNull(map.get(VideoAttributes.RATING));
assertNull(map.get(VideoAttributes.VIEW_COUNT));
assertNull(map.get(VideoAttributes.PUBLICATION_DATE));
assertNull(map.get(VideoAttributes.FAMILY_FRIENDLY));
assertNull(map.get(VideoAttributes.TAGS));
assertNull(map.get(VideoAttributes.CATEGORY));
assertNull(map.get(VideoAttributes.RESTRICTED_COUNTRIES));
assertNull(map.get(VideoAttributes.ALLOWED_COUNTRIES));
assertNull(map.get(VideoAttributes.GALLERY_LOC));
assertNull(map.get(VideoAttributes.GALLERY_TITLE));
assertNull(map.get(VideoAttributes.PRICES));
assertNull(map.get(VideoAttributes.REQUIRES_SUBSCRIPTION));
assertNull(map.get(VideoAttributes.UPLOADER));
assertNull(map.get(VideoAttributes.UPLOADER_INFO));
assertNull(map.get(VideoAttributes.ALLOWED_PLATFORMS));
assertNull(map.get(VideoAttributes.RESTRICTED_PLATFORMS));
assertNull(map.get(VideoAttributes.IS_LIVE));
}
}