crawler-commons (mirror of https://github.com/crawler-commons/crawler-commons)

Upgrade to JDK 1.7 compiler version and introduce Maven forbidden API's plugin

Lewis John McGibbney 2015-09-06 13:55:26 -04:00
parent 827b073d12
commit ba5906ec40
18 changed files with 234 additions and 136 deletions

pom.xml

@@ -1,4 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
@@ -31,7 +49,7 @@
<connection>scm:git:git://github.com/crawler-commons/crawler-commons.git</connection>
<developerConnection>scm:git:git@github.com:crawler-commons/crawler-commons.git</developerConnection>
<tag>HEAD</tag>
</scm>
</scm>
<distributionManagement>
<repository>
@@ -49,7 +67,7 @@
<mailingLists>
<mailingList>
<name>Project Mailing List</name>
<post>crawler-commons [at] googlecode [dot] com</post>
<post>crawler-commons [at] googlegroups [dot] com</post>
</mailingList>
</mailingLists>
@@ -135,6 +153,32 @@
<!--autoVersionSubmodules>true</autoVersionSubmodules -->
</configuration>
</plugin>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>de.thetaphi</groupId>
<artifactId>forbiddenapis</artifactId>
<versionRange>[1.8,)</versionRange>
<goals>
<goal>testCheck</goal>
<goal>check</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
@@ -183,7 +227,34 @@
<configFile>${project.basedir}/doc/eclipse-formatter.xml</configFile>
</configuration>
</plugin>
<plugin>
<groupId>de.thetaphi</groupId>
<artifactId>forbiddenapis</artifactId>
<version>1.8</version>
<configuration>
<!-- disallow undocumented classes like sun.misc.Unsafe: -->
<internalRuntimeForbidden>true</internalRuntimeForbidden>
<!--
if the used Java version is too new,
don't fail, just do nothing:
-->
<failOnUnsupportedJava>false</failOnUnsupportedJava>
<bundledSignatures>
<bundledSignature>jdk-unsafe</bundledSignature>
<bundledSignature>jdk-deprecated</bundledSignature>
<bundledSignature>jdk-system-out</bundledSignature>
<!--bundledSignature>commons-io-unsafe-${commons-io.version}</bundledSignature-->
</bundledSignatures>
</configuration>
<executions>
<execution>
<goals>
<goal>check</goal>
<goal>testCheck</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
@@ -300,11 +371,12 @@
<!-- General Properties -->
<implementation.build>${scmBranch}@r${buildNumber}</implementation.build>
<javac.src.version>1.6</javac.src.version>
<javac.target.version>1.6</javac.target.version>
<javac.src.version>1.7</javac.src.version>
<javac.target.version>1.7</javac.target.version>
<maven.compiler.target>1.7</maven.compiler.target>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ssZ</maven.build.timestamp.format>
<skipTests>false</skipTests>
<assembly.finalName>apache-${project.build.finalName}</assembly.finalName>
<assembly.finalName>${project.build.finalName}</assembly.finalName>
</properties>
<dependencies>
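
The bundled signatures above account for most of the source changes that follow: jdk-unsafe flags calls that silently rely on the platform default charset or locale, jdk-deprecated flags deprecated JDK APIs, and jdk-system-out flags writes to System.out/System.err. A minimal sketch of each violation and the compliant form this commit adopts, using a hypothetical class (SLF4J, which the project already uses, is assumed on the classpath):

import java.nio.charset.Charset;
import java.util.Locale;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ForbiddenApisDemo {
    private static final Logger LOG = LoggerFactory.getLogger(ForbiddenApisDemo.class);

    public static void main(String[] args) {
        String name = "crawler-commons";
        // jdk-unsafe: name.getBytes() would use the default charset implicitly;
        // passing the charset explicitly satisfies the check.
        byte[] bytes = name.getBytes(Charset.defaultCharset());
        // jdk-unsafe: String.format(...) without a Locale argument is also flagged.
        String msg = String.format(Locale.getDefault(), "%s is %d bytes", name, bytes.length);
        // jdk-system-out: System.out.println(msg) would be flagged; log instead.
        LOG.info(msg);
    }
}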

BaseFetchException.java

@@ -133,7 +133,7 @@ public abstract class BaseFetchException extends Exception {
@Override
public void printStackTrace() {
_exception.printStackTrace();
_exception.getMessage();
}
@Override

FetchedResult.java

@@ -17,6 +17,7 @@
package crawlercommons.fetcher;
import java.nio.charset.Charset;
import java.security.InvalidParameterException;
import java.util.Arrays;
@@ -174,7 +175,7 @@ public class FetchedResult {
report.append(" FetchedUrl : " + getFetchedUrl() + "\n");
report.append(" ContentType : " + getContentType() + "\n");
report.append(" ContentLength : " + getContentLength() + "\n");
report.append(" Content : " + new String(getContent()) + "\n"); // byte
report.append(" Content : " + new String(getContent(), Charset.defaultCharset()) + "\n"); // byte
// array
// to
// string

SimpleHttpFetcher.java

@@ -31,6 +31,7 @@ import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -519,7 +520,7 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
} catch (HttpFetchException e) {
// Don't bother generating a trace for a 404 (not found)
if (LOGGER.isTraceEnabled() && (e.getHttpStatus() != HttpStatus.SC_NOT_FOUND)) {
LOGGER.trace(String.format("Exception fetching %s (%s)", url, e.getMessage()));
LOGGER.trace(String.format(Locale.getDefault(), "Exception fetching %s (%s)", url, e.getMessage()));
}
throw e;
@@ -527,11 +528,11 @@
// Don't bother reporting that we bailed because the mime-type
// wasn't one that we wanted.
if (e.getAbortReason() != AbortedFetchReason.INVALID_MIMETYPE) {
LOGGER.debug(String.format("Exception fetching %s (%s)", url, e.getMessage()));
LOGGER.debug(String.format(Locale.getDefault(), "Exception fetching %s (%s)", url, e.getMessage()));
}
throw e;
} catch (BaseFetchException e) {
LOGGER.debug(String.format("Exception fetching %s (%s)", url, e.getMessage()));
LOGGER.debug(String.format(Locale.getDefault(), "Exception fetching %s (%s)", url, e.getMessage()));
throw e;
}
}
@@ -547,7 +548,7 @@
return doRequest(request, url, payload);
} catch (BaseFetchException e) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace(String.format("Exception fetching %s", url), e);
LOGGER.trace(String.format(Locale.getDefault(), "Exception fetching %s", url), e);
}
throw e;
}
@@ -675,7 +676,7 @@
throw new RedirectFetchException(url, redirectUrl, mre.getReason());
} else if (e.getCause() instanceof RedirectException) {
e.printStackTrace();
LOGGER.error(e.getMessage());
throw new RedirectFetchException(url, extractRedirectedUrl(url, localContext), RedirectExceptionReason.TOO_MANY_REDIRECTS);
} else {
throw new IOFetchException(url, e);

UserAgent.java

@@ -18,6 +18,7 @@
package crawlercommons.fetcher.http;
import java.io.Serializable;
import java.util.Locale;
import crawlercommons.CrawlerCommons;
@@ -103,6 +104,6 @@ public class UserAgent implements Serializable {
public String getUserAgentString() {
// Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com;
// mycrawler@mydomain.com)
return String.format("%s (compatible; %s%s; +%s; %s)", _browserVersion, getAgentName(), _crawlerVersion, _webAddress, _emailAddress);
return String.format(Locale.getDefault(), "%s (compatible; %s%s; +%s; %s)", _browserVersion, getAgentName(), _crawlerVersion, _webAddress, _emailAddress);
}
}

SimpleRobotRulesParser.java

@@ -18,10 +18,10 @@
package crawlercommons.robots;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URL;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
@@ -188,7 +188,7 @@ public class SimpleRobotRulesParser extends BaseRobotsParser {
static {
for (RobotDirective directive : RobotDirective.values()) {
if (!directive.isSpecial()) {
String prefix = directive.name().toLowerCase().replaceAll("_", "-");
String prefix = directive.name().toLowerCase(Locale.getDefault()).replaceAll("_", "-");
DIRECTIVE_PREFIX.put(prefix, directive);
}
}
@@ -220,7 +220,7 @@
* @return robot command found on line
*/
private static RobotToken tokenize(String line) {
String lowerLine = line.toLowerCase();
String lowerLine = line.toLowerCase(Locale.getDefault());
for (String prefix : DIRECTIVE_PREFIX.keySet()) {
int prefixLength = prefix.length();
if (lowerLine.startsWith(prefix)) {
@@ -336,7 +336,7 @@
}
// Decide if we need to do special HTML processing.
boolean isHtmlType = ((contentType != null) && contentType.toLowerCase().startsWith("text/html"));
boolean isHtmlType = ((contentType != null) && contentType.toLowerCase(Locale.getDefault()).startsWith("text/html"));
// If it looks like it contains HTML, but doesn't have a user agent
// field, then
@@ -366,7 +366,7 @@
// an empty
// string between the \r and \n.
StringTokenizer lineParser = new StringTokenizer(contentAsStr, "\n\r\u0085\u2028\u2029");
ParseState parseState = new ParseState(url, robotName.toLowerCase());
ParseState parseState = new ParseState(url, robotName.toLowerCase(Locale.getDefault()));
boolean keepGoing = true;
while (keepGoing && lineParser.hasMoreTokens()) {
@@ -425,7 +425,7 @@
break;
case MISSING:
reportWarning(String.format("Unknown line in robots.txt file (size %d): %s", content.length, line), url);
reportWarning(String.format(Locale.getDefault(), "Unknown line in robots.txt file (size %d): %s", content.length, line), url);
parseState.setFinishedAgentFields(true);
break;
@@ -496,7 +496,7 @@
// Handle the case when there are multiple target names are passed
// TODO should we do lowercase comparison of target name? Assuming yes.
String[] targetNames = state.getTargetName().toLowerCase().split(",");
String[] targetNames = state.getTargetName().toLowerCase(Locale.getDefault()).split(",");
for (int count = 0; count < targetNames.length; count++) {
// Extract possible match names from our target agent name, since it
@@ -508,7 +508,7 @@
String[] agentNames = token.getData().split("[ \t,]");
for (String agentName : agentNames) {
// TODO should we do case-insensitive matching? Probably yes.
agentName = agentName.trim().toLowerCase();
agentName = agentName.trim().toLowerCase(Locale.getDefault());
if (agentName.isEmpty()) {
// Ignore empty names
} else if (agentName.equals("*") && !state.isMatchedWildcard()) {
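
A note on the toLowerCase(Locale.getDefault()) changes in this file: the explicit locale satisfies the jdk-unsafe signature, but robots.txt directives are locale-independent tokens, and under a Turkish default locale 'I' still folds to the dotless 'ı'. Locale.ROOT would sidestep that; a standalone illustration, not part of this commit:

import java.util.Locale;

public class TurkishLowercase {
    public static void main(String[] args) {
        String directive = "DISALLOW";
        // Turkish case rules map 'I' to '\u0131' (dotless i), so the result
        // is "dısallow" and no longer matches the expected token.
        System.out.println(directive.toLowerCase(new Locale("tr")));
        // Locale.ROOT applies locale-neutral rules and always yields "disallow".
        System.out.println(directive.toLowerCase(Locale.ROOT));
    }
}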

AbstractSiteMap.java

@@ -21,8 +21,8 @@ import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -42,14 +42,14 @@ public abstract class AbstractSiteMap {
private static final ThreadLocal<DateFormat> W3C_NO_SECONDS_FORMAT = new ThreadLocal<DateFormat>() {
protected DateFormat initialValue() {
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mmZ");
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mmZ", Locale.getDefault());
}
};
private static final ThreadLocal<DateFormat> W3C_FULLDATE_FORMAT = new ThreadLocal<DateFormat>() {
protected DateFormat initialValue() {
SimpleDateFormat result = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
SimpleDateFormat result = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.getDefault());
result.setTimeZone(TimeZone.getTimeZone("UTC"));
return result;
}
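
The ThreadLocal wrapping here is the established pattern for SimpleDateFormat, which holds mutable parse/format state and is not thread-safe; each thread lazily builds its own formatter in initialValue(). A condensed sketch of the pattern with a hypothetical class name:

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

public class PerThreadDateFormat {
    // A single shared SimpleDateFormat is unsafe across threads;
    // ThreadLocal gives each thread its own instance.
    private static final ThreadLocal<DateFormat> W3C_FULLDATE = new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
            SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.getDefault());
            fmt.setTimeZone(TimeZone.getTimeZone("UTC"));
            return fmt;
        }
    };

    public static String format(Date date) {
        return W3C_FULLDATE.get().format(date);
    }
}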

SiteMapTester.java

@@ -5,17 +5,21 @@ import java.net.URL;
import java.util.Collection;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Sitemap Tool for recursively fetching all URL's from a sitemap (and all of
* it's children)
**/
public class SiteMapTester {
private static final Logger LOG = LoggerFactory.getLogger(SiteMapTester.class);
private static SiteMapParser parser = new SiteMapParser(false);
public static void main(String[] args) throws IOException, UnknownFormatException {
if (args.length < 1) {
System.err.println("Usage: SiteMapTester <URL_TO_TEST> [MIME_TYPE]");
LOG.error("Usage: SiteMapTester <URL_TO_TEST> [MIME_TYPE]");
} else {
URL url = new URL(args[0]);
String mt = (args.length > 1) ? args[1] : null;
@@ -47,7 +51,7 @@ public class SiteMapTester {
} else {
Collection<SiteMapURL> links = ((SiteMap) sm).getSiteMapUrls();
for (SiteMapURL smu : links) {
System.out.println(smu.getUrl());
LOG.info(smu.getUrl().toString());
}
}
}

SiteMapURL.java

@@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.Locale;
/**
* The SitemapUrl class represents a URL found in a Sitemap.
@@ -226,7 +227,7 @@ public class SiteMapURL {
public void setChangeFrequency(String changeFreq) {
if (changeFreq != null) {
changeFreq = changeFreq.toUpperCase();
changeFreq = changeFreq.toUpperCase(Locale.getDefault());
if (changeFreq.contains("ALWAYS")) {
this.changeFreq = ChangeFrequency.ALWAYS;

EffectiveTldFinder.java

@@ -25,6 +25,8 @@ import java.net.IDN;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Locale;
import java.nio.charset.Charset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -79,7 +81,7 @@ public class EffectiveTldFinder {
if (null == effective_tld_data_stream && null != this.getClass().getResource(ETLD_DATA)) {
effective_tld_data_stream = this.getClass().getResourceAsStream(ETLD_DATA);
}
BufferedReader input = new BufferedReader(new InputStreamReader(effective_tld_data_stream));
BufferedReader input = new BufferedReader(new InputStreamReader(effective_tld_data_stream, Charset.defaultCharset()));
String line = null;
while (null != (line = input.readLine())) {
if (line.length() == 0 || (line.length() > 1 && line.startsWith(COMMENT))) {
@@ -148,8 +150,8 @@
*/
public static String getAssignedDomain(String hostname) {
EffectiveTLD etld = getEffectiveTLD(hostname);
if (null == etld || etld.getDomain() == hostname.toLowerCase()) {
return hostname.toLowerCase();
if (null == etld || etld.getDomain() == hostname.toLowerCase(Locale.getDefault())) {
return hostname.toLowerCase(Locale.getDefault());
}
String domain = hostname.replaceFirst(".*?([^.]+\\.)" + etld.getDomain() + "$", "$1" + etld.getDomain());
return domain;
@@ -201,7 +203,7 @@
private String asciiConvert(String str) {
if (isAscii(str)) {
return str.toLowerCase();
return str.toLowerCase(Locale.getDefault());
}
return IDN.toASCII(str);
}

PaidLevelDomain.java

@@ -20,6 +20,7 @@ package crawlercommons.url;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
@@ -76,7 +77,7 @@ public class PaidLevelDomain {
}
int firstHostPiece = 0;
if (ccTLDs.contains(subNames[numPieces - 1].toLowerCase())) {
if (ccTLDs.contains(subNames[numPieces - 1].toLowerCase(Locale.getDefault()))) {
// We have a country code at the end. See if the preceding piece is
// either
// a two-letter name (country code or funky short gTLD), or one of
@@ -85,15 +86,15 @@
if (subNames[numPieces - 2].length() <= 2) {
// Must be xxx.co.jp format
firstHostPiece = numPieces - 3;
} else if (gTLDs.contains(subNames[numPieces - 2].toLowerCase())) {
} else if (gTLDs.contains(subNames[numPieces - 2].toLowerCase(Locale.getDefault()))) {
// Must be xxx.com.mx format
firstHostPiece = numPieces - 3;
} else {
// Must be xxx.it format
firstHostPiece = numPieces - 2;
}
} else if (gTLDs.contains(subNames[numPieces - 1].toLowerCase())) {
if (ccTLDs.contains(subNames[numPieces - 2].toLowerCase())) {
} else if (gTLDs.contains(subNames[numPieces - 1].toLowerCase(Locale.getDefault()))) {
if (ccTLDs.contains(subNames[numPieces - 2].toLowerCase(Locale.getDefault()))) {
// Must be xxx.de.com format
firstHostPiece = numPieces - 3;
} else {

FetchedResultTest.java

@@ -16,22 +16,29 @@
*/
package crawlercommons.fetcher;
import java.io.UnsupportedEncodingException;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author lmcgibbn
*
*/
public class FetchedResultTest {
private static final Logger LOG = LoggerFactory.getLogger(FetchedResultTest.class);
/**
* Test method for {@link crawlercommons.fetcher.FetchedResult#report()}.
* This does not actually test anything but simply allows us to see what a
* generated report would look like.
* @throws UnsupportedEncodingException
*/
@Test
public void testPrintReport() {
public void testPrintReport() throws UnsupportedEncodingException {
Metadata headerMetadata = new Metadata();
headerMetadata.add(Metadata.CONTENT_DISPOSITION, "This is content disposition");
headerMetadata.add(Metadata.CONTENT_ENCODING, "This is the encoding");
@@ -52,8 +59,8 @@ public class FetchedResultTest {
"http://en.wikipedia.org/wiki/Glasgow", // redirectedUrl
System.currentTimeMillis(), // fetchTime
headerMetadata, new String("Glasgow (/ˈɡlɑːzɡoʊ, ˈɡlæz-/;[4] Scots: Glesca; Scottish Gaelic: Glaschu) "
+ "is the largest city in Scotland, and the third largest in the United Kingdom.").getBytes(), "ScotsText", 2014, load, "http://en.wikipedia.org/wiki/Glasgow",
+ "is the largest city in Scotland, and the third largest in the United Kingdom.").getBytes("UTF-8"), "ScotsText", 2014, load, "http://en.wikipedia.org/wiki/Glasgow",
0, "wikipedia.org", 200, "");
System.out.println(result.report());
LOG.error(result.report());
}
}

SimpleHttpFetcherTest.java

@@ -25,6 +25,7 @@ import static org.junit.Assert.fail;
import java.io.IOException;
import java.net.ConnectException;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
@@ -118,7 +119,7 @@ public class SimpleHttpFetcherTest {
String content = "redirected";
response.setContentLength(content.length());
response.getOutputStream().write(content.getBytes());
response.getOutputStream().write(content.getBytes("UTF-8"));
}
}
}
@@ -148,7 +149,7 @@
response.setContentType("text/plain");
response.setContentLength(content.length());
response.getOutputStream().write(content.getBytes());
response.getOutputStream().write(content.getBytes("UTF-8"));
}
}
@@ -170,7 +171,7 @@
}
response.setContentLength(content.length());
response.getOutputStream().write(content.getBytes());
response.getOutputStream().write(content.getBytes("UTF-8"));
}
}
@@ -380,7 +381,7 @@
BaseFetcher fetcher = new SimpleHttpFetcher(1, TestUtils.CC_TEST_AGENT);
String url = "http://localhost:8089/";
FetchedResult result = fetcher.get(url);
String contentStr = new String(result.getContent(), 0, result.getContentLength());
String contentStr = new String(result.getContent(), 0, result.getContentLength(), Charset.defaultCharset());
assertTrue(englishContent.equals(contentStr));
}

RobotUtilsTest.java

@@ -114,7 +114,7 @@ public class RobotUtilsTest {
BaseHttpFetcher fetcher = Mockito.mock(BaseHttpFetcher.class);
FetchedResult result = Mockito.mock(FetchedResult.class);
Mockito.when(result.getContent()).thenReturn(simpleRobotsTxt.getBytes());
Mockito.when(result.getContent()).thenReturn(simpleRobotsTxt.getBytes("UTF-8"));
Mockito.when(fetcher.get(Mockito.any(String.class))).thenReturn(result);
UserAgent userAgent = new UserAgent("testAgent", "crawler@domain.com", "http://www.domain.com");
Mockito.when(fetcher.getUserAgent()).thenReturn(userAgent);

SimpleRobotRulesParserTest.java

@@ -26,6 +26,7 @@ import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.Locale;
import javax.servlet.http.HttpServletResponse;
@@ -45,8 +46,8 @@ public class SimpleRobotRulesParserTest {
}
@Test
public void testEmptyRules() throws MalformedURLException {
BaseRobotRules rules = createRobotRules("Any-darn-crawler", "".getBytes());
public void testEmptyRules() throws MalformedURLException, UnsupportedEncodingException {
BaseRobotRules rules = createRobotRules("Any-darn-crawler", "".getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@@ -54,7 +55,7 @@
public void testQueryParamInDisallow() throws Exception {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow: /index.cfm?fuseaction=sitesearch.results*";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://searchservice.domain.com/index.cfm?fuseaction=sitesearch.results&type=People&qry=california&pg=2"));
}
@@ -64,7 +65,7 @@
// Test for /fish
final String simpleRobotsTxt1 = "User-agent: *" + CRLF + "Disallow: /fish" + CRLF;
BaseRobotRules rule1 = createRobotRules("Any-darn-crawler", simpleRobotsTxt1.getBytes());
BaseRobotRules rule1 = createRobotRules("Any-darn-crawler", simpleRobotsTxt1.getBytes("UTF-8"));
assertFalse(rule1.isAllowed("http://www.fict.com/fish"));
assertFalse(rule1.isAllowed("http://www.fict.com/fish.html"));
assertFalse(rule1.isAllowed("http://www.fict.com/fish/salmon.html"));
@@ -80,7 +81,7 @@
// Test for /fish*
final String simpleRobotsTxt2 = "User-agent: *" + CRLF + "Disallow: /fish*" + CRLF;
BaseRobotRules rule2 = createRobotRules("Any-darn-crawler", simpleRobotsTxt2.getBytes());
BaseRobotRules rule2 = createRobotRules("Any-darn-crawler", simpleRobotsTxt2.getBytes("UTF-8"));
assertFalse(rule2.isAllowed("http://www.fict.com/fish"));
assertFalse(rule2.isAllowed("http://www.fict.com/fish.html"));
assertFalse(rule2.isAllowed("http://www.fict.com/fish/salmon.html"));
@@ -96,7 +97,7 @@
// Test for /fish/
final String simpleRobotsTxt3 = "User-agent: *" + CRLF + "Disallow: /fish/" + CRLF;
BaseRobotRules rule3 = createRobotRules("Any-darn-crawler", simpleRobotsTxt3.getBytes());
BaseRobotRules rule3 = createRobotRules("Any-darn-crawler", simpleRobotsTxt3.getBytes("UTF-8"));
assertFalse(rule3.isAllowed("http://www.fict.com/fish/"));
assertFalse(rule3.isAllowed("http://www.fict.com/fish/?id=anything"));
assertFalse(rule3.isAllowed("http://www.fict.com/fish/salmon.htm"));
@@ -108,7 +109,7 @@
// Test for /*.php
final String simpleRobotsTxt4 = "User-agent: *" + CRLF + "Disallow: /*.php" + CRLF;
BaseRobotRules rule4 = createRobotRules("Any-darn-crawler", simpleRobotsTxt4.getBytes());
BaseRobotRules rule4 = createRobotRules("Any-darn-crawler", simpleRobotsTxt4.getBytes("UTF-8"));
assertFalse(rule4.isAllowed("http://www.fict.com/filename.php"));
assertFalse(rule4.isAllowed("http://www.fict.com/folder/filename.php"));
assertFalse(rule4.isAllowed("http://www.fict.com/folder/filename.php?parameters"));
@@ -121,7 +122,7 @@
// Test for /*.php$
final String simpleRobotsTxt5 = "User-agent: *" + CRLF + "Disallow: /*.php$" + CRLF;
BaseRobotRules rule5 = createRobotRules("Any-darn-crawler", simpleRobotsTxt5.getBytes());
BaseRobotRules rule5 = createRobotRules("Any-darn-crawler", simpleRobotsTxt5.getBytes("UTF-8"));
assertFalse(rule5.isAllowed("http://www.fict.com/filename.php"));
assertFalse(rule5.isAllowed("http://www.fict.com/folder/filename.php"));
@@ -133,7 +134,7 @@
// Test for /fish*.php
final String simpleRobotsTxt6 = "User-agent: *" + CRLF + "Disallow: /fish*.php" + CRLF;
BaseRobotRules rule6 = createRobotRules("Any-darn-crawler", simpleRobotsTxt6.getBytes());
BaseRobotRules rule6 = createRobotRules("Any-darn-crawler", simpleRobotsTxt6.getBytes("UTF-8"));
assertFalse(rule6.isAllowed("http://www.fict.com/fish.php"));
assertFalse(rule6.isAllowed("http://www.fict.com/fishheads/catfish.php?parameters"));
@@ -142,35 +143,35 @@
// Test rule with multiple '*' characters
final String simpleRobotsTxt7 = "User-agent: *" + CRLF + "Disallow: /*fish*.php" + CRLF;
BaseRobotRules rule7 = createRobotRules("Any-darn-crawler", simpleRobotsTxt7.getBytes());
BaseRobotRules rule7 = createRobotRules("Any-darn-crawler", simpleRobotsTxt7.getBytes("UTF-8"));
assertFalse(rule7.isAllowed("http://www.fict.com/fish.php"));
assertFalse(rule7.isAllowed("http://www.fict.com/superfishheads/catfish.php?parameters"));
assertTrue(rule7.isAllowed("http://www.fict.com/fishheads/catfish.htm"));
}
@Test
public void testCommentedOutLines() throws MalformedURLException {
public void testCommentedOutLines() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "#user-agent: testAgent" + LF + LF + "#allow: /index.html" + LF + "#allow: /test" + LF + LF + "#user-agent: test" + LF + LF + "#allow: /index.html" + LF
+ "#disallow: /test" + LF + LF + "#user-agent: someAgent" + LF + LF + "#disallow: /index.html" + LF + "#disallow: /test" + LF + LF;
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
Assert.assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testRobotsTxtAlwaysAllowed() throws MalformedURLException {
public void testRobotsTxtAlwaysAllowed() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/robots.txt"));
}
@Test
public void testAgentNotListed() throws MalformedURLException {
public void testAgentNotListed() throws MalformedURLException, UnsupportedEncodingException {
// Access is assumed to be allowed, if no rules match an agent.
final String simpleRobotsTxt = "User-agent: crawler1" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /" + CRLF + CRLF + "User-agent: crawler2" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("crawler3", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("crawler3", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
assertTrue(rules.isAllowed("http://www.domain.com/index.html"));
}
@@ -184,26 +185,26 @@
}
@Test
public void testSimplestAllowAll() throws MalformedURLException {
public void testSimplestAllowAll() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow:";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testMixedEndings() throws MalformedURLException {
public void testMixedEndings() throws MalformedURLException, UnsupportedEncodingException {
final String mixedEndingsRobotsTxt = "# /robots.txt for http://www.fict.org/" + CRLF + "# comments to webmaster@fict.org" + CR + LF + "User-agent: unhipbot" + LF + "Disallow: /" + CR + ""
+ CRLF + "User-agent: webcrawler" + LF + "User-agent: excite" + CR + "Disallow: " + "\u0085" + CR + "User-agent: *" + CRLF + "Disallow: /org/plans.html" + LF + "Allow: /org/"
+ CR + "Allow: /serv" + CRLF + "Allow: /~mak" + LF + "Disallow: /" + CRLF;
BaseRobotRules rules;
rules = createRobotRules("WebCrawler/3.0", mixedEndingsRobotsTxt.getBytes());
rules = createRobotRules("WebCrawler/3.0", mixedEndingsRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.fict.org/"));
assertTrue(rules.isAllowed("http://www.fict.org/index.html"));
rules = createRobotRules("Unknown/1.0", mixedEndingsRobotsTxt.getBytes());
rules = createRobotRules("Unknown/1.0", mixedEndingsRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.org/"));
assertFalse(rules.isAllowed("http://www.fict.org/index.html"));
assertTrue(rules.isAllowed("http://www.fict.org/robots.txt"));
@@ -219,7 +220,7 @@
}
@Test
public void testRfpCases() throws MalformedURLException {
public void testRfpCases() throws MalformedURLException, UnsupportedEncodingException {
// Run through all of the tests that are part of the robots.txt RFP
// http://www.robotstxt.org/norobots-rfc.txt
final String rfpExampleRobotsTxt = "# /robots.txt for http://www.fict.org/" + CRLF + "# comments to webmaster@fict.org" + CRLF + CRLF + "User-agent: unhipbot" + CRLF + "Disallow: /" + CRLF
@@ -228,7 +229,7 @@
BaseRobotRules rules;
rules = createRobotRules("UnhipBot/0.1", rfpExampleRobotsTxt.getBytes());
rules = createRobotRules("UnhipBot/0.1", rfpExampleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.org/"));
assertFalse(rules.isAllowed("http://www.fict.org/index.html"));
assertTrue(rules.isAllowed("http://www.fict.org/robots.txt"));
@@ -241,7 +242,7 @@
assertFalse(rules.isAllowed("http://www.fict.org/%7Ejim/jim.html"));
assertFalse(rules.isAllowed("http://www.fict.org/%7Emak/mak.html"));
rules = createRobotRules("WebCrawler/3.0", rfpExampleRobotsTxt.getBytes());
rules = createRobotRules("WebCrawler/3.0", rfpExampleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.fict.org/"));
assertTrue(rules.isAllowed("http://www.fict.org/index.html"));
assertTrue(rules.isAllowed("http://www.fict.org/robots.txt"));
@@ -254,7 +255,7 @@
assertTrue(rules.isAllowed("http://www.fict.org/%7Ejim/jim.html"));
assertTrue(rules.isAllowed("http://www.fict.org/%7Emak/mak.html"));
rules = createRobotRules("Excite/1.0", rfpExampleRobotsTxt.getBytes());
rules = createRobotRules("Excite/1.0", rfpExampleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.fict.org/"));
assertTrue(rules.isAllowed("http://www.fict.org/index.html"));
assertTrue(rules.isAllowed("http://www.fict.org/robots.txt"));
@@ -267,7 +268,7 @@
assertTrue(rules.isAllowed("http://www.fict.org/%7Ejim/jim.html"));
assertTrue(rules.isAllowed("http://www.fict.org/%7Emak/mak.html"));
rules = createRobotRules("Unknown/1.0", rfpExampleRobotsTxt.getBytes());
rules = createRobotRules("Unknown/1.0", rfpExampleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.org/"));
assertFalse(rules.isAllowed("http://www.fict.org/index.html"));
assertTrue(rules.isAllowed("http://www.fict.org/robots.txt"));
@@ -282,7 +283,7 @@
}
@Test
public void testNutchCases() throws MalformedURLException {
public void testNutchCases() throws MalformedURLException, UnsupportedEncodingException {
// Run through the Nutch test cases.
final String nutchRobotsTxt = "User-Agent: Agent1 #foo" + CR + "Disallow: /a" + CR + "Disallow: /b/a" + CR + "#Disallow: /c" + CR + "" + CR + "" + CR + "User-Agent: Agent2 Agent3#foo" + CR
@@ -290,7 +291,7 @@
BaseRobotRules rules;
rules = createRobotRules("Agent1", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent1", nutchRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/a"));
assertFalse(rules.isAllowed("http://www.domain.com/a/"));
assertFalse(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -312,7 +313,7 @@
assertTrue(rules.isAllowed("http://www.domain.com/foo/bar/baz.html"));
assertTrue(rules.isAllowed("http://www.domain.com/f/"));
rules = createRobotRules("Agent2", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent2", nutchRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/a"));
assertTrue(rules.isAllowed("http://www.domain.com/a/"));
assertTrue(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -334,7 +335,7 @@
assertTrue(rules.isAllowed("http://www.domain.com/foo/bar/baz.html"));
assertTrue(rules.isAllowed("http://www.domain.com/f/"));
rules = createRobotRules("Agent3", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent3", nutchRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/a"));
assertTrue(rules.isAllowed("http://www.domain.com/a/"));
assertTrue(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -356,7 +357,7 @@
assertTrue(rules.isAllowed("http://www.domain.com/foo/bar/baz.html"));
assertTrue(rules.isAllowed("http://www.domain.com/f/"));
rules = createRobotRules("Agent4", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent4", nutchRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/a"));
assertTrue(rules.isAllowed("http://www.domain.com/a/"));
assertTrue(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -378,7 +379,7 @@
assertTrue(rules.isAllowed("http://www.domain.com/foo/bar/baz.html"));
assertTrue(rules.isAllowed("http://www.domain.com/f/"));
rules = createRobotRules("Agent5", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent5", nutchRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/a"));
assertTrue(rules.isAllowed("http://www.domain.com/a/"));
assertTrue(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -400,7 +401,7 @@
assertFalse(rules.isAllowed("http://www.domain.com/foo/bar/baz.html"));
assertTrue(rules.isAllowed("http://www.domain.com/f/"));
rules = createRobotRules("Agent5,Agent2,Agent1,Agent3,*", nutchRobotsTxt.getBytes());
rules = createRobotRules("Agent5,Agent2,Agent1,Agent3,*", nutchRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/a"));
assertFalse(rules.isAllowed("http://www.domain.com/a/"));
assertFalse(rules.isAllowed("http://www.domain.com/a/bloh/foo.html"));
@@ -424,18 +425,18 @@
}
@Test
public void testHtmlMarkupInRobotsTxt() throws MalformedURLException {
public void testHtmlMarkupInRobotsTxt() throws MalformedURLException, UnsupportedEncodingException {
final String htmlRobotsTxt = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"><HTML>\n" + "<HEAD>\n" + "<TITLE>/robots.txt</TITLE>\n" + "</HEAD>\n" + "<BODY>\n"
+ "User-agent: anybot<BR>\n" + "Disallow: <BR>\n" + "Crawl-Delay: 10<BR>\n" + "\n" + "User-agent: *<BR>\n" + "Disallow: /<BR>\n" + "Crawl-Delay: 30<BR>\n" + "\n" + "</BODY>\n"
+ "</HTML>\n";
BaseRobotRules rules;
rules = createRobotRules("anybot", htmlRobotsTxt.getBytes());
rules = createRobotRules("anybot", htmlRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/index.html"));
assertEquals(10000, rules.getCrawlDelay());
rules = createRobotRules("bogusbot", htmlRobotsTxt.getBytes());
rules = createRobotRules("bogusbot", htmlRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
assertEquals(30000, rules.getCrawlDelay());
}
@@ -450,39 +451,39 @@
}
@Test
public void testHeritrixCases() throws MalformedURLException {
public void testHeritrixCases() throws MalformedURLException, UnsupportedEncodingException {
final String heritrixRobotsTxt = "User-agent: *\n" + "Disallow: /cgi-bin/\n" + "Disallow: /details/software\n" + "\n" + "User-agent: denybot\n" + "Disallow: /\n" + "\n"
+ "User-agent: allowbot1\n" + "Disallow: \n" + "\n" + "User-agent: allowbot2\n" + "Disallow: /foo\n" + "Allow: /\n" + "\n" + "User-agent: delaybot\n" + "Disallow: /\n"
+ "Crawl-Delay: 20\n" + "Allow: /images/\n";
BaseRobotRules rules;
rules = createRobotRules("Mozilla allowbot1 99.9", heritrixRobotsTxt.getBytes());
rules = createRobotRules("Mozilla allowbot1 99.9", heritrixRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/path"));
assertTrue(rules.isAllowed("http://www.domain.com/"));
rules = createRobotRules("Mozilla allowbot2 99.9", heritrixRobotsTxt.getBytes());
rules = createRobotRules("Mozilla allowbot2 99.9", heritrixRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/path"));
assertTrue(rules.isAllowed("http://www.domain.com/"));
assertFalse(rules.isAllowed("http://www.domain.com/foo"));
rules = createRobotRules("Mozilla denybot 99.9", heritrixRobotsTxt.getBytes());
rules = createRobotRules("Mozilla denybot 99.9", heritrixRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/path"));
assertFalse(rules.isAllowed("http://www.domain.com/"));
assertEquals(BaseRobotRules.UNSET_CRAWL_DELAY, rules.getCrawlDelay());
rules = createRobotRules("Mozilla anonbot 99.9", heritrixRobotsTxt.getBytes());
rules = createRobotRules("Mozilla anonbot 99.9", heritrixRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/path"));
assertFalse(rules.isAllowed("http://www.domain.com/cgi-bin/foo.pl"));
rules = createRobotRules("Mozilla delaybot 99.9", heritrixRobotsTxt.getBytes());
rules = createRobotRules("Mozilla delaybot 99.9", heritrixRobotsTxt.getBytes("UTF-8"));
assertEquals(20000, rules.getCrawlDelay());
}
@Test
public void testCaseSensitivePaths() throws MalformedURLException {
public void testCaseSensitivePaths() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Allow: /AnyPage.html" + CRLF + "Allow: /somepage.html" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/AnyPage.html"));
assertFalse(rules.isAllowed("http://www.domain.com/anypage.html"));
assertTrue(rules.isAllowed("http://www.domain.com/somepage.html"));
@@ -490,76 +491,76 @@
}
@Test
public void testEmptyDisallow() throws MalformedURLException {
public void testEmptyDisallow() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow:";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testEmptyAllow() throws MalformedURLException {
public void testEmptyAllow() throws MalformedURLException, UnsupportedEncodingException {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Allow:";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testMultiWildcard() throws MalformedURLException {
public void testMultiWildcard() throws MalformedURLException, UnsupportedEncodingException {
// Make sure we only take the first wildcard entry.
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /" + CRLF + CRLF + "User-agent: *" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testMultiMatches() throws MalformedURLException {
public void testMultiMatches() throws MalformedURLException, UnsupportedEncodingException {
// Make sure we only take the first record that matches.
final String simpleRobotsTxt = "User-agent: crawlerbot" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /" + CRLF + CRLF + "User-agent: crawler" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("crawlerbot", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("crawlerbot", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testMultiAgentNames() throws MalformedURLException {
public void testMultiAgentNames() throws MalformedURLException, UnsupportedEncodingException {
// When there are more than one agent name on a line.
final String simpleRobotsTxt = "User-agent: crawler1 crawler2" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /";
BaseRobotRules rules = createRobotRules("crawler2", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("crawler2", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testMultiWordAgentName() throws MalformedURLException {
public void testMultiWordAgentName() throws MalformedURLException, UnsupportedEncodingException {
// When the user agent name has a space in it.
final String simpleRobotsTxt = "User-agent: Download Ninja" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /";
BaseRobotRules rules = createRobotRules("Download Ninja", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Download Ninja", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testUnsupportedFields() throws MalformedURLException {
public void testUnsupportedFields() throws MalformedURLException, UnsupportedEncodingException {
// When we have a new field type that we don't know about.
final String simpleRobotsTxt = "User-agent: crawler1" + CRLF + "Disallow: /index.html" + CRLF + "Allow: /" + CRLF + "newfield: 234" + CRLF + "User-agent: crawler2" + CRLF + "Disallow: /";
BaseRobotRules rules = createRobotRules("crawler2", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("crawler2", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.domain.com/anypage.html"));
}
@Test
public void testAcapFields() throws MalformedURLException {
public void testAcapFields() throws MalformedURLException, UnsupportedEncodingException {
final String robotsTxt = "acap-crawler: *" + CRLF + "acap-disallow-crawl: /ultima_ora/";
SimpleRobotRulesParser parser = new SimpleRobotRulesParser();
parser.parseContent("url", robotsTxt.getBytes(), "text/plain", "foobot");
parser.parseContent("url", robotsTxt.getBytes("UTF-8"), "text/plain", "foobot");
assertEquals(0, parser.getNumWarnings());
}
@@ -597,34 +598,34 @@
}
@Test
public void testCrawlDelay() {
public void testCrawlDelay() throws UnsupportedEncodingException {
final String delayRules1RobotsTxt = "User-agent: bixo" + CR + "Crawl-delay: 10" + CR + "User-agent: foobot" + CR + "Crawl-delay: 20" + CR + "User-agent: *" + CR + "Disallow:/baz" + CR;
BaseRobotRules rules = createRobotRules("bixo", delayRules1RobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("bixo", delayRules1RobotsTxt.getBytes("UTF-8"));
long crawlDelay = rules.getCrawlDelay();
assertEquals("testing crawl delay for agent bixo - rule 1", 10000, crawlDelay);
final String delayRules2RobotsTxt = "User-agent: foobot" + CR + "Crawl-delay: 20" + CR + "User-agent: *" + CR + "Disallow:/baz" + CR;
rules = createRobotRules("bixo", delayRules2RobotsTxt.getBytes());
rules = createRobotRules("bixo", delayRules2RobotsTxt.getBytes("UTF-8"));
crawlDelay = rules.getCrawlDelay();
assertEquals("testing crawl delay for agent bixo - rule 2", BaseRobotRules.UNSET_CRAWL_DELAY, crawlDelay);
}
@Test
public void testBigCrawlDelay() throws MalformedURLException {
public void testBigCrawlDelay() throws MalformedURLException, UnsupportedEncodingException {
final String robotsTxt = "User-agent: *" + CR + "Crawl-delay: 3600" + CR + "Disallow:" + CR;
BaseRobotRules rules = createRobotRules("bixo", robotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("bixo", robotsTxt.getBytes("UTF-8"));
assertFalse("disallow all if huge crawl delay", rules.isAllowed("http://www.domain.com/"));
}
@Test
public void testBrokenKrugleRobotsTxtFile() throws MalformedURLException {
public void testBrokenKrugleRobotsTxtFile() throws MalformedURLException, UnsupportedEncodingException {
final String krugleRobotsTxt = "User-agent: *" + CR + "Disallow: /maintenance.html" + CR + "Disallow: /perl/" + CR + "Disallow: /cgi-bin/" + CR + "Disallow: /examples/" + CR
+ "Crawl-delay: 3" + CR + "" + CR + "User-agent: googlebot" + CR + "Crawl-delay: 1" + CR + "" + CR + "User-agent: qihoobot" + CR + "Disallow: /";
BaseRobotRules rules = createRobotRules("googlebot/2.1", krugleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("googlebot/2.1", krugleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.krugle.com/examples/index.html"));
}
@@ -647,10 +648,10 @@
}
@Test
public void testFloatingPointCrawlDelay() throws MalformedURLException {
public void testFloatingPointCrawlDelay() throws MalformedURLException, UnsupportedEncodingException {
final String robotsTxt = "User-agent: *" + CR + "Crawl-delay: 0.5" + CR + "Disallow:" + CR;
BaseRobotRules rules = createRobotRules("bixo", robotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("bixo", robotsTxt.getBytes("UTF-8"));
assertEquals(500, rules.getCrawlDelay());
}
@@ -703,7 +704,7 @@
assertEquals("Found sitemap", 3, rules.getSitemaps().size());
// check that the last one is not lowercase only
String url = rules.getSitemaps().get(2);
boolean lowercased = url.equals(url.toLowerCase());
boolean lowercased = url.equals(url.toLowerCase(Locale.getDefault()));
assertFalse("Sitemap case check", lowercased);
}
@@ -749,7 +750,7 @@
public void testAllowBeforeDisallow() throws Exception {
final String simpleRobotsTxt = "User-agent: *" + CRLF + "Disallow: /fish" + CRLF + "Allow: /fish" + CRLF;
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.fict.com/fish"));
}
@@ -758,16 +759,16 @@
public void testSpacesInMultipleUserAgentNames() throws Exception {
final String simpleRobotsTxt = "User-agent: One, Two, Three" + CRLF + "Disallow: /" + CRLF + "" + CRLF + "User-agent: *" + CRLF + "Allow: /" + CRLF;
BaseRobotRules rules = createRobotRules("One", simpleRobotsTxt.getBytes());
BaseRobotRules rules = createRobotRules("One", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.com/fish"));
rules = createRobotRules("Two", simpleRobotsTxt.getBytes());
rules = createRobotRules("Two", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.com/fish"));
rules = createRobotRules("Three", simpleRobotsTxt.getBytes());
rules = createRobotRules("Three", simpleRobotsTxt.getBytes("UTF-8"));
assertFalse(rules.isAllowed("http://www.fict.com/fish"));
rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes());
rules = createRobotRules("Any-darn-crawler", simpleRobotsTxt.getBytes("UTF-8"));
assertTrue(rules.isAllowed("http://www.fict.com/fish"));
}
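
All of the getBytes("UTF-8") calls above name the charset by string, which is why the checked UnsupportedEncodingException now appears in the test signatures. Since this commit raises the compiler level to 1.7, java.nio.charset.StandardCharsets (new in Java 7) would be a possible follow-up that drops both the string literal and the checked exception; a minimal sketch, not part of this commit:

import java.nio.charset.StandardCharsets;

public class CharsetConstants {
    public static void main(String[] args) {
        String robotsTxt = "User-agent: *\r\nDisallow:";
        // StandardCharsets.UTF_8 is a Charset constant, so no
        // UnsupportedEncodingException is declared or thrown.
        byte[] bytes = robotsTxt.getBytes(StandardCharsets.UTF_8);
        String roundTrip = new String(bytes, StandardCharsets.UTF_8);
        System.out.println(roundTrip.equals(robotsTxt)); // true
    }
}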

AbstractSiteMapTest.java

@@ -3,6 +3,7 @@ package crawlercommons.sitemaps;
import static org.junit.Assert.*;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.TimeZone;
import org.junit.Test;
@@ -14,7 +15,7 @@
assertNull(AbstractSiteMap.convertToDate("blah"));
assertNull(AbstractSiteMap.convertToDate(null));
SimpleDateFormat isoFormatNoTimezone = new SimpleDateFormat("yyyyMMdd");
SimpleDateFormat isoFormatNoTimezone = new SimpleDateFormat("yyyyMMdd", Locale.getDefault());
// For formats where there's no time zone information, the time zone is
// undefined, so we can
@@ -23,7 +24,7 @@
assertEquals("20140601", isoFormatNoTimezone.format(AbstractSiteMap.convertToDate("2014-06")));
assertEquals("20140603", isoFormatNoTimezone.format(AbstractSiteMap.convertToDate("2014-06-03")));
SimpleDateFormat isoFormat = new SimpleDateFormat("yyyyMMdd'T'HHmmss");
SimpleDateFormat isoFormat = new SimpleDateFormat("yyyyMMdd'T'HHmmss", Locale.getDefault());
isoFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
// Complete date plus hours and minutes
@@ -38,7 +39,7 @@
// Complete date plus hours, minutes, seconds and a decimal fraction of
// a second
SimpleDateFormat isoFormatWithFractionSeconds = new SimpleDateFormat("yyyyMMdd'T'HHmmss.S");
SimpleDateFormat isoFormatWithFractionSeconds = new SimpleDateFormat("yyyyMMdd'T'HHmmss.S", Locale.getDefault());
isoFormatWithFractionSeconds.setTimeZone(TimeZone.getTimeZone("UTC"));
assertEquals("20140603T103045.820", isoFormatWithFractionSeconds.format(AbstractSiteMap.convertToDate("2014-06-03T10:30:45.82+00:00")));

SiteMapParserTest.java

@@ -21,10 +21,11 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;
import java.util.Locale;
import org.apache.commons.io.IOUtils;
import org.junit.After;
@@ -32,11 +33,15 @@ import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.*;
@RunWith(JUnit4.class)
public class SiteMapParserTest {
private static final Logger LOG = LoggerFactory.getLogger(SiteMapParserTest.class);
@Before
public void setUp() throws Exception {
@@ -79,11 +84,10 @@ public class SiteMapParserTest {
@Test
public void testFullDateFormat() {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm+hh:00");
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm+hh:00", Locale.getDefault());
Date date = new Date();
System.out.println(format.format(date));
System.out.println(SiteMap.getFullDateFormat().format(date));
LOG.info(format.format(date));
LOG.info(SiteMap.getFullDateFormat().format(date));
}
@Test
@@ -91,7 +95,7 @@
SiteMapParser parser = new SiteMapParser();
String contentType = "text/plain";
String scontent = "http://www.example.com/catalog?item=1\nhttp://www.example.com/catalog?item=11";
byte[] content = scontent.getBytes();
byte[] content = scontent.getBytes("UTF-8");
URL url = new URL("http://www.example.com/sitemap.txt");
AbstractSiteMap asm = parser.parseSiteMap(contentType, content, url);
@@ -106,7 +110,7 @@
public void testSitemapTXTWithXMLExt() throws UnknownFormatException, IOException {
SiteMapParser parser = new SiteMapParser();
String scontent = "http://www.example.com/catalog?item=1\nhttp://www.example.com/catalog?item=11";
byte[] content = scontent.getBytes();
byte[] content = scontent.getBytes("UTF-8");
URL url = new URL("http://www.example.com/sitemap.xml");
String contentType = "text/plain";
@@ -160,7 +164,7 @@
scontent.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">")
.append("<url><!-- This file is not a valid XML file --></url>").append("<url><loc> http://cs.harding.edu/fmccown/sitemaps/something.html</loc>")
.append("</url><!-- missing opening url tag --></url></urlset>");
byte[] content = scontent.toString().getBytes();
byte[] content = scontent.toString().getBytes("UTF-8");
URL url = new URL("http://www.example.com/sitemapindex.xml");
parser.parseSiteMap(contentType, content, url); // This Sitemap contains
@@ -224,7 +228,7 @@
StringBuilder scontent = new StringBuilder(1024);
scontent.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">").append("<url>")
.append("<loc>http://www.example.com/</loc>").append("</url>").append("</urlset>");
byte[] content = scontent.toString().getBytes();
byte[] content = scontent.toString().getBytes("UTF-8");
URL url = new URL("http://www.example.com/subsection/sitemap.xml");
AbstractSiteMap asm = parser.parseSiteMap(contentType, content, url);
@@ -245,8 +249,9 @@
assertFalse(sm.getSiteMapUrls().iterator().next().isValid());
}
/** Returns a good simple default XML sitemap as a byte array */
private byte[] getXMLSitemapAsBytes() {
/** Returns a good simple default XML sitemap as a byte array
* @throws UnsupportedEncodingException */
private byte[] getXMLSitemapAsBytes() throws UnsupportedEncodingException {
StringBuilder scontent = new StringBuilder(1024);
scontent.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">").append("<url>")
.append(" <loc>http://www.example.com/</loc>").append(" <lastmod>2005-01-01</lastmod>").append(" <changefreq>monthly</changefreq>").append(" <priority>0.8</priority>")
@@ -257,6 +262,6 @@
.append(" <loc><url><![CDATA[http://www.example.com/catalog?item=83&amp;desc=vacation_usa]]></url></loc>").append(" <lastmod>2004-11-23</lastmod>").append("</url>")
.append("</urlset>");
return scontent.toString().getBytes();
return scontent.toString().getBytes("UTF-8");
}
}

RedirectResponseHandler.java

@@ -56,14 +56,14 @@ public class RedirectResponseHandler extends AbstractHttpHandler {
String content = "redirected content";
response.setContentLength(content.length());
response.getOutputStream().write(content.getBytes());
response.getOutputStream().write(content.getBytes("UTF-8"));
} else {
response.setStatus(HttpStatus.SC_OK);
response.setContentType("text/plain");
String content = "other content";
response.setContentLength(content.length());
response.getOutputStream().write(content.getBytes());
response.getOutputStream().write(content.getBytes("UTF-8"));
}
}
}