chore: extend argparse + reword exception messages
* also add an example urls2crawl text file with one URL per line
This commit is contained in:
parent
a82ff94e6a
commit
4d468d13ef
|
@ -89,8 +89,8 @@ class ParserCallback extends HTMLEditorKit.ParserCallback {
|
|||
System.err.println("Adding URI: "+uri.toString());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("Nalezeno nekorektní URI: "+href);
|
||||
e.printStackTrace();
|
||||
System.err.println("Bad bad URI found: "+href);
|
||||
System.err.println(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -117,16 +117,35 @@ class ParserCallback extends HTMLEditorKit.ParserCallback {
|
|||
public class crawler {
|
||||
public static void main(String[] args) {
|
||||
if (args.length < 1) {
|
||||
System.err.println("Missing parameter - start URL");
|
||||
System.err.println("Not enough parameters");
|
||||
return;
|
||||
}
|
||||
LinkedList<URIinfo> foundURIs=new LinkedList<URIinfo>();
|
||||
HashSet<URI> visitedURIs=new HashSet<URI>();
|
||||
URI uri;
|
||||
try {
|
||||
uri = new URI(args[0]+"/");
|
||||
foundURIs.add(new URIinfo(uri, 0));
|
||||
visitedURIs.add(uri);
|
||||
/* if arg 0 is -f, read the URLs to crawl from a file; otherwise assume the arg is a URL */
|
||||
if (args[0].equals("-f")) {
|
||||
/* read urls from the file pls */
|
||||
File file = new File(args[1]);
|
||||
Scanner sc = new Scanner(file);
|
||||
while (sc.hasNextLine())
|
||||
{
|
||||
String nuurlpart = sc.nextLine();
|
||||
if (!nuurlpart.endsWith("/")) {nuurlpart += "/";}
|
||||
uri = new URI(nuurlpart);
|
||||
foundURIs.add(new URIinfo(uri, 0));
|
||||
visitedURIs.add(uri);
|
||||
}
|
||||
} else {
|
||||
String nuurlpart = args[0];
|
||||
if (!nuurlpart.endsWith("/")) {nuurlpart += "/";}
|
||||
uri = new URI(args[0]);
|
||||
foundURIs.add(new URIinfo(uri, 0));
|
||||
visitedURIs.add(uri);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* zde zpracujte dalsi parametry - maxDepth a debugLevel...
|
||||
*/
|
||||
|
@ -144,6 +163,20 @@ public class crawler {
|
|||
reader.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
System.err.println("Error loading page - does it exist?");
|
||||
System.err.println(e);
|
||||
} catch (ConnectException e){
|
||||
System.err.println("Dang - connection refused");
|
||||
System.err.println(e);
|
||||
} catch (UnknownHostException e) {
|
||||
if (e.getMessage().endsWith(".onion")){
|
||||
System.err.println("This host is not on clearweb - skipping for now.");
|
||||
} else {
|
||||
System.err.println("The DNS record for this host might no longer exist.");
|
||||
}
|
||||
System.err.println(e);
|
||||
} catch (IOException e) {
|
||||
System.err.println("This host returned a 403.");
|
||||
System.err.println(e);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
https://stalluminati.neocities.org/matrix/
|
||||
https://git.dotya.ml
|
Loading…
Reference in New Issue