@@ -34,7 +34,7 @@ public static Book[] search(String term) throws IOException{
34
34
"foreignfiction/index.php" + // subdirectory (fiction only now)
35
35
"?s=" + // prefix for query
36
36
URLEncoder .encode (term , "UTF-8" )+ // encode search term properly
37
- "&f_lang=0 " + // language type
37
+ "&f_lang=English " + // language type
38
38
"&f_columns=0" +
39
39
"&f_ext=0" );
40
40
URLConnection libgenConnection = destWebpage .openConnection ();
@@ -49,58 +49,69 @@ public static Book[] search(String term) throws IOException{
49
49
}
50
50
51
51
final Book [] urls = booksFromHTML (finalHTML );
52
- return urls ; // unable to find any
52
+ return urls ;
53
53
}
54
54
55
55
private static Book [] booksFromHTML (String finalHTML ) throws MalformedURLException {
56
56
ArrayList <Book > books = new ArrayList <Book >();
57
57
Document doc = Jsoup .parse (finalHTML );
58
- Elements tableData = doc .getElementsByTag ("td" ); // find all table data elements
58
+ Elements tables = doc .getElementsByTag ("table" );
59
+ Element targetTable = null ;
60
+ // target: the table after the headings table
61
+ for (int i = 0 ; i < tables .size (); i ++){
62
+ Element table = tables .get (i );
63
+ if (table .getElementsByTag ("td" ).get (0 ).html ().contains ("Author(s)" )){
64
+ targetTable = tables .get (i +1 );
65
+ break ;
66
+ }
67
+ }
68
+
69
+ Elements tableData = targetTable .getElementsByTag ("td" ); // find all table data elements
59
70
for (Element tableDatum : tableData ){
60
71
Element dlTag = tableDatum .getElementById ("1" );
61
72
if (dlTag != null ){
62
73
final String currentLine = dlTag .toString ();
63
74
final String lowerCaseLine = currentLine .toLowerCase ();
64
- if (lowerCaseLine .contains ("english" )){
65
- Pattern dlPattern = Pattern .compile ("/foreignfiction/get\\ .php\\ ?md5=([a-z]|[0-9])*" ); // it's an http request with an md5 arg
66
- Matcher dlMatches = dlPattern .matcher (currentLine );
67
- dlMatches .find ();
68
- final String dlLink = dlMatches .group ();
69
- final String md5 = dlLink .substring (dlLink .indexOf ("=" ) + 1 );
70
- final URL dlURL = new URL (mirror + dlLink );
71
-
72
- final String titlePrefix = "itle:</td><td>" ;
73
- final Pattern titlePattern = Pattern .compile (titlePrefix + "[^<]*" ); // <td>Title1:</td><td>The Hunt for Red October</td>
74
- final Matcher titleMatcher = titlePattern .matcher (currentLine );
75
- titleMatcher .find ();
76
- String title = titleMatcher .group ();
77
- title = title .substring (titlePrefix .length ());
78
-
79
- final String authorPrefix = "uthor1:</td><td>" ;
80
- final Pattern authorPattern = Pattern .compile (authorPrefix + "[^<]*" ); // <td>Author1:</td><td>Clancy, Tom</td>
81
- final Matcher authorMatcher = authorPattern .matcher (currentLine );
82
- authorMatcher .find ();
83
- String author = authorMatcher .group ();
84
- author = author .substring (authorPrefix .length ());
85
-
86
- final Pattern extensionPattern = Pattern .compile (">[a-z]*\\ ([0-9]*.*\\ )" ); //>epub(854kb)</a>
87
- final Matcher extensionMatcher = extensionPattern .matcher (currentLine );
88
- extensionMatcher .find ();
89
- final String extensionSize = extensionMatcher .group ();
90
- String extension = extensionSize .substring (1 , extensionSize .indexOf ('(' ));
91
-
92
- String sizeNotation = extensionSize .substring (extensionSize .indexOf ('(' )+1 , extensionSize .indexOf (')' )).toLowerCase ();
93
- int size = 0 ;
94
- if (sizeNotation .indexOf ('k' ) != -1 ){
95
- size = Integer .parseInt (sizeNotation .substring (0 , sizeNotation .indexOf ('k' )));
96
- }
97
- else if (sizeNotation .indexOf ('m' ) != -1 ){
98
- size = Integer .parseInt (sizeNotation .substring (0 , sizeNotation .indexOf ('m' ))) * 1024 ; // iz megabyte
99
- }
100
-
101
- Book currentBook = new Book (title , author , md5 + "." + extension , dlURL , size );
102
- books .add (currentBook );
75
+ if (lowerCaseLine .contains ("english" )){
76
+ Pattern dlPattern = Pattern .compile ("/foreignfiction/get\\ .php\\ ?md5=([a-z]|[0-9])*" ); // it's an http request with an md5 arg
77
+ Matcher dlMatches = dlPattern .matcher (currentLine );
78
+ dlMatches .find ();
79
+ final String dlLink = dlMatches .group ();
80
+ final String md5 = dlLink .substring (dlLink .indexOf ("=" ) + 1 );
81
+ final URL dlURL = new URL (mirror + dlLink );
82
+
83
+ final String titlePrefix = "itle:</td><td>" ;
84
+ final Pattern titlePattern = Pattern .compile (titlePrefix + "[^<]*" ); // <td>Title1:</td><td>The Hunt for Red October</td>
85
+ final Matcher titleMatcher = titlePattern .matcher (currentLine );
86
+ titleMatcher .find ();
87
+ String title = titleMatcher .group ();
88
+ title = title .substring (titlePrefix .length ());
89
+
90
+ final String authorPrefix = "uthor1:</td><td>" ;
91
+ final Pattern authorPattern = Pattern .compile (authorPrefix + "[^<]*" ); // <td>Author1:</td><td>Clancy, Tom</td>
92
+ final Matcher authorMatcher = authorPattern .matcher (currentLine );
93
+ authorMatcher .find ();
94
+ String author = authorMatcher .group ();
95
+ author = author .substring (authorPrefix .length ());
96
+
97
+ final Pattern extensionPattern = Pattern .compile (">[a-z]*\\ ([0-9]*.*\\ )" ); //>epub(854kb)</a>
98
+ final Matcher extensionMatcher = extensionPattern .matcher (currentLine );
99
+ extensionMatcher .find ();
100
+ final String extensionSize = extensionMatcher .group ();
101
+ String extension = extensionSize .substring (1 , extensionSize .indexOf ('(' ));
102
+
103
+ String sizeNotation = extensionSize .substring (extensionSize .indexOf ('(' )+1 , extensionSize .indexOf (')' )).toLowerCase ();
104
+ int size = 0 ;
105
+ if (sizeNotation .indexOf ('k' ) != -1 ){
106
+ size = Integer .parseInt (sizeNotation .substring (0 , sizeNotation .indexOf ('k' )));
107
+ }
108
+ else if (sizeNotation .indexOf ('m' ) != -1 ){
109
+ size = Integer .parseInt (sizeNotation .substring (0 , sizeNotation .indexOf ('m' ))) * 1024 ; // iz megabyte
103
110
}
111
+
112
+ Book currentBook = new Book (title , author , md5 , extension , dlURL , size );
113
+ books .add (currentBook );
114
+ }
104
115
}
105
116
}
106
117
Book [] urls = new Book [books .size ()];
0 commit comments