1
1
package lx ;
2
2
3
+ import cn .hutool .core .io .IORuntimeException ;
4
+ import cn .hutool .http .ContentType ;
5
+ import cn .hutool .http .HttpException ;
6
+ import cn .hutool .http .HttpUtil ;
7
+ import com .alibaba .fastjson .JSONObject ;
8
+ import com .google .common .collect .Lists ;
9
+ import lx .model .Zdm ;
10
+ import lx .utils .StreamUtils ;
11
+ import lx .utils .Utils ;
12
+ import org .apache .commons .lang3 .StringUtils ;
13
+
14
+ import javax .mail .*;
15
+ import javax .mail .internet .InternetAddress ;
16
+ import javax .mail .internet .MimeMessage ;
3
17
import java .io .File ;
4
18
import java .io .IOException ;
5
19
import java .nio .file .Files ;
8
22
import java .time .LocalDate ;
9
23
import java .time .LocalDateTime ;
10
24
import java .time .format .DateTimeFormatter ;
11
- import java .util .ArrayList ;
12
- import java .util .Comparator ;
13
- import java .util .HashSet ;
14
- import java .util .LinkedHashSet ;
15
- import java .util .List ;
16
- import java .util .Properties ;
17
- import java .util .Set ;
25
+ import java .util .*;
18
26
import java .util .stream .Collectors ;
19
27
20
- import javax .mail .Authenticator ;
21
- import javax .mail .Message ;
22
- import javax .mail .PasswordAuthentication ;
23
- import javax .mail .Session ;
24
- import javax .mail .Transport ;
25
- import javax .mail .internet .InternetAddress ;
26
- import javax .mail .internet .MimeMessage ;
27
-
28
- import org .apache .commons .lang3 .StringUtils ;
29
-
30
- import com .alibaba .fastjson .JSONObject ;
31
-
32
- import cn .hutool .core .io .IORuntimeException ;
33
- import cn .hutool .http .HttpException ;
34
- import cn .hutool .http .HttpUtil ;
35
- import lx .model .Zdm ;
36
- import lx .utils .StreamUtils ;
37
- import lx .utils .Utils ;
38
-
28
+ import static lx .utils .Const .WXPUSHER_URL ;
39
29
import static lx .utils .Const .ZDM_URL ;
40
30
41
31
public class ZdmCrawler {
42
32
43
33
public static void main (String [] args ) {
34
+ String emailHost = System .getenv ("emailHost" ), emailAccount = System .getenv ("emailAccount" ),
35
+ emailPassword = System .getenv ("emailPassword" ), spt = System .getenv ("spt" );
36
+ int minVoted = Integer .parseInt (System .getenv ("minVoted" )), minComments = Integer .parseInt (System .getenv ("minComments" )),
37
+ minPushSize = Integer .parseInt (System .getenv ("MIN_PUSH_SIZE" ));
38
+
39
+ //git actions部署的服务器一般在海外,调整为东八区的时区
40
+ TimeZone .setDefault (TimeZone .getTimeZone ("GMT+8" ));
41
+
44
42
Set <Zdm > zdms = ZDM_URL .stream ().flatMap (url -> {
45
43
List <Zdm > zdmPage = new ArrayList <>();
46
44
for (int i = 1 ; i <= 20 ; i ++) {//爬取前20页数据
47
45
try {
48
46
String s = HttpUtil .get (url + i , 10000 );
49
47
List <Zdm > zdmPart = JSONObject .parseArray (s , Zdm .class );
50
48
zdmPart .forEach (zdm -> {
51
- //将评论和点值数量的值后面会跟着 'k','w'这种字符,将它们转换一下方便后面过滤和排序
49
+ //评论和点值数量的值后面会跟着 'k','w'这种字符,将它们转换一下方便后面过滤和排序
52
50
zdm .setComments (Utils .strNumberFormat (zdm .getComments ()));
53
51
zdm .setVoted (Utils .strNumberFormat (zdm .getVoted ()));
54
52
});
@@ -66,11 +64,7 @@ public static void main(String[] args) {
66
64
HashSet <String > unPushed = Utils .readFile ("./unpushed.txt" );
67
65
zdms .addAll (StreamUtils .map (unPushed , o -> JSONObject .parseObject (o , Zdm .class )));
68
66
69
- //黑词过滤
70
- HashSet <String > blackWords = Utils .readFile ("./black_words.txt" );
71
- blackWords .removeIf (StringUtils ::isBlank );
72
-
73
- //已推送的优惠信息id
67
+ //已推送的优惠信息
74
68
Set <String > pushedIds ;
75
69
try {
76
70
new File ("./logs/" ).mkdirs ();
@@ -84,6 +78,10 @@ public static void main(String[] args) {
84
78
throw new RuntimeException ("读取logs目录失败" );
85
79
}
86
80
81
+ //黑词过滤
82
+ HashSet <String > blackWords = Utils .readFile ("./black_words.txt" );
83
+ blackWords .removeIf (StringUtils ::isBlank );
84
+
87
85
//白词过滤内容
88
86
HashSet <String > whiteWords = Utils .readFile ("./white_words.txt" );
89
87
whiteWords .removeIf (StringUtils ::isBlank );
@@ -93,8 +91,8 @@ public static void main(String[] args) {
93
91
System .out .println ("whiteWords is empty, running in blackWords mode." );
94
92
zdms = new HashSet <>(StreamUtils .filter (zdms , z ->
95
93
StringUtils .isBlank (StreamUtils .findFirst (blackWords , w -> z .getTitle ().contains (w ))) //黑词过滤
96
- && Integer .parseInt (z .getVoted ()) > Integer . parseInt ( System . getenv ( " minVoted" )) //值的数量
97
- && Integer .parseInt (z .getComments ()) > Integer . parseInt ( System . getenv ( " minComments" )) //评论的数量
94
+ && Integer .parseInt (z .getVoted ()) > minVoted //值的数量
95
+ && Integer .parseInt (z .getComments ()) > minComments //评论的数量
98
96
&& !z .getPrice ().contains ("前" ) //不是前xxx名的耍猴抢购
99
97
&& !pushedIds .contains (z .getArticleId ()) //不是已经推送过的
100
98
));
@@ -106,46 +104,89 @@ public static void main(String[] args) {
106
104
}
107
105
zdms = new HashSet <>(StreamUtils .filter (zdms , z ->
108
106
!StringUtils .isBlank (StreamUtils .findFirst (whiteWords , w -> z .getTitle ().contains (w ))) //白词过滤
109
- && Integer .parseInt (z .getVoted ()) > Integer . parseInt ( System . getenv ( " minVoted" )) //值的数量
110
- && Integer .parseInt (z .getComments ()) > Integer . parseInt ( System . getenv ( " minComments" )) //评论的数量
107
+ && Integer .parseInt (z .getVoted ()) > minVoted //值的数量
108
+ && Integer .parseInt (z .getComments ()) > minComments //评论的数量
111
109
&& !z .getPrice ().contains ("前" ) //不是前xxx名的耍猴抢购
112
110
&& !pushedIds .contains (z .getArticleId ()) //不是已经推送过的
113
111
));
114
112
}
115
113
116
114
zdms .forEach (z -> System .out .println (z .getArticleId () + " | " + z .getTitle ()));
117
115
118
- if (zdms .size () > Integer .parseInt (System .getenv ("MIN_PUSH_SIZE" ))) {
119
- sendEmail (Utils .buildMessage (new ArrayList <>(zdms )));
120
- Utils .write ("./logs/" + LocalDate .now () + "/pushed.txt" , true , StreamUtils .map (zdms , Zdm ::getArticleId ));
121
- } else {
116
+ //优惠信息单次推送的数量阈值,未达到阈值则暂存推送信息等待下次运行
117
+ if (zdms .size () < minPushSize ) {
118
+ //记录暂未推送的优惠信息
122
119
Utils .write ("./unpushed.txt" , false , StreamUtils .map (zdms , JSONObject ::toJSONString ));
120
+ return ;
123
121
}
122
+
123
+ //部分推送方式存在内容长度限制, 这里加了单次推送的条数限制, 超出则分批推送
124
+ Lists .partition (new ArrayList <>(zdms ), 100 ).forEach (part -> {
125
+ //生成推送消息的正文内容(html格式)
126
+ String text = Utils .buildMessage (new ArrayList <>(part ));
127
+ //通过邮箱推送
128
+ pushToEmail (text , emailHost , emailAccount , emailPassword );
129
+ //通过WxPusher推送
130
+ pushToWx (text , spt );
131
+ //记录已推送的优惠信息
132
+ List <String > articleIds = StreamUtils .map (part , Zdm ::getArticleId );
133
+ Utils .write ("./logs/" + LocalDate .now () + "/pushed.txt" , true , articleIds );
134
+ });
124
135
}
125
136
126
- public static void sendEmail (String text ) {
137
+ private static void pushToEmail (String text , String emailHost , String emailAccount , String emailPassword ) {
138
+ if (StringUtils .isBlank (emailHost ) || StringUtils .isBlank (emailAccount ) || StringUtils .isBlank (emailPassword )) {
139
+ System .out .println ("邮箱推送配置不完整,将尝试其他推送方式" );
140
+ return ;
141
+ }
142
+
127
143
Properties props = new Properties ();
128
- props .setProperty ("mail.smtp.host" , System . getenv ( " emailHost" ) );
144
+ props .setProperty ("mail.smtp.host" , emailHost );
129
145
props .setProperty ("mail.smtp.port" , "465" );
130
146
props .setProperty ("mail.smtp.auth" , "true" );
131
147
props .setProperty ("mail.smtp.ssl.enable" , "true" );
132
148
try {
133
149
Session session = Session .getDefaultInstance (props , new Authenticator () {
134
150
@ Override
135
151
public PasswordAuthentication getPasswordAuthentication () {
136
- return new PasswordAuthentication (System . getenv ( " emailAccount" ), System . getenv ( " emailPassword" ) );
152
+ return new PasswordAuthentication (emailAccount , emailPassword );
137
153
}
138
154
});
139
155
140
156
MimeMessage message = new MimeMessage (session );
141
- message .setFrom (System . getenv ( " emailAccount" ) );
142
- message .addRecipient (Message .RecipientType .TO , new InternetAddress (System . getenv ( " emailAccount" ) ));
143
- message .setSubject (DateTimeFormatter .ofPattern ("yyyy-MM-dd HH:mm" ).format (LocalDateTime .now ()));
157
+ message .setFrom (emailAccount );
158
+ message .addRecipient (Message .RecipientType .TO , new InternetAddress (emailAccount ));
159
+ message .setSubject ("zdm优惠信息汇总" + DateTimeFormatter .ofPattern ("yyyy-MM-dd HH:mm" ).format (LocalDateTime .now ()));
144
160
message .setContent (text , "text/html;charset=UTF-8" );
145
161
Transport .send (message );
146
162
} catch (Exception e ) {
147
163
e .printStackTrace ();
148
164
throw new RuntimeException ("邮件发送失败" );
149
165
}
150
166
}
167
+
168
+ private static void pushToWx (String text , String spt ) {
169
+ if (StringUtils .isBlank (spt )) {
170
+ System .out .println ("WxPusher推送配置不完整,将尝试其他推送方式" );
171
+ return ;
172
+ }
173
+ HashMap <String , Object > body = new HashMap <>();
174
+ //推送内容
175
+ body .put ("content" , text );
176
+ //消息摘要,显示在微信聊天页面或者模版消息卡片上,限制长度20(微信只能显示20),可以不传,不传默认截取content前面的内容。
177
+ body .put ("summary" , "zdm优惠信息汇总" );
178
+ //内容类型 1表示文字 2表示html 3表示markdown
179
+ body .put ("contentType" , "2" );
180
+ body .put ("spt" , System .getenv ("spt" ));
181
+ String response = HttpUtil .createPost (WXPUSHER_URL )
182
+ .contentType (ContentType .JSON .getValue ())
183
+ .body (JSONObject .toJSONString (body ))
184
+ .execute ().body ();
185
+ System .out .println ("WxPusher response:" + response );
186
+ JSONObject jsonObject = (JSONObject ) JSONObject .parse (response );
187
+ //状态码,非1000表示有异常
188
+ String code = jsonObject .getString ("code" );
189
+ if (!"1000" .equals (code ))
190
+ throw new RuntimeException ("WxPusher推送失败:" + jsonObject .getString ("msg" ));
191
+ }
151
192
}
0 commit comments