Skip to content

Commit ce7e00f

Browse files
committed
新增WxPusher推送方式
1 parent 0c09a75 commit ce7e00f

File tree

7 files changed

+123
-81
lines changed

7 files changed

+123
-81
lines changed

.github/workflows/zdm_crawler.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,16 @@ jobs:
3131
MIN_PUSH_SIZE: 20
3232
# 邮箱的smtp服务器地址
3333
emailHost: smtp.qq.com
34-
# 接收优惠信息的邮箱
34+
# 接收优惠信息的邮箱 为空代表不进行邮箱推送
3535
emailAccount: ${{secrets.emailAccount}}
36-
# 接收优惠信息的邮箱的授权码
36+
# 接收优惠信息的邮箱的授权码 为空代表不进行邮箱推送
3737
emailPassword: ${{secrets.emailPassword}}
38+
# WxPusher极简推送模式的spt 为空代表不进行Wx推送
39+
spt: ${{secrets.spt}}
3840
run: java -jar zdm.jar
41+
#保存已推送和暂未推送的优惠信息
3942
- name: git add files
4043
run: |
41-
git add pushed.txt
4244
git add unpushed.txt
4345
git add ./logs/
4446
- name: commit & push
@@ -47,4 +49,4 @@ jobs:
4749
author-email: 'actions@github.com'
4850
author-name: 'GitHub Actions'
4951
commit-message: 'update articles'
50-
token: ${{ secrets.GIT_TOKEN }}
52+
token: ${{secrets.GIT_TOKEN}}

pushed.txt

Whitespace-only changes.

readme.md

+17-18
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
# 项目简介
22

3-
定时按什么值得买的好价排行榜,收集优惠信息并推送至邮箱,基于java实现,欢迎各位值友来捉虫
4-
* 邮件的正文内容是html,优惠信息将以表格的形式显示在正文中
5-
* 已推送的优惠信息会被记录在项目根目录的`pushed.txt`文件中,每次运行项目都会自动提交代码,更新这个文件
3+
定时从什么值得买的好价排行榜中收集优惠信息,并推送至邮箱或微信. 基于java实现,通过GitHub Actions定时运行,无需搭建服务器. 欢迎各位值友来捉虫
4+
* 推送的优惠信息将以html表格的形式显示在正文中
5+
* 已推送的优惠信息会被记录在项目根目录的`pushed.txt`文件中,每次运行项目都会自动提交代码,更新这个文件,下次运行将跳过这些信息
66
* 有其他改进的建议可以提issue给我
77
* 麻烦给个star支持一下:heart:
88

99
# 运行
1010

1111
* fork本仓库
12-
* 新增Actions secrets
12+
* 新增Actions secrets. 如果需要用邮箱推送,则填写`EMAILACCOUNT`和`EMAILPASSWORD`. 需要用微信推送的话则填写`SPT`
1313

14-
| secret | 说明 |
15-
| ------------- | ------------------------------------------------------------ |
16-
| EMAILACCOUNT | 接收优惠信息的邮箱 |
17-
| EMAILPASSWORD | 邮箱的授权码,[参考qq邮箱的这篇文档](https://service.mail.qq.com/cgi-bin/help?subtype=1&&id=28&&no=1001256) |
18-
| GIT_TOKEN | [参考这篇文章](http://t.zoukankan.com/joe235-p-15152380.html),只勾选repo的权限,Expiration设置为No Expiration |
14+
| secret | | 说明 |
15+
|---------------|---|-----------------------------------------------------------------------------------------------|
16+
| EMAILACCOUNT | 选填 | 接收优惠信息的邮箱 |
17+
| EMAILPASSWORD | 选填 | 邮箱的授权码,[参考qq邮箱的这篇文档](https://service.mail.qq.com/cgi-bin/help?subtype=1&&id=28&&no=1001256) |
18+
| SPT | 选填 | WxPusher极简推送使用的身份ID,[参考WxPusher文档](https://wxpusher.zjiecode.com/docs/#/?id=spt) |
19+
| GIT_TOKEN | 必填 | [参考这篇文章](http://t.zoukankan.com/joe235-p-15152380.html),只勾选repo的权限,Expiration设置为No Expiration |
1920

2021
<img src="https://raw.githubusercontent.com/lx1169732264/Images/master/zdmActions.png" width = "700" height = "350" alt="图片名称" align=center />
2122

2223

23-
2424
* 打开fork项目的workFlow开关
2525

2626
<img src="https://raw.githubusercontent.com/lx1169732264/Images/master/enableWorkFlow.png" width = "700" height = "350" alt="图片名称" align=center />
@@ -68,12 +68,11 @@
6868

6969
# CHANGELOG
7070

71-
| 日期 | 说明 |
72-
| ------------- | ------------------------------------------------------------ |
73-
| 2023/5/4 | 已推送优惠信息按日期在logs文件夹下归类记录.避免单个文件记录数据量过大的问题 |
74-
| 2023/1/31 | 实现定时按什么值得买的好价排行榜,收集优惠信息并推送至邮箱的功能 |
75-
| 2024/10/22    | 邮箱登录切换到 smtp ssl 465 端口模式以解决 QQ 邮箱不再支持 smtp 明文模式问题 |
76-
| 2024/10/23 | 支持白名单匹配模式以允许用户设置感兴趣的关键词进行推送 |
77-
78-
71+
| 日期 | 说明 |
72+
|------------|------------------------------------------------------------------------------------|
73+
| 2023/5/4 | 已推送优惠信息按日期在logs文件夹下归类记录.避免单个文件记录数据量过大的问题 |
74+
| 2023/1/31 | 实现定时按什么值得买的好价排行榜,收集优惠信息并推送至邮箱的功能 |
75+
| 2024/10/22 | 邮箱登录切换到 smtp ssl 465 端口模式以解决 QQ 邮箱不再支持 smtp 明文模式问题                                |
76+
| 2024/10/23 | 支持白名单匹配模式以允许用户设置感兴趣的关键词进行推送 |
77+
| 2024/11/22 | 1.新增WxPusher推送方式; 2.为了适应移动端小屏幕展示效果,将跳转至什么值得买的超链接从详情列改为商品标题列,并移除了详情列,让表格的其余列有更充足的宽度 |
7978

src/main/java/lx/ZdmCrawler.java

+87-46
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
package lx;
22

3+
import cn.hutool.core.io.IORuntimeException;
4+
import cn.hutool.http.ContentType;
5+
import cn.hutool.http.HttpException;
6+
import cn.hutool.http.HttpUtil;
7+
import com.alibaba.fastjson.JSONObject;
8+
import com.google.common.collect.Lists;
9+
import lx.model.Zdm;
10+
import lx.utils.StreamUtils;
11+
import lx.utils.Utils;
12+
import org.apache.commons.lang3.StringUtils;
13+
14+
import javax.mail.*;
15+
import javax.mail.internet.InternetAddress;
16+
import javax.mail.internet.MimeMessage;
317
import java.io.File;
418
import java.io.IOException;
519
import java.nio.file.Files;
@@ -8,47 +22,31 @@
822
import java.time.LocalDate;
923
import java.time.LocalDateTime;
1024
import java.time.format.DateTimeFormatter;
11-
import java.util.ArrayList;
12-
import java.util.Comparator;
13-
import java.util.HashSet;
14-
import java.util.LinkedHashSet;
15-
import java.util.List;
16-
import java.util.Properties;
17-
import java.util.Set;
25+
import java.util.*;
1826
import java.util.stream.Collectors;
1927

20-
import javax.mail.Authenticator;
21-
import javax.mail.Message;
22-
import javax.mail.PasswordAuthentication;
23-
import javax.mail.Session;
24-
import javax.mail.Transport;
25-
import javax.mail.internet.InternetAddress;
26-
import javax.mail.internet.MimeMessage;
27-
28-
import org.apache.commons.lang3.StringUtils;
29-
30-
import com.alibaba.fastjson.JSONObject;
31-
32-
import cn.hutool.core.io.IORuntimeException;
33-
import cn.hutool.http.HttpException;
34-
import cn.hutool.http.HttpUtil;
35-
import lx.model.Zdm;
36-
import lx.utils.StreamUtils;
37-
import lx.utils.Utils;
38-
28+
import static lx.utils.Const.WXPUSHER_URL;
3929
import static lx.utils.Const.ZDM_URL;
4030

4131
public class ZdmCrawler {
4232

4333
public static void main(String[] args) {
34+
String emailHost = System.getenv("emailHost"), emailAccount = System.getenv("emailAccount"),
35+
emailPassword = System.getenv("emailPassword"), spt = System.getenv("spt");
36+
int minVoted = Integer.parseInt(System.getenv("minVoted")), minComments = Integer.parseInt(System.getenv("minComments")),
37+
minPushSize = Integer.parseInt(System.getenv("MIN_PUSH_SIZE"));
38+
39+
//git actions部署的服务器一般在海外,调整为东八区的时区
40+
TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"));
41+
4442
Set<Zdm> zdms = ZDM_URL.stream().flatMap(url -> {
4543
List<Zdm> zdmPage = new ArrayList<>();
4644
for (int i = 1; i <= 20; i++) {//爬取前20页数据
4745
try {
4846
String s = HttpUtil.get(url + i, 10000);
4947
List<Zdm> zdmPart = JSONObject.parseArray(s, Zdm.class);
5048
zdmPart.forEach(zdm -> {
51-
//将评论和点值数量的值后面会跟着'k','w'这种字符,将它们转换一下方便后面过滤和排序
49+
//评论和点值数量的值后面会跟着'k','w'这种字符,将它们转换一下方便后面过滤和排序
5250
zdm.setComments(Utils.strNumberFormat(zdm.getComments()));
5351
zdm.setVoted(Utils.strNumberFormat(zdm.getVoted()));
5452
});
@@ -66,11 +64,7 @@ public static void main(String[] args) {
6664
HashSet<String> unPushed = Utils.readFile("./unpushed.txt");
6765
zdms.addAll(StreamUtils.map(unPushed, o -> JSONObject.parseObject(o, Zdm.class)));
6866

69-
//黑词过滤
70-
HashSet<String> blackWords = Utils.readFile("./black_words.txt");
71-
blackWords.removeIf(StringUtils::isBlank);
72-
73-
//已推送的优惠信息id
67+
//已推送的优惠信息
7468
Set<String> pushedIds;
7569
try {
7670
new File("./logs/").mkdirs();
@@ -84,6 +78,10 @@ public static void main(String[] args) {
8478
throw new RuntimeException("读取logs目录失败");
8579
}
8680

81+
//黑词过滤
82+
HashSet<String> blackWords = Utils.readFile("./black_words.txt");
83+
blackWords.removeIf(StringUtils::isBlank);
84+
8785
//白词过滤内容
8886
HashSet<String> whiteWords = Utils.readFile("./white_words.txt");
8987
whiteWords.removeIf(StringUtils::isBlank);
@@ -93,8 +91,8 @@ public static void main(String[] args) {
9391
System.out.println("whiteWords is empty, running in blackWords mode.");
9492
zdms = new HashSet<>(StreamUtils.filter(zdms, z ->
9593
StringUtils.isBlank(StreamUtils.findFirst(blackWords, w -> z.getTitle().contains(w))) //黑词过滤
96-
&& Integer.parseInt(z.getVoted()) > Integer.parseInt(System.getenv("minVoted")) //值的数量
97-
&& Integer.parseInt(z.getComments()) > Integer.parseInt(System.getenv("minComments")) //评论的数量
94+
&& Integer.parseInt(z.getVoted()) > minVoted //值的数量
95+
&& Integer.parseInt(z.getComments()) > minComments //评论的数量
9896
&& !z.getPrice().contains("前") //不是前xxx名的耍猴抢购
9997
&& !pushedIds.contains(z.getArticleId()) //不是已经推送过的
10098
));
@@ -106,46 +104,89 @@ public static void main(String[] args) {
106104
}
107105
zdms = new HashSet<>(StreamUtils.filter(zdms, z ->
108106
!StringUtils.isBlank(StreamUtils.findFirst(whiteWords, w -> z.getTitle().contains(w))) //白词过滤
109-
&& Integer.parseInt(z.getVoted()) > Integer.parseInt(System.getenv("minVoted")) //值的数量
110-
&& Integer.parseInt(z.getComments()) > Integer.parseInt(System.getenv("minComments")) //评论的数量
107+
&& Integer.parseInt(z.getVoted()) > minVoted //值的数量
108+
&& Integer.parseInt(z.getComments()) > minComments //评论的数量
111109
&& !z.getPrice().contains("前") //不是前xxx名的耍猴抢购
112110
&& !pushedIds.contains(z.getArticleId()) //不是已经推送过的
113111
));
114112
}
115113

116114
zdms.forEach(z -> System.out.println(z.getArticleId() + " | " + z.getTitle()));
117115

118-
if (zdms.size() > Integer.parseInt(System.getenv("MIN_PUSH_SIZE"))) {
119-
sendEmail(Utils.buildMessage(new ArrayList<>(zdms)));
120-
Utils.write("./logs/" + LocalDate.now() + "/pushed.txt", true, StreamUtils.map(zdms, Zdm::getArticleId));
121-
} else {
116+
//优惠信息单次推送的数量阈值,未达到阈值则暂存推送信息等待下次运行
117+
if (zdms.size() < minPushSize) {
118+
//记录暂未推送的优惠信息
122119
Utils.write("./unpushed.txt", false, StreamUtils.map(zdms, JSONObject::toJSONString));
120+
return;
123121
}
122+
123+
//部分推送方式存在内容长度限制, 这里加了单次推送的条数限制, 超出则分批推送
124+
Lists.partition(new ArrayList<>(zdms), 100).forEach(part -> {
125+
//生成推送消息的正文内容(html格式)
126+
String text = Utils.buildMessage(new ArrayList<>(part));
127+
//通过邮箱推送
128+
pushToEmail(text, emailHost, emailAccount, emailPassword);
129+
//通过WxPusher推送
130+
pushToWx(text, spt);
131+
//记录已推送的优惠信息
132+
List<String> articleIds = StreamUtils.map(part, Zdm::getArticleId);
133+
Utils.write("./logs/" + LocalDate.now() + "/pushed.txt", true, articleIds);
134+
});
124135
}
125136

126-
public static void sendEmail(String text) {
137+
private static void pushToEmail(String text, String emailHost, String emailAccount, String emailPassword) {
138+
if (StringUtils.isBlank(emailHost) || StringUtils.isBlank(emailAccount) || StringUtils.isBlank(emailPassword)) {
139+
System.out.println("邮箱推送配置不完整,将尝试其他推送方式");
140+
return;
141+
}
142+
127143
Properties props = new Properties();
128-
props.setProperty("mail.smtp.host", System.getenv("emailHost"));
144+
props.setProperty("mail.smtp.host", emailHost);
129145
props.setProperty("mail.smtp.port", "465");
130146
props.setProperty("mail.smtp.auth", "true");
131147
props.setProperty("mail.smtp.ssl.enable", "true");
132148
try {
133149
Session session = Session.getDefaultInstance(props, new Authenticator() {
134150
@Override
135151
public PasswordAuthentication getPasswordAuthentication() {
136-
return new PasswordAuthentication(System.getenv("emailAccount"), System.getenv("emailPassword"));
152+
return new PasswordAuthentication(emailAccount, emailPassword);
137153
}
138154
});
139155

140156
MimeMessage message = new MimeMessage(session);
141-
message.setFrom(System.getenv("emailAccount"));
142-
message.addRecipient(Message.RecipientType.TO, new InternetAddress(System.getenv("emailAccount")));
143-
message.setSubject(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(LocalDateTime.now()));
157+
message.setFrom(emailAccount);
158+
message.addRecipient(Message.RecipientType.TO, new InternetAddress(emailAccount));
159+
message.setSubject("zdm优惠信息汇总" + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(LocalDateTime.now()));
144160
message.setContent(text, "text/html;charset=UTF-8");
145161
Transport.send(message);
146162
} catch (Exception e) {
147163
e.printStackTrace();
148164
throw new RuntimeException("邮件发送失败");
149165
}
150166
}
167+
168+
private static void pushToWx(String text, String spt) {
169+
if (StringUtils.isBlank(spt)) {
170+
System.out.println("WxPusher推送配置不完整,将尝试其他推送方式");
171+
return;
172+
}
173+
HashMap<String, Object> body = new HashMap<>();
174+
//推送内容
175+
body.put("content", text);
176+
//消息摘要,显示在微信聊天页面或者模版消息卡片上,限制长度20(微信只能显示20),可以不传,不传默认截取content前面的内容。
177+
body.put("summary", "zdm优惠信息汇总");
178+
//内容类型 1表示文字 2表示html 3表示markdown
179+
body.put("contentType", "2");
180+
body.put("spt", System.getenv("spt"));
181+
String response = HttpUtil.createPost(WXPUSHER_URL)
182+
.contentType(ContentType.JSON.getValue())
183+
.body(JSONObject.toJSONString(body))
184+
.execute().body();
185+
System.out.println("WxPusher response:" + response);
186+
JSONObject jsonObject = (JSONObject) JSONObject.parse(response);
187+
//状态码,非1000表示有异常
188+
String code = jsonObject.getString("code");
189+
if (!"1000".equals(code))
190+
throw new RuntimeException("WxPusher推送失败:" + jsonObject.getString("msg"));
191+
}
151192
}

src/main/java/lx/model/Zdm.java

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
package lx.model;
22

3-
import java.util.Objects;
4-
53
import com.alibaba.fastjson.annotation.JSONField;
6-
74
import lombok.AccessLevel;
85
import lombok.Getter;
96
import lombok.Setter;
107
import lombok.experimental.FieldDefaults;
118

9+
import java.util.Objects;
10+
1211
@Getter
1312
@Setter
1413
@FieldDefaults(level = AccessLevel.PRIVATE)
@@ -40,12 +39,11 @@ public class Zdm implements Crawlable {
4039
@Override
4140
public String toHtmlTr() {
4241
return "<tr>" +
43-
"<td><img width='100px' height='100px' src='" + picUrl + "'/></td>" +
44-
"<td>" + title + "</td>" +
42+
"<td><img src='" + picUrl + "'/></td>" +
43+
"<td>" + "<a target='_blank' href='" + url + "'>" + title + "</a></td>" +
4544
"<td>" + price + "</td>" +
4645
"<td>" + voted + "/" + comments + "</td>" +
4746
"<td>" + articleMall + "</td>" +
48-
"<td>" + "<a href='" + url + "'>详情</a>" + "</td>" +
4947
"</tr>";
5048
}
5149

src/main/java/lx/utils/Const.java

+7-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package lx.utils;
22

3-
import java.util.List;
4-
53
import com.google.common.collect.Lists;
64

7-
public class Const {
8-
public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36";
5+
import java.util.List;
96

10-
public static final List<String> ZDM_URL = Lists.newArrayList("https://faxian.smzdm.com/json_more?filter=h2s0t0f0c3&page=",
7+
public interface Const {
8+
9+
List<String> ZDM_URL = Lists.newArrayList("https://faxian.smzdm.com/json_more?filter=h2s0t0f0c3&page=",
1110
"https://faxian.smzdm.com/json_more?filter=h3s0t0f0c3&page=");
1211

12+
//WxPusher极简推送模式的链接
13+
String WXPUSHER_URL = "https://wxpusher.zjiecode.com/api/send/message/simple-push";
14+
1315
}

src/main/java/lx/utils/Utils.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ public static String buildMessage(List<? extends Crawlable> zdms) {
2020
zdms.sort(Comparator.comparingInt(Crawlable::obtainSortOrder).reversed());
2121

2222
StringBuilder s = new StringBuilder();
23-
s.append("<table border=\"1\">");
24-
s.append("<tr><th>图</th><th>标题</th><th>价格</th><th>赞/评</th><th>平台</th></tr>");
23+
s.append("<table border='1'>");
24+
s.append("<tr><th width='20%'>图</th><th width='45%'>标题</th><th width='15%'>价格</th><th width='10%'>赞/评</th><th width='10%'>平台</th></tr>");
2525
zdms.forEach(z -> s.append(z.toHtmlTr()));
2626
s.append("</table>");
2727
return s.toString();

0 commit comments

Comments
 (0)