Skip to content

Commit 99d04af

Browse files
committed
upload code
1 parent c9cc3bf commit 99d04af

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+3786
-0
lines changed

figures/AI_text.jpg

1.77 MB

figures/Android.png

14.9 MB

figures/Claude3_Fast-DetectGPT.png

299 KB

figures/Claude3_GPTzero.png

938 KB

figures/Claude3_HC3.png

375 KB

figures/Claude3_text.png

442 KB

figures/Claude3_星鉴.png

534 KB

figures/DNA_GPT.pdf

386 KB
Binary file not shown.

figures/DetctGPT_流程.pdf

461 KB
Binary file not shown.

figures/DetctGPT_流程.svg

+1,901

figures/FastDetectGPT.pdf

231 KB
Binary file not shown.

figures/LLMs_History.pdf

1.4 MB
Binary file not shown.

figures/ModalNet-21.png

155 KB

figures/ModalNet-32.png

50.5 KB

figures/OCR_result.png

777 KB

figures/OpenAI.pdf

877 KB
Binary file not shown.

figures/PDF_result.png

128 KB

figures/PDF高亮结果.png

363 KB
146 KB

figures/ROC.pdf

54.2 KB
Binary file not shown.

figures/SIR.png

31.4 KB

figures/TXT_result.png

47.9 KB

figures/Token.png

63 KB

figures/UI.png

190 KB

figures/V-model.png

129 KB

figures/Web_model.png

145 KB

figures/combined_roc_curve.png

40.6 KB

figures/detection_report.pdf

63.8 KB
Binary file not shown.

figures/fake_news.png

1.1 MB

figures/fake_paper.png

777 KB

figures/image.png

4.92 MB

figures/roc_curve.png

38.3 KB

figures/server_config.png

362 KB
5.81 MB

figures/upload_picture.png

836 KB

figures/上传文件检测.png

440 KB

figures/不同模型性能对比.png

54.3 KB

figures/动态对抗攻击.pdf

194 KB
Binary file not shown.

figures/对比图.png

61.6 KB

figures/攻击分类.png

195 KB

figures/文件检测结果.png

297 KB

figures/文本检测结果.png

680 KB

figures/文本深度分析.png

879 KB

figures/文本过长警告.png

563 KB

figures/星鉴安装程序.png

4.92 MB

figures/星鉴性能对比图.pdf

26.7 KB
Binary file not shown.

figures/星鉴速度对比图.pdf

21.7 KB
Binary file not shown.

figures/系统主页.png

278 KB

figures/系统架构图.pdf

64.4 KB
Binary file not shown.

figures/输入文本检测.png

575 KB

figures/选择中文检测.png

287 KB

figures/递归释义攻击.png

15.2 KB
1.22 MB

main.bbl

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
\begin{thebibliography}{10}
2+
3+
\bibitem{vaswani2017attention}
4+
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan~N Gomez, {\L}ukasz Kaiser, and Illia Polosukhin.
5+
\newblock Attention is all you need.
6+
\newblock {\em Advances in neural information processing systems}, 30, 2017.
7+
8+
\bibitem{zhao2023survey}
9+
Wayne~Xin Zhao, Kun Zhou, Junyi Li, Tianyi Tang, Xiaolei Wang, Yupeng Hou, Yingqian Min, Beichen Zhang, Junjie Zhang, Zican Dong, et~al.
10+
\newblock A survey of large language models.
11+
\newblock {\em arXiv preprint arXiv:2303.18223}, 2023.
12+
13+
\bibitem{gehrmann2019gltr}
14+
Sebastian Gehrmann, Hendrik Strobelt, and Alexander~M Rush.
15+
\newblock Gltr: Statistical detection and visualization of generated text.
16+
\newblock {\em arXiv preprint arXiv:1906.04043}, 2019.
17+
18+
\bibitem{peters2018deep}
19+
Matthew~E Peters, Mark Neumann, Mohit Iyyer, Matt Gardner, Christopher Clark, Kenton Lee, and Luke Zettlemoyer.
20+
\newblock Deep contextualized word representations.
21+
\newblock {\em arXiv preprint arXiv:1802.05365}, 2018.
22+
23+
\bibitem{devlin2018bert}
24+
Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova.
25+
\newblock Bert: Pre-training of deep bidirectional transformers for language understanding.
26+
\newblock {\em arXiv preprint arXiv:1810.04805}, 2018.
27+
28+
\bibitem{liu2019roberta}
29+
Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov.
30+
\newblock Roberta: A robustly optimized bert pretraining approach.
31+
\newblock {\em arXiv preprint arXiv:1907.11692}, 2019.
32+
33+
\bibitem{badaskar2008identifying}
34+
Sameer Badaskar, Sachin Agarwal, and Shilpa Arora.
35+
\newblock Identifying real or fake articles: Towards better language modeling.
36+
\newblock In {\em Proceedings of the Third International Joint Conference on Natural Language Processing: Volume-II}, 2008.
37+
38+
\bibitem{lavergne2008detecting}
39+
Thomas Lavergne, Tanguy Urvoy, and Fran{\c{c}}ois Yvon.
40+
\newblock Detecting fake content with relative entropy scoring.
41+
\newblock {\em Pan}, 8(27-31):4, 2008.
42+
43+
\bibitem{beresneva2016computer}
44+
Daria Beresneva.
45+
\newblock Computer-generated text detection using machine learning: A systematic review.
46+
\newblock In {\em Natural Language Processing and Information Systems: 21st International Conference on Applications of Natural Language to Information Systems, NLDB 2016, Salford, UK, June 22-24, 2016, Proceedings 21}, pages 421--426. Springer, 2016.
47+
48+
\bibitem{kirchenbauer2023watermark}
49+
John Kirchenbauer, Jonas Geiping, Yuxin Wen, Jonathan Katz, Ian Miers, and Tom Goldstein.
50+
\newblock A watermark for large language models.
51+
\newblock In {\em International Conference on Machine Learning}, pages 17061--17084. PMLR, 2023.
52+
53+
\bibitem{chakraborty2023possibilities}
54+
Souradip Chakraborty, Amrit~Singh Bedi, Sicheng Zhu, Bang An, Dinesh Manocha, and Furong Huang.
55+
\newblock On the possibilities of ai-generated text detection.
56+
\newblock {\em arXiv preprint arXiv:2304.04736}, 2023.
57+
58+
\bibitem{mitchell2023detectgpt}
59+
Eric Mitchell, Yoonho Lee, Alexander Khazatsky, Christopher~D Manning, and Chelsea Finn.
60+
\newblock Detectgpt: Zero-shot machine-generated text detection using probability curvature.
61+
\newblock In {\em International Conference on Machine Learning}, pages 24950--24962. PMLR, 2023.
62+
63+
\bibitem{bao2023fast}
64+
Guangsheng Bao, Yanbin Zhao, Zhiyang Teng, Linyi Yang, and Yue Zhang.
65+
\newblock Fast-detectgpt: Efficient zero-shot detection of machine-generated text via conditional probability curvature.
66+
\newblock {\em arXiv preprint arXiv:2310.05130}, 2023.
67+
68+
\bibitem{yang2023dna}
69+
Xianjun Yang, Wei Cheng, Yue Wu, Linda Petzold, William~Yang Wang, and Haifeng Chen.
70+
\newblock Dna-gpt: Divergent n-gram analysis for training-free detection of gpt-generated text.
71+
\newblock {\em arXiv preprint arXiv:2305.17359}, 2023.
72+
73+
\bibitem{sadasivan2023can}
74+
Vinu~Sankar Sadasivan, Aounon Kumar, Sriram Balasubramanian, Wenxiao Wang, and Soheil Feizi.
75+
\newblock Can ai-generated text be reliably detected?
76+
\newblock {\em arXiv preprint arXiv:2303.11156}, 2023.
77+
78+
\bibitem{zhou2024humanizing}
79+
Ying Zhou, Ben He, and Le~Sun.
80+
\newblock Humanizing machine-generated content: Evading ai-text detection through adversarial attack.
81+
\newblock {\em arXiv preprint arXiv:2404.01907}, 2024.
82+
83+
\bibitem{guo2023close}
84+
Biyang Guo, Xin Zhang, Ziyuan Wang, Minqi Jiang, Jinran Nie, Yuxuan Ding, Jianwei Yue, and Yupeng Wu.
85+
\newblock How close is chatgpt to human experts? comparison corpus, evaluation, and detection.
86+
\newblock {\em arXiv preprint arXiv:2301.07597}, 2023.
87+
88+
\bibitem{wang2024llm}
89+
Rongsheng Wang, Haoming Chen, Ruizhe Zhou, Han Ma, Yaofei Duan, Yanlan Kang, Songhua Yang, Baoyu Fan, and Tao Tan.
90+
\newblock Llm-detector: Improving ai-generated chinese text detection with open-source llm instruction tuning.
91+
\newblock {\em arXiv preprint arXiv:2402.01158}, 2024.
92+
93+
\bibitem{krishna2024paraphrasing}
94+
Kalpesh Krishna, Yixiao Song, Marzena Karpinska, John Wieting, and Mohit Iyyer.
95+
\newblock Paraphrasing evades detectors of ai-generated text, but retrieval is an effective defense.
96+
\newblock {\em Advances in Neural Information Processing Systems}, 36, 2024.
97+
98+
\end{thebibliography}

main.pdf

30.8 MB
Binary file not shown.

main.synctex.gz

451 KB
Binary file not shown.

main.tex

+1,548
Large diffs are not rendered by default.

references.bib

+239
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
%Transformer
2+
@article{vaswani2017attention,
3+
title={Attention is all you need},
4+
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
5+
journal={Advances in neural information processing systems},
6+
volume={30},
7+
year={2017}
8+
}
9+
@article{zhao2023survey,
10+
title={A survey of large language models},
11+
author={Zhao, Wayne Xin and Zhou, Kun and Li, Junyi and Tang, Tianyi and Wang, Xiaolei and Hou, Yupeng and Min, Yingqian and Zhang, Beichen and Zhang, Junjie and Dong, Zican and others},
12+
journal={arXiv preprint arXiv:2303.18223},
13+
year={2023}
14+
}
15+
16+
%GLTR
17+
@article{gehrmann2019gltr,
18+
title={{GLTR}: Statistical detection and visualization of generated text},
19+
author={Gehrmann, Sebastian and Strobelt, Hendrik and Rush, Alexander M},
20+
journal={arXiv preprint arXiv:1906.04043},
21+
year={2019}
22+
}
23+
24+
%bert
25+
@article{devlin2018bert,
26+
title={{BERT}: Pre-training of deep bidirectional transformers for language understanding},
27+
author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
28+
journal={arXiv preprint arXiv:1810.04805},
29+
year={2018}
30+
}
31+
32+
%roberta
33+
@article{liu2019roberta,
34+
title={{RoBERTa}: A robustly optimized {BERT} pretraining approach},
35+
author={Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
36+
journal={arXiv preprint arXiv:1907.11692},
37+
year={2019}
38+
}
39+
40+
%HC3
41+
@article{guo2023close,
42+
title={How close is {ChatGPT} to human experts? comparison corpus, evaluation, and detection},
43+
author={Guo, Biyang and Zhang, Xin and Wang, Ziyuan and Jiang, Minqi and Nie, Jinran and Ding, Yuxuan and Yue, Jianwei and Wu, Yupeng},
44+
journal={arXiv preprint arXiv:2301.07597},
45+
year={2023}
46+
}
47+
48+
%LLM-Detector
49+
@article{wang2024llm,
50+
title={{LLM-Detector}: Improving {AI}-Generated {Chinese} Text Detection with Open-Source {LLM} Instruction Tuning},
51+
author={Wang, Rongsheng and Chen, Haoming and Zhou, Ruizhe and Ma, Han and Duan, Yaofei and Kang, Yanlan and Yang, Songhua and Fan, Baoyu and Tan, Tao},
52+
journal={arXiv preprint arXiv:2402.01158},
53+
year={2024}
54+
}
55+
56+
%域外性能不佳
57+
@article{chakraborty2023possibilities,
58+
title={On the possibilities of {AI}-generated text detection},
59+
author={Chakraborty, Souradip and Bedi, Amrit Singh and Zhu, Sicheng and An, Bang and Manocha, Dinesh and Huang, Furong},
60+
journal={arXiv preprint arXiv:2304.04736},
61+
year={2023}
62+
}
63+
64+
%DetectGPT
65+
@inproceedings{mitchell2023detectgpt,
66+
title={{DetectGPT}: Zero-shot machine-generated text detection using probability curvature},
67+
author={Mitchell, Eric and Lee, Yoonho and Khazatsky, Alexander and Manning, Christopher D and Finn, Chelsea},
68+
booktitle={International Conference on Machine Learning},
69+
pages={24950--24962},
70+
year={2023},
71+
organization={PMLR}
72+
}
73+
74+
%Fast DetectGPT
75+
@article{bao2023fast,
76+
title={{Fast-DetectGPT}: Efficient zero-shot detection of machine-generated text via conditional probability curvature},
77+
author={Bao, Guangsheng and Zhao, Yanbin and Teng, Zhiyang and Yang, Linyi and Zhang, Yue},
78+
journal={arXiv preprint arXiv:2310.05130},
79+
year={2023}
80+
}
81+
82+
%n-gram
83+
@inproceedings{badaskar2008identifying,
84+
title={Identifying real or fake articles: Towards better language modeling},
85+
author={Badaskar, Sameer and Agarwal, Sachin and Arora, Shilpa},
86+
booktitle={Proceedings of the Third International Joint Conference on Natural Language Processing: Volume-II},
87+
year={2008}
88+
}
89+
90+
%熵
91+
@article{lavergne2008detecting,
92+
title={Detecting Fake Content with Relative Entropy Scoring.},
93+
author={Lavergne, Thomas and Urvoy, Tanguy and Yvon, Fran{\c{c}}ois},
94+
journal={Pan},
95+
volume={8},
96+
number={27-31},
97+
pages={4},
98+
year={2008}
99+
}
100+
101+
%困惑度
102+
@inproceedings{beresneva2016computer,
103+
title={Computer-generated text detection using machine learning: A systematic review},
104+
author={Beresneva, Daria},
105+
booktitle={Natural Language Processing and Information Systems: 21st International Conference on Applications of Natural Language to Information Systems, NLDB 2016, Salford, UK, June 22-24, 2016, Proceedings 21},
106+
pages={421--426},
107+
year={2016},
108+
organization={Springer}
109+
}
110+
111+
@inproceedings{kirchenbauer2023watermark,
112+
title={A watermark for large language models},
113+
author={Kirchenbauer, John and Geiping, Jonas and Wen, Yuxin and Katz, Jonathan and Miers, Ian and Goldstein, Tom},
114+
booktitle={International Conference on Machine Learning},
115+
pages={17061--17084},
116+
year={2023},
117+
organization={PMLR}
118+
}
119+
120+
%DNAGPT
121+
@article{yang2023dna,
122+
title={{DNA-GPT}: Divergent n-gram analysis for training-free detection of {GPT}-generated text},
123+
author={Yang, Xianjun and Cheng, Wei and Wu, Yue and Petzold, Linda and Wang, William Yang and Chen, Haifeng},
124+
journal={arXiv preprint arXiv:2305.17359},
125+
year={2023}
126+
}
127+
128+
%递归释义攻击
129+
@article{sadasivan2023can,
130+
title={Can {AI}-generated text be reliably detected?},
131+
author={Sadasivan, Vinu Sankar and Kumar, Aounon and Balasubramanian, Sriram and Wang, Wenxiao and Feizi, Soheil},
132+
journal={arXiv preprint arXiv:2303.11156},
133+
year={2023}
134+
}
135+
@article{krishna2024paraphrasing,
136+
title={Paraphrasing evades detectors of {AI}-generated text, but retrieval is an effective defense},
137+
author={Krishna, Kalpesh and Song, Yixiao and Karpinska, Marzena and Wieting, John and Iyyer, Mohit},
138+
journal={Advances in Neural Information Processing Systems},
139+
volume={36},
140+
year={2024}
141+
}
142+
%动态对抗攻击
143+
@article{zhou2024humanizing,
144+
title={Humanizing Machine-Generated Content: Evading {AI}-Text Detection through Adversarial Attack},
145+
author={Zhou, Ying and He, Ben and Sun, Le},
146+
journal={arXiv preprint arXiv:2404.01907},
147+
year={2024}
148+
}
149+
150+
151+
@article{hendrycks2016gaussian,
152+
title={Gaussian error linear units ({GELUs})},
153+
author={Hendrycks, Dan and Gimpel, Kevin},
154+
journal={arXiv preprint arXiv:1606.08415},
155+
year={2016}
156+
}
157+
158+
159+
@article{peters2018deep,
160+
title={Deep contextualized word representations},
161+
author={Peters, Matthew E and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke},
162+
journal={arXiv preprint arXiv:1802.05365},
163+
year={2018}
164+
}
165+
166+
167+
@article{soto2024few,
168+
title={Few-Shot Detection of Machine-Generated Text using Style Representations},
169+
author={Soto, Rafael Rivera and Koch, Kailin and Khan, Aleem and Chen, Barry and Bishop, Marcus and Andrews, Nicholas},
170+
journal={arXiv preprint arXiv:2401.06712},
171+
year={2024}
172+
}
173+
174+
@article{chen2023token,
175+
title={Token Prediction as Implicit Classification to Identify {LLM}-Generated Text},
176+
author={Chen, Yutian and Kang, Hao and Zhai, Vivian and Li, Liangze and Singh, Rita and Raj, Bhiksha},
177+
journal={arXiv preprint arXiv:2311.08723},
178+
year={2023}
179+
}
180+
181+
@article{guo2023authentigpt,
182+
title={{AuthentiGPT}: Detecting Machine-Generated Text via Black-Box Language Models Denoising},
183+
author={Guo, Zhen and Yu, Shangdi},
184+
journal={arXiv preprint arXiv:2311.07700},
185+
year={2023}
186+
}
187+
188+
@article{zhan2023g3detector,
189+
title={{G3Detector}: General {GPT}-generated text detector},
190+
author={Zhan, Haolan and He, Xuanli and Xu, Qiongkai and Wu, Yuxiang and Stenetorp, Pontus},
191+
journal={arXiv preprint arXiv:2305.12680},
192+
year={2023}
193+
}
194+
195+
@article{chen2023gpt,
196+
title={{GPT-Sentinel}: Distinguishing human and {ChatGPT} generated content},
197+
author={Chen, Yutian and Kang, Hao and Zhai, Vivian and Li, Liangze and Singh, Rita and Raj, Bhiksha},
198+
journal={arXiv preprint arXiv:2305.07969},
199+
year={2023}
200+
}
201+
202+
@article{yu2023gpt,
203+
title={{GPT} paternity test: {GPT} generated text detection with {GPT} genetic inheritance},
204+
author={Yu, Xiao and Qi, Yuang and Chen, Kejiang and Chen, Guoqiang and Yang, Xi and Zhu, Pengyuan and Zhang, Weiming and Yu, Nenghai},
205+
journal={arXiv preprint arXiv:2305.12519},
206+
year={2023}
207+
}
208+
209+
@article{liu2022coco,
210+
title={{CoCo}: Coherence-enhanced machine-generated text detection under data limitation with contrastive learning},
211+
author={Liu, Xiaoming and Zhang, Zhaohan and Wang, Yichen and Pu, Hang and Lan, Yu and Shen, Chao},
212+
journal={arXiv preprint arXiv:2212.10341},
213+
year={2022}
214+
}
215+
216+
@article{wu2023llmdet,
217+
title={{LLMDet}: A large language models detection tool},
218+
author={Wu, Kangxi and Pang, Liang and Shen, Huawei and Cheng, Xueqi and Chua, Tat-Seng},
219+
journal={arXiv preprint arXiv:2305.15004},
220+
year={2023}
221+
}
222+
223+
@article{hu2023radar,
224+
title={{RADAR}: Robust {AI}-text detection via adversarial learning},
225+
author={Hu, Xiaomeng and Chen, Pin-Yu and Ho, Tsung-Yi},
226+
journal={Advances in Neural Information Processing Systems},
227+
volume={36},
228+
pages={15077--15095},
229+
year={2023}
230+
}
231+
@inproceedings{cai2024decoupled,
232+
title={Decoupled textual embeddings for customized image generation},
233+
author={Cai, Yufei and Wei, Yuxiang and Ji, Zhilong and Bai, Jinfeng and Han, Hu and Zuo, Wangmeng},
234+
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
235+
volume={38},
236+
number={2},
237+
pages={909--917},
238+
year={2024}
239+
}

0 commit comments

Comments
 (0)