-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathSF.py
125 lines (106 loc) · 4.99 KB
/
SF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from flask import Flask, request, Response, stream_with_context
import requests
import json
import re
# Flask application instance: the /v1/chat/completions route registers itself
# on it, and the __main__ guard at the bottom of the file runs it.
app = Flask(__name__)
# Matches a complete <think>...</think> block plus the whitespace that follows it.
_THINK_BLOCK_RE = re.compile(r'<think>.*?</think>\s*\n*', flags=re.DOTALL)

_UPSTREAM_URL = "https://api.siliconflow.cn/v1/chat/completions"

# (connect, read) timeouts in seconds. The read timeout applies between chunks
# of the streamed body, so it is generous to tolerate slow generations.
_UPSTREAM_TIMEOUT = (10, 600)

_SSE_DATA_FIELD = 'data:'


def _sse_event(payload):
    """Wrap an already-serialized payload string as one SSE ``data`` event."""
    return f"data: {payload}\n\n"


def _content_event(text):
    """Build an SSE event carrying a minimal chunk whose delta content is *text*."""
    return _sse_event(json.dumps({'choices': [{'delta': {'content': text}}]}))


def _json_error(message, status):
    """Return a JSON error response with the given HTTP status code."""
    body = json.dumps({'error': {'message': message, 'type': 'proxy_error'}})
    return Response(body, status=status, mimetype='application/json')


def _strip_think_blocks(messages):
    """Return copies of *messages* with ``<think>...</think>`` text removed.

    Only string content is rewritten (and stripped of surrounding
    whitespace). List content (multi-part / file messages), ``None`` content
    and non-dict entries are passed through unchanged instead of raising.
    A message without a ``content`` key gets an empty string, as before.
    """
    cleaned = []
    for msg in messages:
        if not isinstance(msg, dict):
            cleaned.append(msg)
            continue
        copy = msg.copy()
        content = msg.get('content', '')
        if isinstance(content, str):
            copy['content'] = _THINK_BLOCK_RE.sub('', content).strip()
        cleaned.append(copy)
    return cleaned


def _extract_delta(data):
    """Return ``choices[0].delta`` from a parsed chunk, or ``None`` if absent."""
    if not isinstance(data, dict):
        return None
    choices = data.get('choices')
    if not isinstance(choices, list) or not choices:
        return None
    first = choices[0]
    if not isinstance(first, dict):
        return None
    delta = first.get('delta')
    return delta if isinstance(delta, dict) else None


def _rewrite_stream(lines):
    """Translate upstream SSE lines into SSE events with inline think tags.

    *lines* is an iterable of raw ``bytes`` lines from the upstream response.
    ``reasoning_content`` deltas are re-emitted as ordinary ``content``
    deltas, preceded once by ``<think>`` and followed by ``</think>`` as soon
    as regular content (or the end of the stream) arrives. Every other
    payload is forwarded unchanged.
    """
    think_opened = False
    in_reasoning = False

    for raw in lines:
        if not raw:
            continue

        payload = raw.decode('utf-8')
        # Strip only the leading SSE field name. A global replace would also
        # eat any "data: " that happens to appear inside the JSON text.
        if payload.startswith(_SSE_DATA_FIELD):
            payload = payload[len(_SSE_DATA_FIELD):]
            if payload.startswith(' '):
                payload = payload[1:]

        if payload == '[DONE]':
            if in_reasoning:
                yield _content_event("</think>")
            yield 'data: [DONE]\n\n'
            return

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            yield _sse_event(payload)
            continue

        delta = _extract_delta(data)
        if delta is None:
            yield _sse_event(payload)
            continue

        reasoning = delta.get('reasoning_content', '')
        content = delta.get('content', '')

        if reasoning:
            if not think_opened:
                yield _content_event("<think>")
                think_opened = True
            yield _content_event(reasoning)
            in_reasoning = True

        if content:
            if in_reasoning:
                yield _content_event("</think>\n\n")
                in_reasoning = False
            yield _sse_event(payload)

        # Chunks with neither field (role announcements, finish_reason,
        # usage) are forwarded as-is.
        if not (reasoning or content):
            yield _sse_event(payload)

    # Upstream ended without a [DONE] sentinel: do not leave the tag open.
    if in_reasoning:
        yield _content_event("</think>")


@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Proxy a chat-completions request upstream and stream the reply back.

    ``<think>`` blocks are removed from the incoming message history, the
    request is forced into streaming mode and forwarded with the caller's
    ``Authorization`` header, and the upstream SSE stream is rewritten so
    that reasoning deltas appear inline between ``<think>`` tags.

    Returns a ``text/event-stream`` response on success, a 400 JSON error
    for a body that is not a JSON object, a 502 JSON error when the upstream
    cannot be reached, or the upstream's own status and body when it
    answers with an error.
    """
    user_data = request.get_json(silent=True)
    if not isinstance(user_data, dict):
        return _json_error('Request body must be a JSON object.', 400)

    if isinstance(user_data.get('messages'), list):
        user_data['messages'] = _strip_think_blocks(user_data['messages'])

    # The rewriting below only understands the streaming wire format.
    user_data['stream'] = True

    headers = {
        "Authorization": request.headers.get('Authorization'),
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            _UPSTREAM_URL,
            json=user_data,
            headers=headers,
            stream=True,
            timeout=_UPSTREAM_TIMEOUT,
        )
    except requests.RequestException as exc:
        return _json_error(f'Upstream request failed: {exc}', 502)

    if not response.ok:
        # Pass the upstream error through instead of disguising it as a
        # successful event stream.
        try:
            body = response.content
        finally:
            response.close()
        return Response(
            body,
            status=response.status_code,
            content_type=response.headers.get('Content-Type', 'application/json'),
        )

    def generate():
        # Release the upstream connection on normal completion and when the
        # client disconnects mid-stream.
        try:
            yield from _rewrite_stream(response.iter_lines())
        finally:
            response.close()

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
    )
if __name__ == '__main__':
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be on by default for a server bound to every interface
    # (0.0.0.0). Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=9007)