Skip to content

Commit d1c36c9

Browse files
authored
Merge pull request #2 from iMeanAI/dev
update a demo base agent
2 parents a5887cd + 0170147 commit d1c36c9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+5453
-3
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ logs/
4646
log/
4747
tmp/
4848
temp/
49+
token_results/
50+
temp_ref/
4951

5052
# Data
5153
data/

.gradio/certificate.pem

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
-----BEGIN CERTIFICATE-----
2+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31+
-----END CERTIFICATE-----

README.md

+25-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
# open-operator
1515

16-
This project aims to provide the open-source community with an easy-to-use system for building, self-hosting, and evaluating web agent models. Our goal is to offer an alternative to the $200/month ChatGPT Pro and cloud-based, uncontrolled execution environments.
16+
This project aims to provide the open-source community with an easy-to-use system for building, self-hosting, and evaluating web agent computer-use models. Our goal is to offer an alternative to the $200/month ChatGPT Pro and cloud-based, uncontrolled execution environments.
1717

1818
With open-operator, you can:
1919
- Annotate your web trajectory data.
@@ -28,6 +28,30 @@ We believe in empowering developers to have complete control over their web agen
2828
![Roadmap](src/roadmap.png)
2929
The roadmap above gives a brief overview of the project. The parts shown in green will be included in this repo.
3030

31+
## Run your Base Agent Using Open-Operator
32+
### Prepare the environment
33+
```bash
34+
conda create -n open-operator python=3.11
conda activate open-operator
35+
pip install -r requirements.txt
36+
```
37+
38+
For the browser environment, you can use [browserbase](https://www.browserbase.com/). Set the following environment variable:
39+
40+
```bash
41+
export BROWSERBASE_API_KEY=your_api_key
42+
```
43+
44+
45+
### Initialize the base agent
46+
```bash
47+
python inference/app.py
48+
```
49+
You can select the base model you want to use from the dropdown menu (Anthropic, Google, OpenAI, etc.).
50+
51+
![Open-Operator](src/ui.png)
52+
53+
Then start your first experience with Open-Operator!
54+
3155
## Data Annotation and Downloading
3256
Follow the step-by-step instructions below:
3357
1. Download the latest iMean builder extension here: [iMean Builder](https://drive.google.com/file/d/1BpLOQ9M41rdc6VYY-1Aes1lhzo5-LdiH/view?usp=sharing)

configs/config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ data_download:
66
challenge_id: "L_if1ihd1jmMJq4WUbrYe"
77
save_path: "data/raw"
88
save_raw_data: true
9-
username: ""
10-
password: ""
9+
username: ""  # do not commit real credentials; supply them locally
10+
password: ""  # do not commit real credentials; supply them locally
1111

1212
# Processing Mode
1313
mode: "dom_tree" # or "vision"
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .html_env import *
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from enum import Enum
2+
from playwright.async_api import async_playwright
3+
import os
4+
5+
class BrowserType(Enum):
    """Identifiers for the browser backends this module can create.

    The member values are the plain strings that
    ``BrowserEnvironment.create_browser_instance`` compares its
    ``browser_type`` argument against.
    """

    # Chromium launched on the local machine.
    LOCAL = "local"
    # Remote browser reached through the Browserbase service.
    BROWSERBASE = "browserbase"
8+
9+
class BrowserEnvironment:
    """Entry point that picks a concrete browser backend by name."""

    @staticmethod
    async def create_browser_instance(browser_type: str, headless: bool, slow_mo: int, viewport_size: dict, locale: str):
        """Create a browser using the backend named by ``browser_type``.

        Args:
            browser_type: One of the ``BrowserType`` values (``"local"`` or
                ``"browserbase"``).
            headless: Forwarded unchanged to the backend's ``create``.
            slow_mo: Forwarded unchanged to the backend's ``create``.
            viewport_size: Forwarded unchanged to the backend's ``create``.
            locale: Forwarded unchanged to the backend's ``create``.

        Returns:
            Whatever the selected backend's ``create`` coroutine returns.

        Raises:
            ValueError: If ``browser_type`` matches no known backend.
        """
        if browser_type == BrowserType.LOCAL.value:
            backend = LocalBrowserEnvironment
        elif browser_type == BrowserType.BROWSERBASE.value:
            backend = BrowserbaseBrowserEnvironment
        else:
            raise ValueError(f"Unsupported browser type: {browser_type}")

        # Both backends expose the same positional ``create`` signature.
        return await backend.create(headless, slow_mo, viewport_size, locale)
18+
19+
class LocalBrowserEnvironment:
    """Backend that launches Chromium on the local machine via Playwright."""

    @staticmethod
    async def create(headless: bool, slow_mo: int, viewport_size: dict, locale: str):
        """Start Playwright, launch Chromium and open a fresh browser context.

        Args:
            headless: Passed to ``chromium.launch(headless=...)``.
            slow_mo: Passed to ``chromium.launch(slow_mo=...)``.
            viewport_size: Passed to ``browser.new_context(viewport=...)``.
            locale: Passed to ``browser.new_context(locale=...)``.

        Returns:
            A 4-tuple ``(browser, context, playwright, None)``. The fourth
            slot is always ``None`` for this backend.

        Raises:
            Any exception raised by Playwright while launching the browser or
            creating the context. Resources that were already started are
            released before the exception propagates.
        """
        playwright = await async_playwright().start()
        browser = None
        try:
            browser = await playwright.chromium.launch(headless=headless, slow_mo=slow_mo)
            context = await browser.new_context(viewport=viewport_size, locale=locale)
        except BaseException:
            # Without this cleanup a failed launch/new_context would leave the
            # Playwright driver (and possibly a browser process) running with
            # no handle returned to the caller to shut it down.
            if browser is not None:
                await browser.close()
            await playwright.stop()
            raise
        return browser, context, playwright, None
26+
27+
class BrowserbaseBrowserEnvironment:
    """Backend that attaches to a remote Browserbase browser over CDP."""

    @staticmethod
    async def create(headless: bool, slow_mo: int, viewport_size: dict, locale: str):
        """Connect to Browserbase and return its first browser context.

        The API key is read from the ``BROWSERBASE_API_KEY`` environment
        variable and validated *before* Playwright is started, so a missing
        key no longer leaves a Playwright driver running.

        Args:
            headless: Accepted for signature parity with the local backend;
                not used by this implementation.
            slow_mo: Accepted for signature parity; not used here.
            viewport_size: Accepted for signature parity; not used here.
            locale: Accepted for signature parity; not used here.

        Returns:
            A 4-tuple ``(browser, context, playwright, None)`` where
            ``context`` is ``browser.contexts[0]``. The fourth slot is always
            ``None`` for this backend.

        Raises:
            ValueError: If ``BROWSERBASE_API_KEY`` is unset or empty.
            IndexError: If the connected browser exposes no context.
            Any exception raised by Playwright while connecting. Playwright is
            stopped before the exception propagates.
        """
        browserbase_api_key = os.environ.get('BROWSERBASE_API_KEY')
        if not browserbase_api_key:
            raise ValueError("BROWSERBASE_API_KEY not found in environment variables")

        playwright = await async_playwright().start()
        try:
            browser_cdp_url = f"wss://connect.browserbase.com?apiKey={browserbase_api_key}"
            browser = await playwright.chromium.connect_over_cdp(browser_cdp_url)
            context = browser.contexts[0]
        except BaseException:
            # Stop the driver so a failed connect (or a missing context) does
            # not leak it; the caller never receives a handle to stop it.
            await playwright.stop()
            raise
        return browser, context, playwright, None
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .utils import *
2+
from .build_tree import *
3+
from .active_elements import *
4+
from .actions import *
5+
from .async_env import *
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
from beartype import beartype
2+
from typing import TypedDict
3+
from enum import IntEnum
4+
5+
6+
class Action(TypedDict):
    """Dictionary shape shared by every action built in this module.

    All factory functions in this module return a dict with exactly these
    five keys; keys that do not apply to a given action are set to ``""``.
    """

    # Numeric action code; the factories store an ``ActionTypes`` member here.
    action_type: int
    # Identifier of the target element, as supplied by the caller.
    element_id: int
    # Element name; every factory in this module sets it to "".
    element_name: str
    # Target URL for navigation-style actions, otherwise "".
    url: str
    # Text payload (typed text, selected value, cached data, ...), otherwise "".
    fill_text: str
12+
13+
14+
class ActionTypes(IntEnum):
    """Integer codes stored under ``action_type`` in an ``Action`` dict."""

    # Fallback used when no recognised action applies.
    NONE = 0

    # Page interaction and navigation.
    CLICK = 1
    GOTO = 2
    GOOGLE_SEARCH = 3
    FILL_FORM = 4
    SWITCH_TAB = 5
    GO_BACK = 6
    FILL_SEARCH = 7
    SELECT_OPTION = 8
    HOVER = 9
    SCROLL_DOWN = 10
    SCROLL_UP = 11

    # Actions that carry data instead of driving the page.
    CACHE_DATA = 12
    GET_FINAL_ANSWER = 13
29+
30+
@beartype
def create_cache_data_action(elementid: int,fill_text: str) -> Action:
    """Build a ``CACHE_DATA`` action.

    Args:
        elementid: Value stored under ``element_id``.
        fill_text: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict with ``url`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.CACHE_DATA,
        element_id=elementid,
        url="",
        fill_text=fill_text,
        element_name="",
    )
39+
40+
41+
@beartype
def create_get_final_answer(elementid: int,fill_text: str) -> Action:
    """Build a ``GET_FINAL_ANSWER`` action.

    Args:
        elementid: Value stored under ``element_id``.
        fill_text: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict with ``url`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.GET_FINAL_ANSWER,
        element_id=elementid,
        url="",
        fill_text=fill_text,
        element_name="",
    )
50+
51+
52+
@beartype
def create_click_action(elementid: int) -> Action:
    """Build a ``CLICK`` action for the given element.

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.CLICK,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
61+
62+
63+
@beartype
def create_goto_action(elementid: int, url: str) -> Action:
    """Build a ``GOTO`` action.

    Args:
        elementid: Value stored under ``element_id``.
        url: Value stored under ``url``.

    Returns:
        An ``Action`` dict with ``fill_text`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.GOTO,
        element_id=elementid,
        url=url,
        fill_text="",
        element_name="",
    )
72+
73+
74+
@beartype
def create_none_action(elementid: int) -> Action:
    """Build a ``NONE`` action (the fallback used by ``create_action``).

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.NONE,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
83+
84+
85+
@beartype
def create_fill_action(elementid: int, fill_text: str) -> Action:
    """Build a ``FILL_FORM`` action.

    Args:
        elementid: Value stored under ``element_id``.
        fill_text: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict with ``url`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.FILL_FORM,
        element_id=elementid,
        url="",
        fill_text=fill_text,
        element_name="",
    )
94+
95+
96+
@beartype
def create_fill_search_action(elementid: int, fill_text: str) -> Action:
    """Build a ``FILL_SEARCH`` action.

    Args:
        elementid: Value stored under ``element_id``.
        fill_text: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict with ``url`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.FILL_SEARCH,
        element_id=elementid,
        url="",
        fill_text=fill_text,
        element_name="",
    )
105+
106+
107+
@beartype
def create_search_action(elementid: int, text: str) -> Action:
    """Build a ``GOOGLE_SEARCH`` action.

    Args:
        elementid: Value stored under ``element_id``.
        text: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict whose ``url`` is fixed to
        ``"https://www.google.com"`` and whose ``element_name`` is empty.
    """
    return Action(
        action_type=ActionTypes.GOOGLE_SEARCH,
        element_id=elementid,
        url="https://www.google.com",
        fill_text=text,
        element_name="",
    )
116+
117+
118+
@beartype
def create_go_back_action(elementid: int) -> Action:
    """Build a ``GO_BACK`` action.

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.GO_BACK,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
127+
128+
129+
@beartype
def create_select_option_action(elementid: int, target_value: str) -> Action:
    """Build a ``SELECT_OPTION`` action.

    Args:
        elementid: Value stored under ``element_id``.
        target_value: Value stored under ``fill_text``.

    Returns:
        An ``Action`` dict with ``url`` and ``element_name`` left empty.
    """
    return Action(
        action_type=ActionTypes.SELECT_OPTION,
        element_id=elementid,
        url="",
        fill_text=target_value,
        element_name="",
    )
138+
139+
@beartype
def create_hover_action(elementid: int) -> Action:
    """Build a ``HOVER`` action.

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.HOVER,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
148+
149+
@beartype
def create_scroll_down_action(elementid: int) -> Action:
    """Build a ``SCROLL_DOWN`` action.

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.SCROLL_DOWN,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
158+
159+
@beartype
def create_scroll_up_action(elementid: int) -> Action:
    """Build a ``SCROLL_UP`` action.

    Args:
        elementid: Value stored under ``element_id``.

    Returns:
        An ``Action`` dict whose ``url``, ``fill_text`` and ``element_name``
        are all empty strings.
    """
    return Action(
        action_type=ActionTypes.SCROLL_UP,
        element_id=elementid,
        url="",
        fill_text="",
        element_name="",
    )
168+
169+
@beartype
def create_action(elementid: int, action_type: str, action_input: str) -> Action:
    """Build the ``Action`` that corresponds to a textual action name.

    Args:
        elementid: Element identifier forwarded to the selected factory.
        action_type: Action name, e.g. ``"click"``, ``"goto"`` or
            ``"cache_storage"``.
        action_input: Payload forwarded to factories that take one (text to
            type, URL, option value, ...); ignored by the others.

    Returns:
        The dict produced by the matching ``create_*`` factory. Any name that
        is not recognised yields the result of ``create_none_action``.
    """
    # One zero-argument builder per supported name; lambdas defer the call so
    # only the selected factory actually runs.
    builders = {
        "click": lambda: create_click_action(elementid=elementid),
        "fill_form": lambda: create_fill_action(elementid=elementid, fill_text=action_input),
        "fill_search": lambda: create_fill_search_action(elementid=elementid, fill_text=action_input),
        "goto": lambda: create_goto_action(elementid=elementid, url=action_input),
        "google_search": lambda: create_search_action(elementid=elementid, text=action_input),
        "go_back": lambda: create_go_back_action(elementid=elementid),
        "select_option": lambda: create_select_option_action(elementid=elementid, target_value=action_input),
        "hover": lambda: create_hover_action(elementid=elementid),
        "scroll_down": lambda: create_scroll_down_action(elementid=elementid),
        "scroll_up": lambda: create_scroll_up_action(elementid=elementid),
        "cache_storage": lambda: create_cache_data_action(elementid=elementid,fill_text=action_input),
        "get_final_answer": lambda: create_get_final_answer(elementid=elementid,fill_text=action_input),
    }

    build = builders.get(action_type)
    if build is None:
        # Unknown names fall back to a NONE action rather than raising.
        return create_none_action(elementid=elementid)
    return build()
197+
198+
199+
# Public API of this module (what ``from .actions import *`` re-exports).
__all__ = [
    # Types
    "Action",
    "ActionTypes",
    # Per-action factories
    "create_click_action",
    "create_fill_action",
    "create_none_action",
    "create_goto_action",
    "create_search_action",
    "create_go_back_action",
    "create_fill_search_action",
    "create_select_option_action",
    "create_hover_action",
    "create_scroll_down_action",
    "create_scroll_up_action",
    "create_cache_data_action",
    "create_get_final_answer",
    # Name-based dispatcher
    "create_action",
]

0 commit comments

Comments
 (0)