-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathannotator.py
executable file
·59 lines (49 loc) · 2.54 KB
/
annotator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from typing import List
import json
import sys
import time
import openai
import pickle
from config import Config
from random import shuffle
from tqdm import tqdm
def load_links(file_path):
    """Read entity pairs from a tab-separated links file.

    Every non-blank line must contain at least two tab-separated columns.
    For each of the first two columns only the text after the last ``/`` is
    kept (the whole column when it contains no ``/``).

    Args:
        file_path: Path of the links file to read.

    Returns:
        A list of ``(first_name, second_name)`` tuples in file order.

    Raises:
        IndexError: If a non-blank line has fewer than two columns.
    """
    pairs = []
    # Decode explicitly as UTF-8 (as Annotator.save/load do) so non-ASCII
    # entity names do not depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank / whitespace-only lines carry no pair; skip them
                # instead of failing on the missing second column.
                continue
            columns = stripped.split('\t')
            pairs.append((
                columns[0].rsplit('/', 1)[-1],
                columns[1].rsplit('/', 1)[-1],
            ))
    return pairs
def get_entity_set(file_path):
    """Collect the distinct entity names found in a tab-separated triples file.

    Every non-blank line must contain at least three tab-separated columns.
    The first and third columns are read; for each only the text after the
    last ``/`` is kept (the whole column when it contains no ``/``).

    Args:
        file_path: Path of the triples file to read.

    Returns:
        A set of entity-name strings taken from columns 0 and 2.

    Raises:
        IndexError: If a non-blank line has fewer than three columns.
    """
    entities = set()
    # Decode explicitly as UTF-8 (as Annotator.save/load do) so non-ASCII
    # entity names do not depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank / whitespace-only lines hold no triple; skip them
                # instead of failing on the missing columns.
                continue
            columns = stripped.split('\t')
            # Column 1 is deliberately ignored; only columns 0 and 2 are kept.
            entities.update(columns[i].rsplit('/', 1)[-1] for i in (0, 2))
    return entities
class Annotator:
    """Chat-completion helper that asks a GPT model to pick a matching entity.

    The instance keeps a base conversation in ``self.messages``. Each call to
    :meth:`choose` temporarily appends one user prompt, sends the whole
    conversation, and then removes that prompt again, so the stored
    conversation stays the same between calls.
    """

    def __init__(self, api_key: str, messages: List = None):
        """Configure the OpenAI key and set up the base conversation.

        Args:
            api_key: OpenAI API key. It is assigned to ``openai.api_key``,
                which is a module-level setting shared by every user of the
                ``openai`` module in this process.
            messages: Base conversation as a list of ``{"role", "content"}``
                dicts. When ``None`` or empty, a default system + user
                preamble describing the alignment task is used. A non-empty
                list is stored by reference, not copied.
        """
        openai.api_key = api_key
        if messages:
            self.messages = messages
        else:
            self.messages = [
                {"role": "system", "content": "You are an expert in data mining and knowledge graph alignment."},
                {"role": "user", "content": "Please help me with the task of aligning entities in a knowledge graph. Currently, I have two versions of the knowledge graph, one is built from DBpedia (a structured database) and the other is extracted from wikipedia. These two knowledge graphs have the same set of entities and relationships. Please assist me in identifying the matching entities between these two knowledge graphs based on their entity names and semantics."}
            ]

    def ask_chat_gpt(self) -> str:
        """Send ``self.messages`` to the chat model and return its reply text.

        The model name is read from ``Config.gpt_model``.

        Returns:
            The ``content`` of the first choice in the API response.
        """
        response = openai.ChatCompletion.create(
            model=Config.gpt_model,
            messages=self.messages
        )
        return response['choices'][0]['message']['content']

    def choose(self, x1, x2):
        """Ask the model which candidate in ``x2`` corresponds to ``x1``.

        Args:
            x1: Source entity; interpolated into the prompt.
            x2: Candidate entities; interpolated into the prompt via its
                string form.

        Returns:
            The model's raw reply text.

        Raises:
            Whatever :meth:`ask_chat_gpt` raises. The base conversation is
            restored before the exception propagates.
        """
        self.messages.append({"role": "user", "content": f"Given an entity '{x1}' in the English knowledge graph, please help me determine which of the following entities in '{x2}' corresponds to '{x1}' in the German knowledge graph. Please directly reply with the name of the target entity. Do not reply with any extra words or punctuation."})
        try:
            return self.ask_chat_gpt()
        finally:
            # Always drop the one-off prompt, even when the API call fails;
            # otherwise a single error would leave it in the base
            # conversation and leak into every later request.
            self.messages.pop()

    def save(self, model_path: str):
        """Write the base conversation to ``model_path`` as UTF-8 JSON.

        The file holds a single object with one key, ``"messages"``.

        Args:
            model_path: Destination file path; overwritten if it exists.
        """
        model_dict = {
            'messages': self.messages
        }
        with open(model_path, "w", encoding='utf-8') as f:
            json.dump(model_dict, f, ensure_ascii=False, indent=2)

    @classmethod
    def load(cls, model_path: str, api_key: str) -> 'Annotator':
        """Create an annotator from a file previously written by :meth:`save`.

        Args:
            model_path: Path of the JSON file to read.
            api_key: OpenAI API key passed to the constructor.

        Returns:
            A new instance of the class ``load`` was called on, initialised
            with the stored ``"messages"`` list.
        """
        with open(model_path, "r", encoding='utf-8') as f:
            model_dict = json.load(f)
        # Build through ``cls`` so subclasses get an instance of their own type.
        return cls(api_key=api_key, messages=model_dict['messages'])