Skip to content

Commit df979d7

Browse files
committed
updated preprocess for easier interpretation
1 parent 30ff7a9 commit df979d7

File tree

1 file changed

+0
-21
lines changed

1 file changed

+0
-21
lines changed

chatGPT-API/preprocess.py

-21
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131

3232
df_valid = pd.DataFrame(list(zip(context_id, id, context, response, response_label, type_label)), columns=[
3333
"context_id", "id", "context", "response", "response_label", "type_label"])
34-
# print(df_valid)
3534

3635

3736
with open("./input_raw/test_split.jsonl") as f:
@@ -48,32 +47,12 @@
4847

4948
df_test = pd.DataFrame(list(zip(context_id, id, context, response, response_label, type_label)), columns=[
5049
"context_id", "id", "context", "response", "response_label", "type_label"])
51-
# print(df_test)
5250

5351

5452
df = pd.concat([df_valid, df_test])
55-
# print(df.columns)
56-
5753
df = df.loc[df["response_label"] == "SUPPORTS"]
58-
# print(df)
59-
6054
df["query"] = df["context"].astype(str) + " " + df["response"].astype(str)
61-
# df["query"] = df["context"]
6255
df = df.astype(str).drop_duplicates(["query"]).reset_index(drop=True)
63-
64-
# print(df)
65-
# print(df["response_label"].describe())
6656
print(df["query"])
67-
6857
df.to_pickle(f"./input_processed/{DATASET}.pkl")
6958

70-
71-
# num_chunks = ceil(df.shape[0] / CHUNK)
72-
# df_list = np.array_split(df, num_chunks)
73-
74-
# total = 0
75-
# for i in range(len(df_list)):
76-
# total = total + len(df_list[i])
77-
# df_list[i].to_pickle(f"./preprocess/{i}.pkl")
78-
79-
# print(f"Total: {total}")

0 commit comments

Comments
 (0)