Skip to content

Commit 21e6976

Browse files
committed
Remove unused dependencies and clustering code
1 parent e2ac7a4 commit 21e6976

File tree

3 files changed

0 additions and 192 deletions

compile-quotes.js

100 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@ import dotenv from "dotenv";
99
import fs from "fs";
1010
import similarity from "compute-cosine-similarity";
1111
import { Configuration, OpenAIApi } from "openai";
12-
import * as math from "mathjs";
1312
import { kmeans } from "ml-kmeans";
1413

1514
dotenv.config();
@@ -23,7 +22,6 @@ const openai = new OpenAIApi(configuration);
2322

2423
dotenv.config();
2524

26-
const CLUSTER_THRESHOLD = 0.78;
2725
const BOOK_ID = "38531384";
2826

2927
const supabaseUrl = process.env.SUPABASE_URL;
@@ -34,57 +32,6 @@ const supabase = createClient(supabaseUrl, supabaseKey, {
3432
},
3533
});
3634

37-
// Define the k-means clustering algorithm
38-
function kMeansLocal(data, k) {
39-
// Initialize the centroids
40-
const centroids = [];
41-
let prevCentroids = [];
42-
for (let i = 0; i < k; i++) {
43-
centroids.push(data[Math.floor(Math.random() * data.length)]);
44-
}
45-
46-
// Assign each data point to the closest centroid
47-
const assignments = [];
48-
for (let i = 0; i < data.length; i++) {
49-
const distances = [];
50-
for (let j = 0; j < centroids.length; j++) {
51-
distances.push(math.distance(data[i], centroids[j]));
52-
}
53-
assignments.push(distances.indexOf(Math.min(...distances)));
54-
}
55-
56-
// Update the centroids
57-
for (let i = 0; i < k; i++) {
58-
const cluster = data.filter((d, j) => assignments[j] === i);
59-
if (cluster.length === 0) {
60-
continue;
61-
}
62-
centroids[i] = math.mean(cluster, 0);
63-
}
64-
65-
// Repeat until the centroids no longer change
66-
if (!math.deepEqual(centroids, prevCentroids)) {
67-
prevCentroids = centroids;
68-
return kMeans(data, k);
69-
}
70-
71-
// Return the assignments
72-
return assignments;
73-
}
74-
75-
const getQuotes = async (bookId) => {
76-
const { data, error } = await supabase
77-
.from("highlights")
78-
.select("*")
79-
.eq("book_id", bookId);
80-
81-
if (error) {
82-
console.error(error);
83-
return [];
84-
}
85-
86-
return data;
87-
};
8835

8936
const getBookIDFromTitle = async (title) => {
9037
const { data, error } = await supabase
@@ -121,43 +68,6 @@ const compileQuotesFomID = async (bookID) => {
12168
return quotes;
12269
};
12370

124-
function clusterEmbeddings(quotes, threshold = 0.01) {
125-
let clusters = [];
126-
let clusterIndex = 0;
127-
let similarityAvg = 0;
128-
let total = 0;
129-
130-
quotes.forEach((quote, index) => {
131-
if (quote.cluster === undefined) {
132-
quote.cluster = clusterIndex;
133-
clusters[clusterIndex] = [quote];
134-
135-
quotes.forEach((otherQuote, otherIndex) => {
136-
const quoteEmbedding = JSON.parse(quote.embedding);
137-
const otherQuoteEmbedding = JSON.parse(otherQuote.embedding);
138-
139-
if (index === otherIndex) return;
140-
const _similarity = similarity(quoteEmbedding, otherQuoteEmbedding);
141-
if (otherQuote.cluster === undefined) {
142-
if (_similarity > threshold) {
143-
otherQuote.cluster = clusterIndex;
144-
clusters[clusterIndex].push(otherQuote);
145-
}
146-
}
147-
148-
total++;
149-
similarityAvg += _similarity;
150-
});
151-
152-
clusterIndex++;
153-
}
154-
});
155-
156-
console.log("Average similarity: " + similarityAvg / total);
157-
158-
return clusters;
159-
}
160-
16171
const assignTopicToCluster = async (cluster) => {
16272
try {
16373
const prompt = `Given the following quotes, what is a good topic for them? Return only the topic as a Markdown heading with no leading #. No bold (**) or italics (*) are needed.`;
@@ -271,15 +181,6 @@ const main = async () => {
271181

272182
console.log(quotes.length + " quotes found.");
273183

274-
// let clusteredQuotes = clusterEmbeddings(quotes, CLUSTER_THRESHOLD);
275-
276-
// Example usage
277-
// const data = [
278-
// [1, 2],
279-
// [3, 4],
280-
// [5, 6],
281-
// [7, 8],
282-
// ];
283184
const k = 5;
284185
const assignments = kmeans(
285186
quotes
@@ -288,7 +189,6 @@ const main = async () => {
288189
k
289190
);
290191

291-
// console.log(assignments);
292192

293193
// convert each cluster into a heading and a list of quotes in markdown under it and write to a file
294194
// use cluster index as heading

package-lock.json

91 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@
2929
"heic-convert": "^1.2.4",
3030
"jimp": "^0.22.10",
3131
"jsdom": "^24.0.0",
32-
"mathjs": "^12.4.1",
3332
"ml-kmeans": "^6.0.0",
3433
"node-fetch": "^3.3.2",
3534
"openai": "^3.3.0",

0 commit comments

Comments (0)