diff --git a/app/db/drizzle-client.server.ts b/app/db/drizzle-client.server.ts
index 14eedc95d9..a4f829d1e8 100644
--- a/app/db/drizzle-client.server.ts
+++ b/app/db/drizzle-client.server.ts
@@ -101,6 +101,7 @@ const bookmarkEntries = mysqlTable("bookmarkEntries", {
   ...timestampColumns,
   //
   text: text("text").notNull(),
+  textCharacters: json("textCharacters").notNull(),
   side: int("side").notNull(), // 0 | 1
   offset: int("offset").notNull(),
 });
diff --git a/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts
new file mode 100644
index 0000000000..977830cb29
--- /dev/null
+++ b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts
@@ -0,0 +1,16 @@
+import type { Knex } from "knex";
+
+// Adds the `textCharacters` JSON column with a multi-valued index on it,
+// plus an ngram FULLTEXT index on `text`, to `bookmarkEntries`.
+export async function up(knex: Knex) {
+  await knex.raw(
+    "ALTER TABLE `bookmarkEntries` ADD COLUMN `textCharacters` json NOT NULL DEFAULT (json_array()) AFTER `text`, ADD KEY `bookmarkEntries_textCharacters_key` ((cast(`textCharacters` as char(1) array))), ADD FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ;"
+  );
+}
+
+// Reverts `up`: drops the `textCharacters` column and both indexes.
+export async function down(knex: Knex) {
+  await knex.raw(
+    "ALTER TABLE `bookmarkEntries` DROP COLUMN `textCharacters`, DROP KEY `bookmarkEntries_textCharacters_key`, DROP KEY `text`;"
+  );
+}
diff --git a/app/db/skeema/README.md b/app/db/skeema/README.md
index 8f774974ab..4875f1ebd8 100644
--- a/app/db/skeema/README.md
+++ b/app/db/skeema/README.md
@@ -3,27 +3,24 @@
 we started to experiment with skeema.
 we still use knex to apply migrations, but it's already useful enough during development.
 
-## generate knex migrations
+## generate up/down migrations
 
 ```
 # 0. update skeema/some-table.sql
 
-# 1. generate empty knex migration
-pnpm knex migrate:make some-migration
-
-# 2. diff for `up` knex migration
+# 1. diff for "up" migration
 pnpm skeema diff --allow-unsafe
 
-# 3. apply knex migration
-pnpm knex migrate:up
+# 2. apply "up"
+pnpm skeema push --allow-unsafe
 
-# 4. temporary revert skeema/some-table.sql
+# 3. temporary revert skeema/some-table.sql
 git stash
 
-# 5. diff for `down` knex migration
+# 4. diff for "down" migration
 pnpm skeema diff --allow-unsafe
 
-# 6. restore skeema/some-table.sql
+# 5. restore skeema/some-table.sql
 git stash pop
 ```
 
diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql
index 4a50f21799..b9cb1a0fe6 100644
--- a/app/db/skeema/bookmarkEntries.sql
+++ b/app/db/skeema/bookmarkEntries.sql
@@ -1,6 +1,7 @@
 CREATE TABLE `bookmarkEntries` (
   `id` bigint NOT NULL AUTO_INCREMENT,
   `text` text NOT NULL,
+  `textCharacters` json NOT NULL DEFAULT (json_array()),
   `side` int NOT NULL,
   `offset` int NOT NULL,
   `createdAt` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
@@ -11,5 +12,7 @@ CREATE TABLE `bookmarkEntries` (
   PRIMARY KEY (`id`),
   KEY `bookmarkEntries_userId_createdAt_key` (`userId`,`createdAt`),
   KEY `bookmarkEntries_videoId_key` (`videoId`),
-  KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`)
+  KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`),
+  KEY `bookmarkEntries_textCharacters_key` ((cast(`textCharacters` as char(1) array))),
+  FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
diff --git a/app/misc/cli.ts b/app/misc/cli.ts
index 9299116043..7e844b544a 100644
--- a/app/misc/cli.ts
+++ b/app/misc/cli.ts
@@ -525,6 +525,27 @@ async function fixBookmarkEntriesOffset(rawArgs: unknown) {
   console.log({ stats });
 }
 
+//
+// resetBookmarkEntriesTextCharacters
+//
+
+cli
+  .command(resetBookmarkEntriesTextCharacters.name)
+  .action(resetBookmarkEntriesTextCharacters);
+
+// Backfill `textCharacters` of every bookmark entry from its `text`.
+// `updatedAt` is written back unchanged (presumably so the backfill doesn't bump it).
+async function resetBookmarkEntriesTextCharacters() {
+  const rows = await db.select().from(T.bookmarkEntries);
+
+  for (const row of rows) {
+    await db
+      .update(T.bookmarkEntries)
+      .set({ textCharacters: Array.from(row.text), updatedAt: row.updatedAt })
+      .where(E.eq(T.bookmarkEntries.id, row.id));
+  }
+}
+
 //
 // main
 //
diff --git a/app/trpc/routes/bookmarks.ts b/app/trpc/routes/bookmarks.ts
index ce26f43915..8956e07104 100644
--- a/app/trpc/routes/bookmarks.ts
+++ b/app/trpc/routes/bookmarks.ts
@@ -42,6 +42,7 @@ export const trpcRoutesBookmarks = {
       // insert with counter cache increment
       const [{ insertId }] = await db.insert(T.bookmarkEntries).values({
         ...input,
+        textCharacters: Array.from(input.text),
         userId: ctx.user.id,
       });
       await db
@@ -121,8 +122,17 @@ export const trpcRoutesBookmarks = {
         .where(
           E.and(
             E.eq(T.bookmarkEntries.userId, ctx.user.id),
-            // TODO: index
-            mapOption(input.q, (v) => E.like(T.bookmarkEntries.text, `%${v}%`))
+            mapOption(input.q, (q) => {
+              // default ngram size 2 doesn't support single character search,
+              // so we rely on JSON array multi-valued index.
+              // count code points (not UTF-16 units) to match how `textCharacters` is built with `Array.from(text)`.
+              if (Array.from(q).length === 1) {
+                // https://dev.mysql.com/doc/refman/8.0/en/create-index.html#create-index-multi-valued
+                return sql`${q} MEMBER OF(${T.bookmarkEntries.textCharacters})`;
+              }
+              // https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html
+              return sql`MATCH (${T.bookmarkEntries.text}) AGAINST (${q} IN BOOLEAN MODE)`;
+            })
           )
         )
         .orderBy(E.desc(T.bookmarkEntries.createdAt))
diff --git a/app/utils/practice-system.test.ts b/app/utils/practice-system.test.ts
index 3fedd44af6..bd62371cae 100644
--- a/app/utils/practice-system.test.ts
+++ b/app/utils/practice-system.test.ts
@@ -41,6 +41,7 @@ describe("PracticeSystem", () => {
         .insert(T.bookmarkEntries)
         .values({
           text: "Bonjour à tous",
+          textCharacters: [],
           side: 0,
           offset: 8,
           userId: hook.user.id,