From 16f50a9e38534f1f0b3e03a06bd095316b8d5770 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 12:26:20 +0900 Subject: [PATCH 01/10] chore: update README.md --- app/db/skeema/README.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/app/db/skeema/README.md b/app/db/skeema/README.md index 8f774974ab..4875f1ebd8 100644 --- a/app/db/skeema/README.md +++ b/app/db/skeema/README.md @@ -3,27 +3,24 @@ we started to experiment with skeema. we still use knex to apply migrations, but it's already useful enough during development. -## generate knex migrations +## generate up/down migrations ``` # 0. update skeema/some-table.sql -# 1. generate empty knex migration -pnpm knex migrate:make some-migration - -# 2. diff for `up` knex migration +# 1. diff for "up" migration pnpm skeema diff --allow-unsafe -# 3. apply knex migration -pnpm knex migrate:up +# 2. apply "up" +pnpm skeema push --allow-unsafe -# 4. temporary revert skeema/some-table.sql +# 3. temporarily revert skeema/some-table.sql git stash -# 5. diff for `down` knex migration +# 4. diff for "down" migration pnpm skeema diff --allow-unsafe -# 6. restore skeema/some-table.sql +# 5. 
restore skeema/some-table.sql git stash pop ``` From ccc0cc0c82f07be4f04fcdcb0504d4933bb487a3 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 12:30:26 +0900 Subject: [PATCH 02/10] feat: add FULLTEXT KEY --- ...0230511032932_bookmarkEntries-text-fulltext-key.ts | 11 +++++++++++ app/db/skeema/bookmarkEntries.sql | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts diff --git a/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts new file mode 100644 index 0000000000..df8ffaa0ad --- /dev/null +++ b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts @@ -0,0 +1,11 @@ +import type { Knex } from "knex"; + +export async function up(knex: Knex) { + await knex.raw( + "ALTER TABLE `bookmarkEntries` ADD FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ;" + ); +} + +export async function down(knex: Knex) { + await knex.raw("ALTER TABLE `bookmarkEntries` DROP KEY `text`;"); +} diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 4a50f21799..3fc5e05743 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -11,5 +11,6 @@ CREATE TABLE `bookmarkEntries` ( PRIMARY KEY (`id`), KEY `bookmarkEntries_userId_createdAt_key` (`userId`,`createdAt`), KEY `bookmarkEntries_videoId_key` (`videoId`), - KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`) + KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`), + FULLTEXT (`text`) WITH PARSER ngram ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; From 8e22db13718f91a1da44ef723493101aecc1c65d Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 12:32:15 +0900 Subject: [PATCH 03/10] chore: skeema lint --- app/db/skeema/bookmarkEntries.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 3fc5e05743..87f80ffea8 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -12,5 +12,5 @@ CREATE TABLE `bookmarkEntries` ( KEY `bookmarkEntries_userId_createdAt_key` (`userId`,`createdAt`), KEY `bookmarkEntries_videoId_key` (`videoId`), KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`), - FULLTEXT (`text`) WITH PARSER ngram + FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; From 7db9a64d1d2ad203a2697ccffbffe154f94985d1 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 12:46:02 +0900 Subject: [PATCH 04/10] chore: skeema lint --- app/db/skeema/bookmarkEntries.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 87f80ffea8..922a63b2e1 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -12,5 +12,5 @@ CREATE TABLE `bookmarkEntries` ( KEY `bookmarkEntries_userId_createdAt_key` (`userId`,`createdAt`), KEY `bookmarkEntries_videoId_key` (`videoId`), KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`), - FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ + FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; From 1e2b7e0ac349251caaafd78bf4483ac553a21446 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 12:58:14 +0900 Subject: [PATCH 05/10] feat: use MATCH query --- app/trpc/routes/bookmarks.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/trpc/routes/bookmarks.ts b/app/trpc/routes/bookmarks.ts index ce26f43915..b1fe29c084 100644 --- a/app/trpc/routes/bookmarks.ts +++ b/app/trpc/routes/bookmarks.ts @@ -121,8 +121,12 @@ export const trpcRoutesBookmarks = { .where( E.and( 
E.eq(T.bookmarkEntries.userId, ctx.user.id), - // TODO: index - mapOption(input.q, (v) => E.like(T.bookmarkEntries.text, `%${v}%`)) + // cf. https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html + mapOption( + input.q, + (v) => + sql`MATCH (${T.bookmarkEntries.text}) AGAINST (${v} IN BOOLEAN MODE)` + ) ) ) .orderBy(E.desc(T.bookmarkEntries.createdAt)) From 64accafa8a0df86f73e8b2cf92932f1fa9ffdf56 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 16:53:08 +0900 Subject: [PATCH 06/10] fix: workaround single character match --- app/trpc/routes/bookmarks.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/app/trpc/routes/bookmarks.ts b/app/trpc/routes/bookmarks.ts index b1fe29c084..829cd4372b 100644 --- a/app/trpc/routes/bookmarks.ts +++ b/app/trpc/routes/bookmarks.ts @@ -121,12 +121,15 @@ export const trpcRoutesBookmarks = { .where( E.and( E.eq(T.bookmarkEntries.userId, ctx.user.id), - // cf. https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html - mapOption( - input.q, - (v) => - sql`MATCH (${T.bookmarkEntries.text}) AGAINST (${v} IN BOOLEAN MODE)` - ) + mapOption(input.q, (q) => { + // cf. https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html + // default ngram size 2 doesn't support single character search. + // by appending "*", it will match a word having `q` unless it appears at the end. 
+ if (q.length === 1) { + q += "*"; + } + return sql`MATCH (${T.bookmarkEntries.text}) AGAINST (${q} IN BOOLEAN MODE)`; + }) ) ) .orderBy(E.desc(T.bookmarkEntries.createdAt)) From b1e1f5fa9727a3ab84f5f5a8facaf459e235b7c9 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 17:35:11 +0900 Subject: [PATCH 07/10] feat: add `bookmarkEntries.textCharacters` for single character search --- app/db/drizzle-client.server.ts | 1 + app/db/skeema/bookmarkEntries.sql | 5 ++++- app/misc/cli.ts | 19 +++++++++++++++++++ app/trpc/routes/bookmarks.ts | 10 ++++++---- app/utils/practice-system.test.ts | 1 + 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/app/db/drizzle-client.server.ts b/app/db/drizzle-client.server.ts index 14eedc95d9..a4f829d1e8 100644 --- a/app/db/drizzle-client.server.ts +++ b/app/db/drizzle-client.server.ts @@ -101,6 +101,7 @@ const bookmarkEntries = mysqlTable("bookmarkEntries", { ...timestampColumns, // text: text("text").notNull(), + textCharacters: json("textCharacters").notNull(), side: int("side").notNull(), // 0 | 1 offset: int("offset").notNull(), }); diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 922a63b2e1..7132db73a5 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -1,6 +1,8 @@ +-- `textChars` for quick workaround of single character search CREATE TABLE `bookmarkEntries` ( `id` bigint NOT NULL AUTO_INCREMENT, `text` text NOT NULL, + `textCharacters` json NOT NULL DEFAULT (json_array()), `side` int NOT NULL, `offset` int NOT NULL, `createdAt` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, @@ -12,5 +14,6 @@ CREATE TABLE `bookmarkEntries` ( KEY `bookmarkEntries_userId_createdAt_key` (`userId`,`createdAt`), KEY `bookmarkEntries_videoId_key` (`videoId`), KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`), - FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ + KEY `bookmarkEntries_textCharacters_key` ((cast(`textCharacters` as 
char(1) array))), + FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; diff --git a/app/misc/cli.ts b/app/misc/cli.ts index 9299116043..7e844b544a 100644 --- a/app/misc/cli.ts +++ b/app/misc/cli.ts @@ -525,6 +525,25 @@ async function fixBookmarkEntriesOffset(rawArgs: unknown) { console.log({ stats }); } +// +// resetBookmarkEntriesTextCharacters +// + +cli + .command(resetBookmarkEntriesTextCharacters.name) + .action(resetBookmarkEntriesTextCharacters); + +async function resetBookmarkEntriesTextCharacters() { + const rows = await db.select().from(T.bookmarkEntries); + + for (const row of rows) { + await db + .update(T.bookmarkEntries) + .set({ textCharacters: Array.from(row.text), updatedAt: row.updatedAt }) + .where(E.eq(T.bookmarkEntries.id, row.id)); + } +} + // // main // diff --git a/app/trpc/routes/bookmarks.ts b/app/trpc/routes/bookmarks.ts index 829cd4372b..8956e07104 100644 --- a/app/trpc/routes/bookmarks.ts +++ b/app/trpc/routes/bookmarks.ts @@ -42,6 +42,7 @@ export const trpcRoutesBookmarks = { // insert with counter cache increment const [{ insertId }] = await db.insert(T.bookmarkEntries).values({ ...input, + textCharacters: Array.from(input.text), userId: ctx.user.id, }); await db @@ -122,12 +123,13 @@ export const trpcRoutesBookmarks = { E.and( E.eq(T.bookmarkEntries.userId, ctx.user.id), mapOption(input.q, (q) => { - // cf. https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html - // default ngram size 2 doesn't support single character search. - // by appending "*", it will match a word having `q` unless it appears at the end. + // default ngram size 2 doesn't support single character search, + // so we rely on JSON array multi-valued index. 
if (q.length === 1) { - q += "*"; + // https://dev.mysql.com/doc/refman/8.0/en/create-index.html#create-index-multi-valued + return sql`${q} MEMBER OF(${T.bookmarkEntries.textCharacters})`; } + // https://dev.mysql.com/doc/refman/8.0/en/fulltext-boolean.html return sql`MATCH (${T.bookmarkEntries.text}) AGAINST (${q} IN BOOLEAN MODE)`; }) ) diff --git a/app/utils/practice-system.test.ts b/app/utils/practice-system.test.ts index 3fedd44af6..bd62371cae 100644 --- a/app/utils/practice-system.test.ts +++ b/app/utils/practice-system.test.ts @@ -41,6 +41,7 @@ describe("PracticeSystem", () => { .insert(T.bookmarkEntries) .values({ text: "Bonjour à tous", + textCharacters: [], side: 0, offset: 8, userId: hook.user.id, From eb258ff7c7f9f196c00853aa0b97fc9cd9c49fa5 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 17:37:45 +0900 Subject: [PATCH 08/10] chore: update knex migrations --- .../20230511032932_bookmarkEntries-text-fulltext-key.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts index df8ffaa0ad..977830cb29 100644 --- a/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts +++ b/app/db/migrations/20230511032932_bookmarkEntries-text-fulltext-key.ts @@ -2,10 +2,12 @@ import type { Knex } from "knex"; export async function up(knex: Knex) { await knex.raw( - "ALTER TABLE `bookmarkEntries` ADD FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ;" + "ALTER TABLE `bookmarkEntries` ADD COLUMN `textCharacters` json NOT NULL DEFAULT (json_array()) AFTER `text`, ADD KEY `bookmarkEntries_textCharacters_key` ((cast(`textCharacters` as char(1) array))), ADD FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ;" ); } export async function down(knex: Knex) { - await knex.raw("ALTER TABLE `bookmarkEntries` DROP KEY `text`;"); + await knex.raw( + "ALTER TABLE 
`bookmarkEntries` DROP COLUMN `textCharacters`, DROP KEY `bookmarkEntries_textCharacters_key`, DROP KEY `text`;" + ); } From 881ab28bc713c095ac34ea2ef30a236433e3883c Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 17:41:46 +0900 Subject: [PATCH 09/10] chore: skeema lint --- app/db/skeema/bookmarkEntries.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 7132db73a5..4e4a284be2 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -15,5 +15,5 @@ CREATE TABLE `bookmarkEntries` ( KEY `bookmarkEntries_videoId_key` (`videoId`), KEY `bookmarkEntries_captionEntryId_key` (`captionEntryId`), KEY `bookmarkEntries_textCharacters_key` ((cast(`textCharacters` as char(1) array))), - FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ + FULLTEXT KEY `text` (`text`) /*!50100 WITH PARSER `ngram` */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; From 234d79590f7b672258deeee30ff7fbef44ba5db4 Mon Sep 17 00:00:00 2001 From: Hiroshi Ogawa Date: Thu, 11 May 2023 17:42:02 +0900 Subject: [PATCH 10/10] chore: skeema lint --- app/db/skeema/bookmarkEntries.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/app/db/skeema/bookmarkEntries.sql b/app/db/skeema/bookmarkEntries.sql index 4e4a284be2..b9cb1a0fe6 100644 --- a/app/db/skeema/bookmarkEntries.sql +++ b/app/db/skeema/bookmarkEntries.sql @@ -1,4 +1,3 @@ --- `textChars` for quick workaround of single character search CREATE TABLE `bookmarkEntries` ( `id` bigint NOT NULL AUTO_INCREMENT, `text` text NOT NULL,