feat: add option to include parse tokens in serialize #1741

Draft · wants to merge 1 commit into base: main

packages/@atjson/document/src/serialize.ts (60 additions, 34 deletions)

@@ -123,12 +123,6 @@ export enum TokenType {
   MARK_COLLAPSED,
 }
 
-const START_TOKENS = [
-  TokenType.BLOCK_START,
-  TokenType.MARK_START,
-  TokenType.PARSE_START,
-];
-
 class Root extends BlockAnnotation {
   static vendorPrefix = "atjson";
   static type = "root";

@@ -143,49 +143,78 @@ export type Token = {
   edgeBehaviour: { leading: EdgeBehaviour; trailing: EdgeBehaviour };
 };
 
-export function sortTokens(a: Token, b: Token) {
-  let indexDelta = a.index - b.index;
+export function sortTokens(lhs: Token, rhs: Token) {
+  let indexDelta = lhs.index - rhs.index;
   if (indexDelta !== 0) {
     return indexDelta;
   }
 
+  let isLhsStart =
+    lhs.type === TokenType.BLOCK_START ||
+    lhs.type === TokenType.MARK_START ||
+    lhs.type === TokenType.PARSE_START;
+  let isLhsEnd = !isLhsStart;
+  let isRhsStart =
+    rhs.type === TokenType.BLOCK_START ||
+    rhs.type === TokenType.MARK_START ||
+    rhs.type === TokenType.PARSE_START;
+  let isRhsEnd = !isRhsStart;
+
   // Handle start before end for a 0 length mark:
   // We're assuming that one of `a` or `b` is a start
   // token and the other is the end token. Sort the start
   // token first
-  if (a.annotation.id === b.annotation.id) {
-    return START_TOKENS.indexOf(a.type) !== -1 ? -1 : 1;
+  if (lhs.annotation.id === rhs.annotation.id) {
+    return isLhsStart ? -1 : 1;
   }
 
   // Sort end tokens before start tokens
-  if (
-    START_TOKENS.indexOf(a.type) !== -1 &&
-    START_TOKENS.indexOf(b.type) === -1
-  ) {
+  if (isLhsStart && isRhsEnd) {
     return 1;
-  } else if (
-    START_TOKENS.indexOf(a.type) === -1 &&
-    START_TOKENS.indexOf(b.type) !== -1
-  ) {
+  } else if (isLhsEnd && isRhsStart) {
     return -1;
   }
-  let multiplier = START_TOKENS.indexOf(a.type) === -1 ? -1 : 1;
 
-  let startDelta = b.annotation.start - a.annotation.start;
-  if (startDelta !== 0) {
-    return startDelta * multiplier;
-  }
-  let endDelta = b.annotation.end - a.annotation.end;
-  if (endDelta !== 0) {
-    return endDelta * multiplier;
+  // In the following example, we are sorting tokens
+  // where the start tokens are in the same position
+  // and the end positions are different.
+  //
+  // We always want to create the most contiguous
+  // non-overlapping ranges, so we will place the
+  // parse token in this example _after_ the paragraph
+  // start token.
+  //
+  // In the ending case, we will put the parse token
+  // end token _before_ the paragraph end token.
+  //
+  // ```
+  // <p>Hello, world</p>
+  // ^ ^            ^  ^
+  // |-| ParseToken |--|
+  // |----Paragraph----|
+  // ```
+  if (isLhsStart && isRhsStart) {
+    if (lhs.annotation.end < rhs.annotation.end) {
+      return 1;
+    } else if (lhs.annotation.end > rhs.annotation.end) {
+      return -1;
+    }
+  } else if (isLhsEnd && isRhsEnd) {
+    if (lhs.annotation.start < rhs.annotation.start) {
+      return 1;
+    } else if (lhs.annotation.start > rhs.annotation.start) {
+      return -1;
+    }
   }
-  let rankDelta = a.annotation.rank - b.annotation.rank;
+
+  let multiplier = isLhsEnd ? -1 : 1;
+  let rankDelta = lhs.annotation.rank - rhs.annotation.rank;
   if (rankDelta !== 0) {
     return rankDelta * multiplier;
   }
-  return a.annotation.type > b.annotation.type
+  return lhs.annotation.type > rhs.annotation.type
     ? multiplier * -1
-    : a.annotation.type < b.annotation.type
+    : lhs.annotation.type < rhs.annotation.type
     ? multiplier
     : 0;
 }
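
To make the new ordering concrete, here is a small sketch of my own (not part of the diff). It assumes `sortTokens`, `Token`, and `TokenType` are reachable from the package entry point (they are exported from src/serialize.ts), and it stubs out the fields that `sortTokens` never reads; the annotation literals are hypothetical stand-ins for real annotations.

```ts
import { sortTokens, TokenType } from "@atjson/document";
import type { Token } from "@atjson/document";

// sortTokens only reads `index`, `type`, and `annotation.{id,start,end,rank,type}`,
// so plain literals (cast through unknown) stand in for real annotation instances.
const paragraph = { id: "a1", start: 0, end: 19, rank: 10, type: "paragraph" };
const parseToken = { id: "a2", start: 0, end: 3, rank: 10, type: "parse-token" };
const edges = { leading: 0, trailing: 0 };

// Two start tokens at the same index, mirroring the
// `<p>Hello, world</p>` example in the comment above.
const tokens = [
  { index: 0, type: TokenType.PARSE_START, annotation: parseToken, edgeBehaviour: edges },
  { index: 0, type: TokenType.BLOCK_START, annotation: paragraph, edgeBehaviour: edges },
] as unknown as Token[];

tokens.sort(sortTokens);

// The paragraph start sorts first because its annotation ends later,
// nesting the parse token inside the paragraph rather than interleaving them.
console.log(tokens.map((t) => t.annotation.type));
// => ["paragraph", "parse-token"]
```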

@@ -231,6 +254,7 @@ export function serialize(
     withStableIds?: boolean;
     includeBlockRanges?: boolean;
     onUnknown?: "warn" | "throw" | "ignore";
+    includeParseTokens?: boolean;
   }
 ): { text: string; blocks: Block[]; marks: Mark[] } {
   // Blocks and object annotations are both stored

@@ -288,11 +312,13 @@ export function serialize(
   for (let annotation of doc.annotations) {
     let isBlockAnnotation = annotation instanceof BlockAnnotation;
     let isObjectAnnotation = annotation instanceof ObjectAnnotation;
-    let types: [TokenType, TokenType] = is(annotation, ParseAnnotation)
-      ? [TokenType.PARSE_START, TokenType.PARSE_END]
-      : isBlockAnnotation || isObjectAnnotation
-      ? [TokenType.BLOCK_START, TokenType.BLOCK_END]
-      : [TokenType.MARK_START, TokenType.MARK_END];
+    let isParseToken = is(annotation, ParseAnnotation);
+    let types: [TokenType, TokenType] =
+      isParseToken && !options?.includeParseTokens
+        ? [TokenType.PARSE_START, TokenType.PARSE_END]
+        : isBlockAnnotation || isObjectAnnotation
+        ? [TokenType.BLOCK_START, TokenType.BLOCK_END]
+        : [TokenType.MARK_START, TokenType.MARK_END];
     let edgeBehaviour = annotation.getAnnotationConstructor().edgeBehaviour;
     let shared = { start: -1 };
     if (
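
A minimal usage sketch of the new option (my example, not from the diff), assuming `serialize` and the `Document` base class are exported from `@atjson/document` as in published versions of the package; `doc` stands in for any document instance:

```ts
import Document from "@atjson/document";
import { serialize } from "@atjson/document";

declare const doc: Document; // any atjson document, e.g. the TestSource documents below

// Default behaviour: parse tokens are stripped from the serialized text.
const plain = serialize(doc);

// With the new flag, each ParseAnnotation survives as a "parse-token" mark
// and the original source text (e.g. "<p>…</p>") is preserved in `text`.
const withTokens = serialize(doc, { includeParseTokens: true });
console.log(withTokens.marks.filter((mark) => mark.type === "parse-token"));
```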

packages/@atjson/document/test/serialize.test.ts (142 additions, 0 deletions)

@@ -727,6 +727,148 @@ describe("serialize", () => {
     });
   });
 
+  test("includeParseTokens = true", () => {
+    expect(
+      serialize(
+        new TestSource({
+          content: "<p>hello, <em>world</em></p>",
+          annotations: [
+            new Paragraph({
+              start: 0,
+              end: 28,
+            }),
+            new ParseAnnotation({
+              start: 0,
+              end: 3,
+            }),
+            new Italic({
+              start: 10,
+              end: 24,
+              attributes: {},
+            }),
+            new ParseAnnotation({
+              start: 10,
+              end: 14,
+            }),
+            new ParseAnnotation({
+              start: 19,
+              end: 24,
+            }),
+            new ParseAnnotation({
+              start: 24,
+              end: 28,
+            }),
+          ],
+        }),
+        { includeParseTokens: true, withStableIds: true }
+      )
+    ).toMatchInlineSnapshot(`
+      {
+        "blocks": [
+          {
+            "attributes": {},
+            "id": "B00000000",
+            "parents": [],
+            "selfClosing": false,
+            "type": "paragraph",
+          },
+        ],
+        "marks": [
+          {
+            "attributes": {},
+            "id": "M00000000",
+            "range": "(1..4]",
+            "type": "parse-token",
+          },
+          {
+            "attributes": {},
+            "id": "M00000001",
+            "range": "(11..15]",
+            "type": "parse-token",
+          },
+          {
+            "attributes": {},
+            "id": "M00000002",
+            "range": "(11..25]",
+            "type": "italic",
+          },
+          {
+            "attributes": {},
+            "id": "M00000003",
+            "range": "(20..25]",
+            "type": "parse-token",
+          },
+          {
+            "attributes": {},
+            "id": "M00000004",
+            "range": "(25..29]",
+            "type": "parse-token",
+          },
+        ],
+        "text": "<p>hello, <em>world</em></p>",
+      }
+    `);
+  });
+
+  test("includeParseTokens = false", () => {
+    expect(
+      serialize(
+        new TestSource({
+          content: "<p>hello, <em>world</em></p>",
+          annotations: [
+            new Paragraph({
+              start: 0,
+              end: 28,
+            }),
+            new ParseAnnotation({
+              start: 0,
+              end: 3,
+            }),
+            new Italic({
+              start: 10,
+              end: 24,
+              attributes: {},
+            }),
+            new ParseAnnotation({
+              start: 10,
+              end: 14,
+            }),
+            new ParseAnnotation({
+              start: 19,
+              end: 24,
+            }),
+            new ParseAnnotation({
+              start: 24,
+              end: 28,
+            }),
+          ],
+        }),
+        { includeParseTokens: false, withStableIds: true }
+      )
+    ).toMatchInlineSnapshot(`
+      {
+        "blocks": [
+          {
+            "attributes": {},
+            "id": "B00000000",
+            "parents": [],
+            "selfClosing": false,
+            "type": "paragraph",
+          },
+        ],
+        "marks": [
+          {
+            "attributes": {},
+            "id": "M00000000",
+            "range": "(8..13]",
+            "type": "italic",
+          },
+        ],
+        "text": "hello, world",
+      }
+    `);
+  });
+
   test("text block insertion", () => {
     expect(
       serialize(
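
A reading note on the snapshots above, inferred from the data rather than stated anywhere in this diff: mark ranges are measured against the serialized `text`, which evidently begins with one block-boundary character for the paragraph, so every range sits one position to the right of the raw string offset (the italic at 10..24 in the source becomes "(11..25]", and "world" at 7..12 in "hello, world" becomes "(8..13]"). A quick check, assuming the boundary character is U+FFFC:

```ts
// "(8..13]" from the includeParseTokens = false snapshot should address
// "world". Assumption: the serialized text is prefixed with U+FFFC
// (an object-replacement character) standing in for the paragraph block.
const serializedText = "\uFFFChello, world";
console.log(serializedText.slice(8, 13)); // => "world"
```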