Skip to content

Commit

Permalink
fix: improved parsing of markdown fenced codeblocks' langtag args
Browse files (browse the repository at this point in the history)
  • Loading branch information
ErikBjare committed Jan 29, 2025
1 parent 1adf2dc commit 8e7359d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 14 deletions.
26 changes: 17 additions & 9 deletions src/components/ChatMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,22 @@ marked.use(
})
);

export function handleWrappedFencedCodeBlocks(content: string) {
/**
 * Return shape of `processNestedCodeBlocks`.
 */
interface ProcessedContent {
  /**
   * The markdown text after nested-fence handling. When the input holds
   * fewer than two triple-backtick markers it is the input, unchanged.
   */
  processedContent: string;
  /**
   * Language tags of the opening fences that were rewritten to `~~~`,
   * in the order they were encountered. Empty when nothing was rewritten.
   */
  fences: Array<string>;
}

export function processNestedCodeBlocks(content: string): ProcessedContent {
// Early exit if no code blocks
if (content.split('```').length < 3) {
return content;
return { processedContent: content, fences: [] };
}

const lines = content.split('\n');
const stack: string[] = []; // Stack of language tags to track nesting
let result = '';
let currentBlock: string[] = [];
const fences: string[] = []; // Store all fence info for later use

for (const line of lines) {
const strippedLine = line.trim();
Expand All @@ -48,6 +54,7 @@ export function handleWrappedFencedCodeBlocks(content: string) {
const remainingContent = lines.slice(lines.indexOf(line) + 1).join('\n');
if (remainingContent.includes('```') && remainingContent.split('```').length > 2) {
stack.push(lang);
fences.push(lang); // Store fence info
result += '~~~' + lang + '\n';
} else {
result += line + '\n';
Expand Down Expand Up @@ -77,7 +84,10 @@ export function handleWrappedFencedCodeBlocks(content: string) {
}
}

return result.trim();
return {
processedContent: result.trim(),
fences
};
}

export function transformThinkingTags(content: string) {
Expand All @@ -103,15 +113,13 @@ export const ChatMessage: FC<Props> = ({ message }) => {
const processContent = async () => {
try {
// Transform thinking tags before markdown parsing
let processedContent = transformThinkingTags(content);
const processedContent = transformThinkingTags(content);

// Handle wrapped fenced code blocks
processedContent = handleWrappedFencedCodeBlocks(processedContent);

// Find start fences for codeblocks (hljs doesn't include paths like "```save PATH")
const fences = [...processedContent.matchAll(/(~~~|```)[^\n]+/g)].map((s: RegExpExecArray) => s[0].replace(/(~~~|```)/g, ""));
// Process nested code blocks and collect fence info
const { processedContent: transformedContent, fences } = processNestedCodeBlocks(processedContent);

let parsedResult = await marked.parse(processedContent, {
let parsedResult = await marked.parse(transformedContent, {
async: true,
});

Expand Down
19 changes: 14 additions & 5 deletions src/components/__tests__/ChatMessage.test.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { handleWrappedFencedCodeBlocks, transformThinkingTags } from '../ChatMessage';
import { processNestedCodeBlocks, transformThinkingTags } from '../ChatMessage';

describe('handleWrappedFencedCodeBlocks', () => {
describe('processNestedCodeBlocks', () => {
it('should handle nested code blocks', () => {
const input = `\`\`\`markdown
Here's a nested block
Expand All @@ -16,15 +16,21 @@ print("hello")
\`\`\`
~~~`;

expect(handleWrappedFencedCodeBlocks(input)).toBe(expected);
expect(processNestedCodeBlocks(input)).toEqual({
processedContent: expected,
fences: ['markdown']
});
});

it('should not modify single code blocks', () => {
const input = `\`\`\`python
print("hello")
\`\`\``;

expect(handleWrappedFencedCodeBlocks(input)).toBe(input);
expect(processNestedCodeBlocks(input)).toEqual({
processedContent: input,
fences: []
});
});

it('should handle multiple nested blocks', () => {
Expand All @@ -50,7 +56,10 @@ console.log("world")
\`\`\`
~~~`;

expect(handleWrappedFencedCodeBlocks(input)).toBe(expected);
expect(processNestedCodeBlocks(input)).toEqual({
processedContent: expected,
fences: ['markdown']
});
});
});

Expand Down

0 comments on commit 8e7359d

Please sign in to comment.