-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Footnotes: Add block-level caching when parsing content for footnotes #52577
Changes from all commits
76e54ec
6c29f63
5bf0bef
6f99986
badef7c
62f34ae
919d6c5
0020e54
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import getRichTextValuesCached from './get-rich-text-values-cached'; | ||
|
||
const cache = new WeakMap(); | ||
|
||
function getBlockFootnotesOrder( block ) { | ||
if ( ! cache.has( block ) ) { | ||
const content = getRichTextValuesCached( block ).join( '' ); | ||
const newOrder = []; | ||
|
||
// https://github.com/WordPress/gutenberg/pull/43204 lands. We can then | ||
// get the order directly from the rich text values. | ||
if ( content.indexOf( 'data-fn' ) !== -1 ) { | ||
const regex = /data-fn="([^"]+)"/g; | ||
let match; | ||
while ( ( match = regex.exec( content ) ) !== null ) { | ||
newOrder.push( match[ 1 ] ); | ||
} | ||
} | ||
cache.set( block, newOrder ); | ||
} | ||
|
||
return cache.get( block ); | ||
} | ||
|
||
export default function getFootnotesOrder( blocks ) { | ||
return blocks.flatMap( getBlockFootnotesOrder ); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/** | ||
* WordPress dependencies | ||
*/ | ||
import { privateApis as blockEditorPrivateApis } from '@wordpress/block-editor'; | ||
|
||
/** | ||
* Internal dependencies | ||
*/ | ||
import { unlock } from '../private-apis'; | ||
|
||
// TODO: The following line should have been: | ||
// | ||
// const unlockedApis = unlock( blockEditorPrivateApis ); | ||
// | ||
// But there are hidden circular dependencies in RNMobile code, specifically in | ||
// certain native components in the `components` package that depend on | ||
// `block-editor`. What follows is a workaround that defers the `unlock` call | ||
// to prevent native code from failing. | ||
// | ||
// Fix once https://github.com/WordPress/gutenberg/issues/52692 is closed. | ||
let unlockedApis; | ||
|
||
const cache = new WeakMap(); | ||
|
||
export default function getRichTextValuesCached( block ) { | ||
if ( ! unlockedApis ) { | ||
unlockedApis = unlock( blockEditorPrivateApis ); | ||
} | ||
|
||
if ( ! cache.has( block ) ) { | ||
const values = unlockedApis.getRichTextValues( [ block ] ); | ||
cache.set( block, values ); | ||
} | ||
return cache.get( block ); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import getFootnotesOrder from './get-footnotes-order'; | ||
|
||
let oldFootnotes = {}; | ||
|
||
export function updateFootnotesFromMeta( blocks, meta ) { | ||
const output = { blocks }; | ||
if ( ! meta ) return output; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this syntax makes me so nervous. it's been the vector of some of the biggest attacks and looks otherwise so innocent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you clarify? Is it the lack of braces around the THEN block? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that's right. it's essentially the same discussion as with automatic semicolon insertion, which the industry has overwhelmingly rejected. I thought we overwhelmingly rejected code blocks without braces after major exploits (notably the OpenSSL-bypass). even there are some trivial cases, particularly with comments, that can fool auto-formatters into creating the bugs. formatters move comments around, and the newlines and comments themselves can rip apart these coupled blocks. the braces tie them together. curious what the advantages are of doing it this way |
||
|
||
// If meta.footnotes is empty, it means the meta is not registered. | ||
if ( meta.footnotes === undefined ) return output; | ||
|
||
const newOrder = getFootnotesOrder( blocks ); | ||
|
||
const footnotes = meta.footnotes ? JSON.parse( meta.footnotes ) : []; | ||
const currentOrder = footnotes.map( ( fn ) => fn.id ); | ||
|
||
if ( currentOrder.join( '' ) === newOrder.join( '' ) ) return output; | ||
|
||
const newFootnotes = newOrder.map( | ||
( fnId ) => | ||
footnotes.find( ( fn ) => fn.id === fnId ) || | ||
oldFootnotes[ fnId ] || { | ||
id: fnId, | ||
content: '', | ||
} | ||
); | ||
|
||
function updateAttributes( attributes ) { | ||
// Only attempt to update attributes, if attributes is an object. | ||
if ( | ||
! attributes || | ||
Array.isArray( attributes ) || | ||
typeof attributes !== 'object' | ||
) { | ||
return attributes; | ||
} | ||
|
||
attributes = { ...attributes }; | ||
|
||
for ( const key in attributes ) { | ||
const value = attributes[ key ]; | ||
|
||
if ( Array.isArray( value ) ) { | ||
attributes[ key ] = value.map( updateAttributes ); | ||
continue; | ||
} | ||
|
||
if ( typeof value !== 'string' ) { | ||
continue; | ||
} | ||
|
||
if ( value.indexOf( 'data-fn' ) === -1 ) { | ||
continue; | ||
} | ||
|
||
// When we store rich text values, this would no longer | ||
// require a regex. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no regex is required today, no? and it's JS so it's easy to build a DOM of the HTML and then modify it without risking more corruption. it's also unclear to me what our purpose is here, partially because of the obscurity of the regex patterns themselves. const dom = parseHTML( value );
// Set the footnote id as the link's text.
dom.querySelectorAll( 'sup[data-fn] > a' ).forEach( ( node ) => {
const fnId = node.parentNode.getAttribute( 'data-fn' );
node.innerText = newOrder.indexOf( fnId ) + 1;
} );
// Replace footnotes that are only anchor tags with the <sup>-wrapped version.
// From: <a data-fn="a431c">a431c</a>
// To: <sup data-fn="a431c" class="fn"><a href="#a431c" id="a431c-link">a431c</a></sup>
dom.querySelectorAll( 'a[data-fn]' ).forEach( ( node ) => {
const fnId = node.parentNode.getAttribute( 'data-fn' );
const sup = dom.createElement( 'sup' );
sup.setAttribute( 'data-fn', fnId );
sup.classList.add( 'fn' );
const link = dom.createElement( 'a' );
link.setAttribute( 'href', `#${ fnId }` );
link.setAttribute( 'id', `${ fnId }-link` );
link.innerText = fnId + 1;
sup.appendChild( link );
node.parentNode.replaceNode( node, sup );
} );
attributes[ key ] = dom.outerHTML; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These regexes are just temporary until we get rich text values in the store. If we can ever do that at least :( I would prefer if we could cache the rich text values themselves somewhere instead of caching rich text values for a block. |
||
const regex = | ||
/(<sup[^>]+data-fn="([^"]+)"[^>]*><a[^>]*>)[\d*]*<\/a><\/sup>/g; | ||
|
||
attributes[ key ] = value.replace( | ||
regex, | ||
( match, opening, fnId ) => { | ||
const index = newOrder.indexOf( fnId ); | ||
return `${ opening }${ index + 1 }</a></sup>`; | ||
} | ||
); | ||
|
||
const compatRegex = /<a[^>]+data-fn="([^"]+)"[^>]*>\*<\/a>/g; | ||
|
||
attributes[ key ] = attributes[ key ].replace( | ||
compatRegex, | ||
( match, fnId ) => { | ||
const index = newOrder.indexOf( fnId ); | ||
return `<sup data-fn="${ fnId }" class="fn"><a href="#${ fnId }" id="${ fnId }-link">${ | ||
index + 1 | ||
}</a></sup>`; | ||
} | ||
); | ||
} | ||
|
||
return attributes; | ||
} | ||
|
||
function updateBlocksAttributes( __blocks ) { | ||
return __blocks.map( ( block ) => { | ||
return { | ||
...block, | ||
attributes: updateAttributes( block.attributes ), | ||
innerBlocks: updateBlocksAttributes( block.innerBlocks ), | ||
}; | ||
} ); | ||
} | ||
|
||
// We need to go through all block attributes deeply and update the | ||
// footnote anchor numbering (textContent) to match the new order. | ||
const newBlocks = updateBlocksAttributes( blocks ); | ||
|
||
oldFootnotes = { | ||
...oldFootnotes, | ||
...footnotes.reduce( ( acc, fn ) => { | ||
if ( ! newOrder.includes( fn.id ) ) { | ||
acc[ fn.id ] = fn; | ||
} | ||
return acc; | ||
}, {} ), | ||
}; | ||
|
||
return { | ||
meta: { | ||
...meta, | ||
footnotes: JSON.stringify( newFootnotes ), | ||
}, | ||
blocks: newBlocks, | ||
}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
given that we have easy access to a DOM here, why parse the HTML with the regular expressions? would it be just the same if we did this methodically instead? particularly since we're caching the values?