Skip to content

Commit

Permalink
simplify ids - just take all child ids
Browse files Browse the repository at this point in the history
  • Loading branch information
spacecowboy committed Feb 7, 2025
1 parent e269d63 commit 98d13ec
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ class HtmlLinearizer {
finalizeAndAddCurrentElement(blockStyle)

val cite = element.attr("cite").ifBlank { null }
// NOTE: This is not entirely correct. If the blockquote contains elements which
// split it into several LinearBlockQuotes, every piece receives the same set of
// ids, so ids will be duplicated. This won't crash, but in-page links may go to
// the wrong element.
val ids = element.allIds()

// Text should be collected into LinearBlockQuote
// Other types should be added separately
Expand All @@ -356,6 +360,7 @@ class HtmlLinearizer {
if (acc.isNotEmpty()) {
add(
LinearBlockQuote(
ids = ids,
cite = cite,
content = acc,
),
Expand All @@ -370,6 +375,7 @@ class HtmlLinearizer {
if (it.isNotEmpty()) {
add(
LinearBlockQuote(
ids = ids,
cite = cite,
content = it,
),
Expand Down Expand Up @@ -410,6 +416,7 @@ class HtmlLinearizer {
parseIframeVideo(it)
}
} else {
val ids = element.allIds()
// Wordpress likes nested figures to get images side by side
val imageCandidates =
element.descendantImageCandidates(baseUrl = baseUrl)
Expand All @@ -435,6 +442,7 @@ class HtmlLinearizer {

add(
LinearImage(
ids = ids,
sources = imageCandidates,
caption = caption,
link = link,
Expand All @@ -447,13 +455,16 @@ class HtmlLinearizer {
"img" -> {
finalizeAndAddCurrentElement(blockStyle)

val ids = element.allIds()

getImageSource(baseUrl, element).let { candidates ->
if (candidates.isNotEmpty()) {
val captionText: String? =
stripHtml(element.attr("alt"))
.takeIf { it.isNotBlank() }
add(
LinearImage(
ids = ids,
sources = candidates,
// Parse a LinearText with annotations from element.attr("alt")
caption =
Expand Down Expand Up @@ -482,6 +493,7 @@ class HtmlLinearizer {
.forEachIndexed { index, listItem ->
val item =
LinearListItem(
ids = element.allIds(),
orderedIndex =
if (ordered) {
index + 1
Expand Down Expand Up @@ -520,6 +532,7 @@ class HtmlLinearizer {

// This can also be auto, but for tables that's equivalent to LTR probably
val leftToRight = element.attrInHierarchy("dir") != "rtl"
val ids = element.allIds()

val rowSequence =
sequence<Element> {
Expand Down Expand Up @@ -561,7 +574,7 @@ class HtmlLinearizer {
)
} else {
add(
LinearTable.build(leftToRight = leftToRight) {
LinearTable.build(ids = ids, leftToRight = leftToRight) {
rowSequence
.forEach { row ->
newRow()
Expand Down Expand Up @@ -617,7 +630,7 @@ class HtmlLinearizer {
.takeIf { it.isNotEmpty() }

if (sources != null) {
add(LinearAudio(sources))
add(LinearAudio(ids = element.allIds(), sources = sources))
}
}

Expand Down Expand Up @@ -648,7 +661,7 @@ class HtmlLinearizer {
.takeIf { it.isNotEmpty() }

if (sources != null) {
add(LinearVideo(sources))
add(LinearVideo(ids = element.allIds(), sources = sources))
}
}

Expand All @@ -673,6 +686,7 @@ class HtmlLinearizer {
getVideo(element.attr("abs:src").ifBlank { null })?.let { video ->
add(
LinearVideo(
ids = element.allIds(),
sources =
listOf(
LinearVideoSource(
Expand Down Expand Up @@ -1044,3 +1058,18 @@ private fun resolve(
}
return URL(base, rel)
}

/**
 * Yields every descendant of [element] in depth-first, pre-order: each child is
 * yielded first, immediately followed by that child's own descendants.
 *
 * [element] itself is not yielded.
 */
private suspend fun SequenceScope<Element>.yieldDescendantsOf(element: Element) {
    element.children().forEach { descendant ->
        yield(descendant)
        // Recurse before moving on to the next sibling
        yieldDescendantsOf(descendant)
    }
}

/**
 * Collects the ids of this element and all of its descendants.
 *
 * Elements whose id is an empty string are skipped.
 *
 * @return the distinct, non-empty ids found, starting with this element's own id
 */
private fun Element.allIds(): Set<String> {
    val root = this

    // The element itself first, then everything below it
    val selfAndDescendants =
        sequence<Element> {
            yield(root)
            yieldDescendantsOf(root)
        }

    return selfAndDescendants
        .mapNotNull { node -> node.id().ifEmpty { null } }
        .toSet()
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,17 @@ data class LinearArticle(
*/
sealed interface LinearElement

fun LinearElement.ids(): Set<String> =
when (this) {
is LinearAudio -> emptySet() // TODO
is LinearBlockQuote -> idsSeq().toSet()
is LinearImage -> emptySet() // TODO
is LinearListItem -> idsSeq().toSet()
fun LinearElement.ids(): Set<String> {
return when(this) {
is LinearAudio -> ids
is LinearBlockQuote -> ids
is LinearImage -> ids
is LinearListItem -> ids
is LinearText -> ids
is LinearTable -> emptySet() // TODO
is LinearVideo -> emptySet() // TODO
}

fun LinearElement.idsSeq(): Sequence<String> =
when (this) {
is LinearAudio -> emptySequence() // TODO
is LinearBlockQuote -> content.asSequence().flatMap { it.idsSeq() }
is LinearImage -> emptySequence() // TODO
is LinearListItem -> content.asSequence().flatMap { it.idsSeq() }
is LinearText -> ids.asSequence()
is LinearTable -> emptySequence() // TODO
is LinearVideo -> emptySequence() // TODO
is LinearTable -> ids
is LinearVideo -> ids
}
}

/**
* Represents a list of items, ordered or unordered
Expand Down Expand Up @@ -87,27 +77,29 @@ data class LinearList(
* Represents a single item in a list
*/
data class LinearListItem(
val ids: Set<String>,
// If non-null, this is part of a ordered list and this is the user-visible index
val orderedIndex: Int?,
val content: List<LinearElement>,
) : LinearElement {
constructor(orderedIndex: Int?, block: ListBuilderScope<LinearElement>.() -> Unit) : this(orderedIndex = orderedIndex, content = ListBuilderScope(block).items)
): LinearElement {
constructor(ids: Set<String>, orderedIndex: Int?, block: ListBuilderScope<LinearElement>.() -> Unit) : this(ids = ids, orderedIndex = orderedIndex, content = ListBuilderScope(block).items)

constructor(orderedIndex: Int?, vararg elements: LinearElement) : this(orderedIndex = orderedIndex, content = elements.toList())
constructor(ids: Set<String>, orderedIndex: Int?, vararg elements: LinearElement) : this(ids = ids, orderedIndex = orderedIndex, content = elements.toList())

fun isEmpty(): Boolean = content.isEmpty()

fun isNotEmpty(): Boolean = content.isNotEmpty()

class Builder {
var ids = mutableSetOf<String>()
var orderedIndex: Int? = null
private val content: MutableList<LinearElement> = mutableListOf()

fun add(element: LinearElement) {
content.add(element)
}

fun build(): LinearListItem = LinearListItem(orderedIndex = orderedIndex, content = content)
fun build(): LinearListItem = LinearListItem(ids = ids, orderedIndex = orderedIndex, content = content)
}

companion object {
Expand All @@ -119,6 +111,7 @@ data class LinearListItem(
* Represents a table
*/
data class LinearTable(
val ids: Set<String>,
val rowCount: Int,
val colCount: Int,
private val cellsReal: ArrayMap<Coordinate, LinearTableCellItem>,
Expand All @@ -127,11 +120,13 @@ data class LinearTable(
get() = cellsReal

constructor(
ids: Set<String>,
rowCount: Int,
colCount: Int,
cells: List<LinearTableCellItem>,
leftToRight: Boolean,
) : this(
ids,
rowCount,
colCount,
ArrayMap<Coordinate, LinearTableCellItem>().apply {
Expand All @@ -158,6 +153,7 @@ data class LinearTable(
): LinearTableCellItem? = cells[Coordinate(row = row, col = col)]

class Builder(
val ids: Set<String>,
val leftToRight: Boolean,
) {
private val cells: ArrayMap<Coordinate, LinearTableCellItem> = ArrayMap()
Expand Down Expand Up @@ -208,6 +204,7 @@ data class LinearTable(

fun build(): LinearTable =
LinearTable(
ids = ids,
rowCount = rowCount,
colCount = colCount,
cellsReal =
Expand All @@ -231,9 +228,10 @@ data class LinearTable(

companion object {
fun build(
ids: Set<String>,
leftToRight: Boolean,
block: Builder.() -> Unit,
): LinearTable = Builder(leftToRight = leftToRight).apply(block).build()
): LinearTable = Builder(ids = ids, leftToRight = leftToRight).apply(block).build()
}
}

Expand Down Expand Up @@ -299,12 +297,13 @@ enum class LinearTableCellItemType {
}

/**
 * Represents a block quote
 *
 * @property ids ids associated with this quote. The HTML linearizer passes the ids found on
 * the source element and its descendants; previews pass an empty set.
 * @property cite optional citation for the quote. The HTML linearizer passes the element's
 * `cite` attribute, or null when it is blank.
 * @property content the elements contained in the quote
 */
data class LinearBlockQuote(
val ids: Set<String>,
val cite: String?,
val content: List<LinearElement>,
) : LinearElement {
constructor(cite: String?, block: ListBuilderScope<LinearElement>.() -> Unit) : this(cite = cite, content = ListBuilderScope(block).items)
constructor(ids: Set<String>, cite: String?, block: ListBuilderScope<LinearElement>.() -> Unit) : this(ids = ids, cite = cite, content = ListBuilderScope(block).items)

constructor(cite: String?, vararg elements: LinearElement) : this(cite = cite, content = elements.toList())
constructor(ids: Set<String>, cite: String?, vararg elements: LinearElement) : this(ids = ids, cite = cite, content = elements.toList())
}

/**
Expand Down Expand Up @@ -342,6 +341,7 @@ val LinearTextBlockStyle.shouldSoftWrap: Boolean
* Represents an image element
*/
data class LinearImage(
val ids: Set<String>,
val sources: List<LinearImageSource>,
val caption: LinearText?,
val link: String?,
Expand Down Expand Up @@ -374,6 +374,7 @@ data class LinearImageSource(
* Represents a video element
*/
data class LinearVideo(
val ids: Set<String>,
val sources: List<LinearVideoSource>,
) : LinearElement {
init {
Expand Down Expand Up @@ -411,6 +412,7 @@ data class LinearVideoSource(
* Represents an audio element
*/
data class LinearAudio(
val ids: Set<String>,
val sources: List<LinearAudioSource>,
) : LinearElement {
init {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,7 @@ private fun PreviewTextElement() {
private fun PreviewBlockQuote() {
val blockQuote =
LinearBlockQuote(
ids = emptySet(),
cite = "https://example.com",
content =
listOf(
Expand Down Expand Up @@ -1100,6 +1101,7 @@ private fun PreviewPreFormatted() {
private fun PreviewLinearOrderedListContent() {
PreviewContent(
LinearListItem(
ids = emptySet(),
orderedIndex = 1,
content =
listOf(
Expand All @@ -1118,6 +1120,7 @@ private fun PreviewLinearOrderedListContent() {
private fun PreviewLinearUnorderedListContent() {
PreviewContent(
LinearListItem(
ids = emptySet(),
orderedIndex = null,
content =
listOf(
Expand All @@ -1136,6 +1139,7 @@ private fun PreviewLinearUnorderedListContent() {
private fun PreviewLinearImageContent() {
val linearImage =
LinearImage(
ids = emptySet(),
sources =
listOf(
LinearImageSource(
Expand Down Expand Up @@ -1163,6 +1167,7 @@ private fun PreviewLinearImageContent() {
private fun PreviewLinearTableContent() {
val linearTable =
LinearTable(
ids = emptySet(),
rowCount = 2,
colCount = 2,
leftToRight = false,
Expand Down Expand Up @@ -1231,6 +1236,7 @@ private fun PreviewLinearTableContent() {
private fun PreviewNestedTableContent() {
val linearTable =
LinearTable(
ids = emptySet(),
rowCount = 2,
colCount = 2,
leftToRight = false,
Expand All @@ -1243,6 +1249,7 @@ private fun PreviewNestedTableContent() {
content =
listOf(
LinearImage(
ids = emptySet(),
sources =
listOf(
LinearImageSource(
Expand Down Expand Up @@ -1270,6 +1277,7 @@ private fun PreviewNestedTableContent() {
content =
listOf(
LinearListItem(
ids = emptySet(),
orderedIndex = 1,
content =
listOf(
Expand All @@ -1281,6 +1289,7 @@ private fun PreviewNestedTableContent() {
),
),
LinearListItem(
ids = emptySet(),
orderedIndex = 2,
content =
listOf(
Expand All @@ -1292,6 +1301,7 @@ private fun PreviewNestedTableContent() {
),
),
LinearListItem(
ids = emptySet(),
orderedIndex = 3,
content =
listOf(
Expand Down Expand Up @@ -1324,6 +1334,7 @@ private fun PreviewNestedTableContent() {
content =
listOf(
LinearTable(
ids = emptySet(),
rowCount = 2,
colCount = 2,
leftToRight = false,
Expand Down Expand Up @@ -1396,6 +1407,7 @@ private fun PreviewNestedTableContent() {
private fun PreviewColSpanningTable() {
val linearTable =
LinearTable(
ids = emptySet(),
rowCount = 2,
colCount = 2,
cellsReal =
Expand Down

0 comments on commit 98d13ec

Please sign in to comment.