Repo created
This commit is contained in:
parent
75dc487a7a
commit
39c29d175b
6317 changed files with 388324 additions and 2 deletions
8
library/html-cleaner/build.gradle.kts
Normal file
8
library/html-cleaner/build.gradle.kts
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
// Build script for the library/html-cleaner module.
plugins {
|
||||
// Project-defined plugin id for a JVM library module.
id(ThunderbirdPlugins.Library.jvm)
|
||||
// Lint plugin resolved through the version catalog.
alias(libs.plugins.android.lint)
|
||||
}
|
||||
|
||||
dependencies {
|
||||
// jsoup supplies the HTML parser and the Cleaner/Safelist used by the cleaner classes in this module.
implementation(libs.jsoup)
|
||||
}
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.safety.Cleaner
|
||||
import org.jsoup.safety.Safelist
|
||||
|
||||
/**
 * Sanitizes the `<body>` of an HTML document with a jsoup [Cleaner].
 *
 * The allow list starts from [Safelist.relaxed] and is extended with additional tags and attributes.
 * After cleaning, the document type and an allow-listed subset of the attributes found on the dirty
 * document's `<body>` element are copied explicitly onto the cleaned document.
 */
internal class BodyCleaner {
    /** Attributes of the `<body>` element itself that are carried over to the cleaned document. */
    private val allowedBodyAttributes = setOf(
        "id", "class", "dir", "lang", "style",
        "alink", "background", "bgcolor", "link", "text", "vlink",
    )

    private val cleaner = Cleaner(createAllowList())

    /**
     * Returns a cleaned copy of [dirtyDocument].
     *
     * Note: the document type node, if present, is re-parented from [dirtyDocument] into the result.
     */
    fun clean(dirtyDocument: Document): Document {
        return cleaner.clean(dirtyDocument).also { cleanedDocument ->
            copyDocumentType(dirtyDocument, cleanedDocument)
            copyBodyAttributes(dirtyDocument, cleanedDocument)
        }
    }

    /** Builds the allow list of tags, attributes and URI schemes that survive cleaning. */
    private fun createAllowList(): Safelist {
        return Safelist.relaxed()
            .addTags(
                "font",
                "hr",
                "ins",
                "del",
                "center",
                "map",
                "area",
                "title",
                "tt",
                "kbd",
                "samp",
                "var",
                "style",
                "s",
            )
            .addAttributes("font", "color", "face", "size")
            .addAttributes("a", "name")
            .addAttributes("div", "align")
            .addAttributes(
                "table",
                "align",
                "background",
                "bgcolor",
                "border",
                "cellpadding",
                "cellspacing",
                "width",
            )
            .addAttributes("tr", "align", "background", "bgcolor", "valign")
            .addAttributes(
                "th",
                "align", "background", "bgcolor", "colspan", "headers", "height", "nowrap", "rowspan", "scope",
                "sorted", "valign", "width",
            )
            .addAttributes(
                "td",
                "align", "background", "bgcolor", "colspan", "headers", "height", "nowrap", "rowspan", "scope",
                "valign", "width",
            )
            .addAttributes("map", "name")
            .addAttributes("area", "shape", "coords", "href", "alt")
            .addProtocols("area", "href", "http", "https")
            .addAttributes("img", "usemap")
            .addAttributes(":all", "class", "style", "id", "dir")
            .addProtocols("img", "src", "http", "https", "cid", "data")
            // Allow all URI schemes in links
            .removeProtocols("a", "href", "ftp", "http", "https", "mailto")
    }

    /** Puts the dirty document's doctype (when there is one) in front of the cleaned document's children. */
    private fun copyDocumentType(dirtyDocument: Document, cleanedDocument: Document) {
        val documentType = dirtyDocument.documentType() ?: return
        cleanedDocument.insertChildren(0, documentType)
    }

    /** Copies every allow-listed attribute from the dirty `<body>` element to the cleaned one. */
    private fun copyBodyAttributes(dirtyDocument: Document, cleanedDocument: Document) {
        val targetBody = cleanedDocument.body()

        dirtyDocument.body().attributes()
            .filter { attribute -> attribute.key in allowedBodyAttributes }
            .forEach { attribute ->
                if (attribute.hasDeclaredValue()) {
                    targetBody.attr(attribute.key, attribute.value)
                } else {
                    // Value-less attribute (e.g. a bare `id`): keep it as a boolean attribute.
                    targetBody.attr(attribute.key, true)
                }
            }
    }
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
import org.jsoup.nodes.DataNode
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.Node
|
||||
import org.jsoup.nodes.TextNode
|
||||
import org.jsoup.parser.Tag
|
||||
import org.jsoup.select.NodeTraversor
|
||||
import org.jsoup.select.NodeVisitor
|
||||
|
||||
// Tag names that may be copied from the dirty <head> into the cleaned one.
private val ALLOWED_TAGS = listOf("style", "meta", "base")

/**
 * Copies the allowed parts of a dirty document's `<head>` into the `<head>` of an already cleaned document.
 */
internal class HeadCleaner {
    /**
     * Walks the `<head>` of [dirtyDocument] with a [CleaningVisitor] that appends the nodes it accepts
     * to the `<head>` of [cleanedDocument].
     */
    fun clean(dirtyDocument: Document, cleanedDocument: Document) {
        val sourceHead = dirtyDocument.head()
        val visitor = CleaningVisitor(sourceHead, cleanedDocument.head())
        NodeTraversor.traverse(visitor, sourceHead)
    }
}
|
||||
|
||||
/**
 * [NodeVisitor] that mirrors the allowed nodes below [root] into [destination].
 *
 * Elements whose tag is in `ALLOWED_TAGS` (and that are not a `<meta http-equiv="refresh">`) are re-created
 * under the current destination element. Any other element, except [root] itself, is skipped together
 * with its whole subtree.
 *
 * @param root the element the traversal starts at; it is never copied and never skipped.
 * @param destination the element that receives the copies; it is moved down and up as the traversal enters and
 * leaves copied elements.
 */
internal class CleaningVisitor(
    private val root: Element,
    private var destination: Element,
) : NodeVisitor {
    /** The element whose subtree is currently being ignored, or `null` when nothing is being skipped. */
    private var elementToSkip: Element? = null

    override fun head(source: Node, depth: Int) {
        // Inside a skipped subtree nothing is copied.
        if (elementToSkip != null) return

        when (source) {
            is Element -> {
                if (isSafeTag(source)) {
                    val copy = Element(Tag.valueOf(source.tagName()), source.baseUri(), source.attributes().clone())
                    destination.appendChild(copy)
                    // Descend: children of `source` are appended to the copy until tail() pops back up.
                    destination = copy
                } else if (source !== root) {
                    elementToSkip = source
                }
            }
            is TextNode -> destination.appendChild(TextNode(source.wholeText))
            is DataNode -> {
                if (isSafeTag(source.parent())) {
                    destination.appendChild(DataNode(source.wholeData))
                }
            }
        }
    }

    override fun tail(source: Node, depth: Int) {
        val skipped = elementToSkip
        if (skipped != null) {
            // While skipping, head() never descended into a copy, so there is nothing to pop here.
            // Without this guard an allowed element nested inside a skipped one (e.g. <style> inside
            // <noscript>) would move `destination` up past the element it belongs to.
            if (source === skipped) {
                elementToSkip = null
            }
            return
        }

        if (source is Element && isSafeTag(source)) {
            destination = destination.parent() ?: error("Missing parent")
        }
    }

    /** Returns `true` if [node] has an allow-listed tag name and is not a meta refresh element. */
    private fun isSafeTag(node: Node?): Boolean {
        if (node == null || isMetaRefresh(node)) return false

        val tag = node.nodeName().lowercase()
        return tag in ALLOWED_TAGS
    }

    /** Returns `true` for `<meta>` elements whose `http-equiv` value is "refresh", ignoring case and padding. */
    private fun isMetaRefresh(node: Node): Boolean {
        val tag = node.nodeName().lowercase()
        if (tag != "meta") return false

        val attributeValue = node.attributes().getIgnoreCase("http-equiv").trim().lowercase()
        return attributeValue == "refresh"
    }
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
/**
 * Supplies extra HTML for the `<head>` of a processed document.
 *
 * [HtmlProcessor] appends [headHtml] to the head element of the sanitized document.
 */
interface HtmlHeadProvider {
|
||||
/** HTML markup to append to the document's `<head>`. */
val headHtml: String
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Turns HTML into a sanitized, compact HTML string with custom `<head>` content added.
 *
 * @param htmlHeadProvider source of the markup that is appended to the `<head>` of every processed document.
 */
class HtmlProcessor(private val htmlHeadProvider: HtmlHeadProvider) {
    private val htmlSanitizer = HtmlSanitizer()

    /**
     * Sanitizes [html], appends the provider's head markup and serializes the document without pretty printing.
     */
    fun processForDisplay(html: String): String {
        val document = htmlSanitizer.sanitize(html)
        document.head().append(htmlHeadProvider.headHtml)
        return document.toCompactString()
    }

    /** Serializes the document with pretty printing disabled and an indent amount of zero. */
    private fun Document.toCompactString(): String {
        val settings = outputSettings()
        settings.prettyPrint(false)
        settings.indentAmount(0)

        return html()
    }
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Parses an HTML string and produces a cleaned [Document] by running [BodyCleaner] and then [HeadCleaner].
 */
internal class HtmlSanitizer {
    private val headCleaner = HeadCleaner()
    private val bodyCleaner = BodyCleaner()

    /**
     * Parses [html] with jsoup and returns the cleaned document.
     *
     * The body cleaner creates the cleaned document; the head cleaner then fills in its `<head>` from the
     * parsed input.
     */
    fun sanitize(html: String): Document {
        val dirtyDocument = Jsoup.parse(html)

        return bodyCleaner.clean(dirtyDocument).also { cleanedDocument ->
            headCleaner.clean(dirtyDocument, cleanedDocument)
        }
    }
}
|
||||
|
|
@ -0,0 +1,539 @@
|
|||
package app.k9mail.html.cleaner
|
||||
|
||||
import assertk.assertThat
|
||||
import assertk.assertions.isEqualTo
|
||||
import org.jsoup.nodes.Document
|
||||
import org.junit.Test
|
||||
|
||||
/**
 * Tests for [HtmlSanitizer]: each test sanitizes an HTML snippet and compares the compact serialization of the
 * result with the expected markup.
 */
class HtmlSanitizerTest {
    private val htmlSanitizer = HtmlSanitizer()

    /** Expected output of every test whose input only differs in where the meta refresh element is placed. */
    private val messageOnlyDocument = "<html><head></head><body>Message</body></html>"

    @Test
    fun shouldRemoveMetaRefreshInHead() {
        val html =
            """
            <html>
            <head><meta http-equiv="refresh" content="1; URL=http://example.com/"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshBetweenHeadAndBody() {
        val html =
            """
            <html>
            <head></head>
            <meta http-equiv="refresh" content="1; URL=http://example.com/">
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshInBody() {
        val html =
            """
            <html>
            <head></head>
            <body><meta http-equiv="refresh" content="1; URL=http://example.com/">Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshWithUpperCaseAttributeValue() {
        val html =
            """
            <html>
            <head><meta http-equiv="REFRESH" content="1; URL=http://example.com/"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshWithMixedCaseAttributeValue() {
        val html =
            """
            <html>
            <head><meta http-equiv="Refresh" content="1; URL=http://example.com/"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshWithoutQuotesAroundAttributeValue() {
        val html =
            """
            <html>
            <head><meta http-equiv=refresh content="1; URL=http://example.com/"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshWithSpacesInAttributeValue() {
        val html =
            """
            <html>
            <head><meta http-equiv="refresh " content="1; URL=http://example.com/"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMultipleMetaRefreshTags() {
        val html =
            """
            <html>
            <head><meta http-equiv="refresh" content="1; URL=http://example.com/"></head>
            <body><meta http-equiv="refresh" content="1; URL=http://example.com/">Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = messageOnlyDocument)
    }

    @Test
    fun shouldRemoveMetaRefreshButKeepOtherMetaTags() {
        val html =
            """
            <html>
            <head>
            <meta http-equiv="content-type" content="text/html; charset=UTF-8">
            <meta http-equiv="refresh" content="1; URL=http://example.com/">
            </head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()
        val expected =
            """
            <html>
            <head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head>
            <body>Message</body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun shouldProduceValidHtmlFromHtmlWithXmlDeclaration() {
        val html =
            """
            <?xml version="1.0" encoding="UTF-8"?>
            <html>
            <head></head>
            <body></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = "<html><head></head><body></body></html>")
    }

    @Test
    fun shouldNormalizeTables() {
        val html = "<html><head></head><body><table><tr><td></td><td></td></tr></table></body></html>"

        assertSanitizesTo(
            html,
            expected = "<html><head></head><body><table><tbody><tr><td></td><td></td></tr></tbody></table></body></html>",
        )
    }

    @Test
    fun shouldHtmlEncodeXmlDirectives() {
        val html =
            """
            <html>
            <head></head>
            <body>
            <table><tr><td><!==><!==>Hmailserver service shutdown:</td><td><!==><!==>Ok</td></tr></table>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()
        val expected =
            """
            <html>
            <head></head>
            <body><table><tbody><tr><td>Hmailserver service shutdown:</td><td>Ok</td></tr></tbody></table></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun shouldKeepHrTags() {
        val html = "<html><head></head><body>one<hr>two<hr />three</body></html>"

        assertSanitizesTo(html, expected = "<html><head></head><body>one<hr>two<hr>three</body></html>")
    }

    @Test
    fun shouldKeepInsDelTags() {
        val html = "<html><head></head><body><ins>Inserted</ins><del>Deleted</del></body></html>"

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun shouldKeepMapAreaTags() {
        val html =
            """
            <html>
            <head></head>
            <body>
            <map name="planetmap">
            <area shape="rect" coords="0,0,82,126" href="http://domain.com/sun.htm" alt="Sun">
            <area shape="circle" coords="90,58,3" href="http://domain.com/mercur.htm" alt="Mercury">
            <area shape="circle" coords="124,58,8" href="http://domain.com/venus.htm" alt="Venus">
            </map>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun shouldKeepImgUsemap() {
        val html =
            """
            <html>
            <head></head>
            <body><img src="http://domain.com/image.jpg" usemap="#planetmap"></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun shouldKeepAllowedElementsInHeadAndSkipTheRest() {
        val html =
            """
            <html>
            <head>
            <title>remove this</title>
            <style>keep this</style>
            <script>remove this</script>
            </head>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected = "<html><head><style>keep this</style></head><body></body></html>")
    }

    @Test
    fun shouldRemoveIFrames() {
        val html = """<html><body><iframe src="http://www.google.com" /></body></html>"""

        assertSanitizesTo(html, expected = "<html><head></head><body></body></html>")
    }

    @Test
    fun shouldKeepFormattingTags() {
        val html = """<html><body><center><font face="Arial" color="red" size="12">A</font></center></body></html>"""
        val expected =
            """
            <html>
            <head></head>
            <body><center><font face="Arial" color="red" size="12">A</font></center></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    // This test will fail when jsoup updates its list of allowed "protocols" for the a.href attribute.
    // When that happens, please adjust the removeProtocols("a", "href", …) line in BodyCleaner.
    @Test
    fun shouldKeepUris() {
        val html =
            """
            <html>
            <body>
            <a href="http://example.com/index.html">HTTP</a>
            <a href="https://example.com/default.html">HTTPS</a>
            <a href="mailto:user@example.com">Mailto</a>
            <a href="tel:00442079460111">Telephone</a>
            <a href="sms:00442079460111">SMS</a>
            <a href="sip:user@example.com">SIP</a>
            <a href="unknown:foobar">Unknown</a>
            <a href="rtsp://example.com/media.mp4">RTSP</a>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()
        val expected =
            """
            <html>
            <head></head>
            <body>
            <a href="http://example.com/index.html">HTTP</a>
            <a href="https://example.com/default.html">HTTPS</a>
            <a href="mailto:user@example.com">Mailto</a>
            <a href="tel:00442079460111">Telephone</a>
            <a href="sms:00442079460111">SMS</a>
            <a href="sip:user@example.com">SIP</a>
            <a href="unknown:foobar">Unknown</a>
            <a href="rtsp://example.com/media.mp4">RTSP</a>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun shouldKeepDirAttribute() {
        val html =
            """
            <html>
            <head></head>
            <body><table><tbody><tr><td dir="rtl"></td></tr></tbody></table></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun shouldKeepAllowedBodyAttributes() {
        val html =
            """
            <html>
            <body style="color: #fff" onload="alert()" class="body" id></body>
            </html>
            """.trimIndent().trimLineBreaks()
        val expected =
            """
            <html>
            <head></head>
            <body style="color: #fff" class="body" id></body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun `should keep HTML 5 doctype`() {
        val html =
            """
            <!doctype html>
            <html><head></head><body>text</body></html>
            """.trimIndent().trimLineBreaks()

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun `should keep HTML 4_01 doctype`() {
        val html =
            """
            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
            <html><head></head><body>text</body></html>
            """.trimIndent().trimLineBreaks()
        val expected =
            """
            <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
            <html><head></head><body>text</body></html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun `should keep 'align' attribute on 'div' element`() {
        val html = """<div align="center">text</div>"""
        val expected =
            """
            <html>
            <head></head>
            <body>
            <div align="center">text</div>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun `should keep 'name' attribute on 'a' element`() {
        val html = """<a name="something">"""
        val expected =
            """
            <html>
            <head></head>
            <body>
            <a name="something"></a>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    @Test
    fun `should keep 'tt' element`() {
        assertTagsNotStripped("tt")
    }

    @Test
    fun `should keep 'kbd' element`() {
        assertTagsNotStripped("kbd")
    }

    @Test
    fun `should keep 'samp' element`() {
        assertTagsNotStripped("samp")
    }

    @Test
    fun `should keep 'var' element`() {
        assertTagsNotStripped("var")
    }

    @Test
    fun `should keep 's' element`() {
        assertTagsNotStripped("s")
    }

    @Test
    fun `should keep 'base' element`() {
        val html =
            """
            <html>
            <head>
            <base href="https://domain.example/">
            </head>
            <body>
            <a href="relative">Link</a>
            </body>
            </html>
            """.compactHtml()

        assertUnchangedBySanitizer(html)
    }

    @Test
    fun `should keep 'style' element in body`() {
        val html =
            """
            <html>
            <head></head>
            <body>
            <style>.test { color: #000 }</style>
            </body>
            </html>
            """.compactHtml()

        assertUnchangedBySanitizer(html)
    }

    /** Wraps "some text" in the given element and asserts that the element survives sanitizing. */
    private fun assertTagsNotStripped(element: String) {
        val html = """<$element>some text</$element>"""
        val expected =
            """
            <html>
            <head></head>
            <body>
            <$element>some text</$element>
            </body>
            </html>
            """.trimIndent().trimLineBreaks()

        assertSanitizesTo(html, expected)
    }

    /** Sanitizes [input] and asserts that its compact serialization equals [expected]. */
    private fun assertSanitizesTo(input: String, expected: String) {
        val result = htmlSanitizer.sanitize(input)

        assertThat(result.toCompactString()).isEqualTo(expected)
    }

    /** Asserts that sanitizing [html] yields exactly the same markup again. */
    private fun assertUnchangedBySanitizer(html: String) {
        assertSanitizesTo(html, expected = html)
    }

    /** Serializes the document with pretty printing disabled and an indent amount of zero. */
    private fun Document.toCompactString(): String {
        val settings = outputSettings()
        settings.prettyPrint(false)
        settings.indentAmount(0)

        return html()
    }

    private fun String.trimLineBreaks() = replace("\n", "")

    private fun String.compactHtml() = lines().joinToString(separator = "") { it.trim() }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue