node/tools/eslint/node_modules/micromark/lib/compile/html.js
Michaël Zasso 2eff28fb7a
tools: move ESLint to tools/eslint
Greatly simplify how ESLint and its plugins are installed.

PR-URL: https://github.com/nodejs/node/pull/53413
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
2024-06-19 19:54:08 +00:00

811 lines
22 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict'
var decodeEntity = require('parse-entities/decode-entity.js')
var codes = require('../character/codes.js')
var assign = require('../constant/assign.js')
var constants = require('../constant/constants.js')
var hasOwnProperty = require('../constant/has-own-property.js')
var types = require('../constant/types.js')
var combineHtmlExtensions = require('../util/combine-html-extensions.js')
var chunkedPush = require('../util/chunked-push.js')
var miniflat = require('../util/miniflat.js')
var normalizeIdentifier = require('../util/normalize-identifier.js')
var normalizeUri = require('../util/normalize-uri.js')
var safeFromInt = require('../util/safe-from-int.js')
function _interopDefaultLegacy(e) {
return e && typeof e === 'object' && 'default' in e ? e : {default: e}
}
var decodeEntity__default = /*#__PURE__*/ _interopDefaultLegacy(decodeEntity)
// While micromark is a lexer/tokenizer, the common case of going from markdown
// This ensures that certain characters which have special meaning in HTML are
// dealt with.
// Technically, we can skip `>` and `"` in many cases, but CM includes them.
var characterReferences = {'"': 'quot', '&': 'amp', '<': 'lt', '>': 'gt'}
// These two are allowlists of essentially safe protocols for full URLs in
// respectively the `href` (on `<a>`) and `src` (on `<img>`) attributes.
// They are based on what is allowed on GitHub,
// <https://github.com/syntax-tree/hast-util-sanitize/blob/9275b21/lib/github.json#L31>
var protocolHref = /^(https?|ircs?|mailto|xmpp)$/i
var protocolSrc = /^https?$/i
function compileHtml(options) {
// Configuration.
// Includes `htmlExtensions` (an array of extensions), `defaultLineEnding` (a
// preferred EOL), `allowDangerousProtocol` (whether to allow potential
// dangerous protocols), and `allowDangerousHtml` (whether to allow potential
// dangerous HTML).
var settings = options || {}
// Tags is needed because according to markdown, links and emphasis and
// whatnot can exist in images, however, as HTML doesnt allow content in
// images, the tags are ignored in the `alt` attribute, but the content
// remains.
var tags = true
// An object to track identifiers to media (URLs and titles) defined with
// definitions.
var definitions = {}
// A lot of the handlers need to capture some of the output data, modify it
// somehow, and then deal with it.
// We do that by tracking a stack of buffers, that can be opened (with
// `buffer`) and closed (with `resume`) to access them.
var buffers = [[]]
// As we can have links in images and the other way around, where the deepest
// ones are closed first, we need to track which one were in.
var mediaStack = []
// Same for tightness, which is specific to lists.
// We need to track if were currently in a tight or loose container.
var tightStack = []
var defaultHandlers = {
enter: {
blockQuote: onenterblockquote,
codeFenced: onentercodefenced,
codeFencedFenceInfo: buffer,
codeFencedFenceMeta: buffer,
codeIndented: onentercodeindented,
codeText: onentercodetext,
content: onentercontent,
definition: onenterdefinition,
definitionDestinationString: onenterdefinitiondestinationstring,
definitionLabelString: buffer,
definitionTitleString: buffer,
emphasis: onenteremphasis,
htmlFlow: onenterhtmlflow,
htmlText: onenterhtml,
image: onenterimage,
label: buffer,
link: onenterlink,
listItemMarker: onenterlistitemmarker,
listItemValue: onenterlistitemvalue,
listOrdered: onenterlistordered,
listUnordered: onenterlistunordered,
paragraph: onenterparagraph,
reference: buffer,
resource: onenterresource,
resourceDestinationString: onenterresourcedestinationstring,
resourceTitleString: buffer,
setextHeading: onentersetextheading,
strong: onenterstrong
},
exit: {
atxHeading: onexitatxheading,
atxHeadingSequence: onexitatxheadingsequence,
autolinkEmail: onexitautolinkemail,
autolinkProtocol: onexitautolinkprotocol,
blockQuote: onexitblockquote,
characterEscapeValue: onexitdata,
characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker,
characterReferenceMarkerNumeric: onexitcharacterreferencemarker,
characterReferenceValue: onexitcharacterreferencevalue,
codeFenced: onexitflowcode,
codeFencedFence: onexitcodefencedfence,
codeFencedFenceInfo: onexitcodefencedfenceinfo,
codeFencedFenceMeta: resume,
codeFlowValue: onexitcodeflowvalue,
codeIndented: onexitflowcode,
codeText: onexitcodetext,
codeTextData: onexitdata,
data: onexitdata,
definition: onexitdefinition,
definitionDestinationString: onexitdefinitiondestinationstring,
definitionLabelString: onexitdefinitionlabelstring,
definitionTitleString: onexitdefinitiontitlestring,
emphasis: onexitemphasis,
hardBreakEscape: onexithardbreak,
hardBreakTrailing: onexithardbreak,
htmlFlow: onexithtml,
htmlFlowData: onexitdata,
htmlText: onexithtml,
htmlTextData: onexitdata,
image: onexitmedia,
label: onexitlabel,
labelText: onexitlabeltext,
lineEnding: onexitlineending,
link: onexitmedia,
listOrdered: onexitlistordered,
listUnordered: onexitlistunordered,
paragraph: onexitparagraph,
reference: resume,
referenceString: onexitreferencestring,
resource: resume,
resourceDestinationString: onexitresourcedestinationstring,
resourceTitleString: onexitresourcetitlestring,
setextHeading: onexitsetextheading,
setextHeadingLineSequence: onexitsetextheadinglinesequence,
setextHeadingText: onexitsetextheadingtext,
strong: onexitstrong,
thematicBreak: onexitthematicbreak
}
}
// Combine the HTML extensions with the default handlers.
// An HTML extension is an object whose fields are either `enter` or `exit`
// (reflecting whether a token is entered or exited).
// The values at such objects are names of tokens mapping to handlers.
// Handlers are called, respectively when a token is opener or closed, with
// that token, and a context as `this`.
var handlers = combineHtmlExtensions(
[defaultHandlers].concat(miniflat(settings.htmlExtensions))
)
// Handlers do often need to keep track of some state.
// That state is provided here as a key-value store (an object).
var data = {tightStack: tightStack}
// The context for handlers references a couple of useful functions.
// In handlers from extensions, those can be accessed at `this`.
// For the handlers here, they can be accessed directly.
var context = {
lineEndingIfNeeded: lineEndingIfNeeded,
options: settings,
encode: encode,
raw: raw,
tag: tag,
buffer: buffer,
resume: resume,
setData: setData,
getData: getData
}
// Generally, micromark copies line endings (`'\r'`, `'\n'`, `'\r\n'`) in the
// markdown document over to the compiled HTML.
// In some cases, such as `> a`, CommonMark requires that extra line endings
// are added: `<blockquote>\n<p>a</p>\n</blockquote>`.
// This variable hold the default line ending when given (or `undefined`),
// and in the latter case will be updated to the first found line ending if
// there is one.
var lineEndingStyle = settings.defaultLineEnding
// Return the function that handles a slice of events.
return compile
// Deal w/ a slice of events.
// Return either the empty string if theres nothing of note to return, or the
// result when done.
function compile(events) {
// As definitions can come after references, we need to figure out the media
// (urls and titles) defined by them before handling the references.
// So, we do sort of what HTML does: put metadata at the start (in head), and
// then put content after (`body`).
var head = []
var body = []
var index
var start
var listStack
var handler
var result
index = -1
start = 0
listStack = []
while (++index < events.length) {
// Figure out the line ending style used in the document.
if (
!lineEndingStyle &&
(events[index][1].type === types.lineEnding ||
events[index][1].type === types.lineEndingBlank)
) {
lineEndingStyle = events[index][2].sliceSerialize(events[index][1])
}
// Preprocess lists to infer whether the list is loose or not.
if (
events[index][1].type === types.listOrdered ||
events[index][1].type === types.listUnordered
) {
if (events[index][0] === 'enter') {
listStack.push(index)
} else {
prepareList(events.slice(listStack.pop(), index))
}
}
// Move definitions to the front.
if (events[index][1].type === types.definition) {
if (events[index][0] === 'enter') {
body = chunkedPush(body, events.slice(start, index))
start = index
} else {
head = chunkedPush(head, events.slice(start, index + 1))
start = index + 1
}
}
}
head = chunkedPush(head, body)
head = chunkedPush(head, events.slice(start))
result = head
index = -1
// Handle the start of the document, if defined.
if (handlers.enter.null) {
handlers.enter.null.call(context)
}
// Handle all events.
while (++index < events.length) {
handler = handlers[result[index][0]]
if (hasOwnProperty.call(handler, result[index][1].type)) {
handler[result[index][1].type].call(
assign({sliceSerialize: result[index][2].sliceSerialize}, context),
result[index][1]
)
}
}
// Handle the end of the document, if defined.
if (handlers.exit.null) {
handlers.exit.null.call(context)
}
return buffers[0].join('')
}
// Figure out whether lists are loose or not.
function prepareList(slice) {
var length = slice.length - 1 // Skip close.
var index = 0 // Skip open.
var containerBalance = 0
var loose
var atMarker
var event
while (++index < length) {
event = slice[index]
if (event[1]._container) {
atMarker = undefined
if (event[0] === 'enter') {
containerBalance++
} else {
containerBalance--
}
} else if (event[1].type === types.listItemPrefix) {
if (event[0] === 'exit') {
atMarker = true
}
} else if (event[1].type === types.linePrefix);
else if (event[1].type === types.lineEndingBlank) {
if (event[0] === 'enter' && !containerBalance) {
if (atMarker) {
atMarker = undefined
} else {
loose = true
}
}
} else {
atMarker = undefined
}
}
slice[0][1]._loose = loose
}
// Set data into the key-value store.
function setData(key, value) {
data[key] = value
}
// Get data from the key-value store.
function getData(key) {
return data[key]
}
// Capture some of the output data.
function buffer() {
buffers.push([])
}
// Stop capturing and access the output data.
function resume() {
return buffers.pop().join('')
}
// Output (parts of) HTML tags.
function tag(value) {
if (!tags) return
setData('lastWasTag', true)
buffers[buffers.length - 1].push(value)
}
// Output raw data.
function raw(value) {
setData('lastWasTag')
buffers[buffers.length - 1].push(value)
}
// Output an extra line ending.
function lineEnding() {
raw(lineEndingStyle || '\n')
}
// Output an extra line ending if the previous value wasnt EOF/EOL.
function lineEndingIfNeeded() {
var buffer = buffers[buffers.length - 1]
var slice = buffer[buffer.length - 1]
var previous = slice ? slice.charCodeAt(slice.length - 1) : codes.eof
if (
previous === codes.lf ||
previous === codes.cr ||
previous === codes.eof
) {
return
}
lineEnding()
}
// Make a value safe for injection in HTML (except w/ `ignoreEncode`).
function encode(value) {
return getData('ignoreEncode') ? value : value.replace(/["&<>]/g, replace)
function replace(value) {
return '&' + characterReferences[value] + ';'
}
}
// Make a value safe for injection as a URL.
// This does encode unsafe characters with percent-encoding, skipping already
// encoded sequences (`normalizeUri`).
// Further unsafe characters are encoded as character references (`encode`).
// Finally, if the URL includes an unknown protocol (such as a dangerous
// example, `javascript:`), the value is ignored.
function url(url, protocol) {
var value = encode(normalizeUri(url || ''))
var colon = value.indexOf(':')
var questionMark = value.indexOf('?')
var numberSign = value.indexOf('#')
var slash = value.indexOf('/')
if (
settings.allowDangerousProtocol ||
// If there is no protocol, its relative.
colon < 0 ||
// If the first colon is after a `?`, `#`, or `/`, its not a protocol.
(slash > -1 && colon > slash) ||
(questionMark > -1 && colon > questionMark) ||
(numberSign > -1 && colon > numberSign) ||
// It is a protocol, it should be allowed.
protocol.test(value.slice(0, colon))
) {
return value
}
return ''
}
//
// Handlers.
//
function onenterlistordered(token) {
tightStack.push(!token._loose)
lineEndingIfNeeded()
tag('<ol')
setData('expectFirstItem', true)
}
function onenterlistunordered(token) {
tightStack.push(!token._loose)
lineEndingIfNeeded()
tag('<ul')
setData('expectFirstItem', true)
}
function onenterlistitemvalue(token) {
var value
if (getData('expectFirstItem')) {
value = parseInt(this.sliceSerialize(token), constants.numericBaseDecimal)
if (value !== 1) {
tag(' start="' + encode(String(value)) + '"')
}
}
}
function onenterlistitemmarker() {
if (getData('expectFirstItem')) {
tag('>')
} else {
onexitlistitem()
}
lineEndingIfNeeded()
tag('<li>')
setData('expectFirstItem')
// “Hack” to prevent a line ending from showing up if the item is empty.
setData('lastWasTag')
}
function onexitlistordered() {
onexitlistitem()
tightStack.pop()
lineEnding()
tag('</ol>')
}
function onexitlistunordered() {
onexitlistitem()
tightStack.pop()
lineEnding()
tag('</ul>')
}
function onexitlistitem() {
if (getData('lastWasTag') && !getData('slurpAllLineEndings')) {
lineEndingIfNeeded()
}
tag('</li>')
setData('slurpAllLineEndings')
}
function onenterblockquote() {
tightStack.push(false)
lineEndingIfNeeded()
tag('<blockquote>')
}
function onexitblockquote() {
tightStack.pop()
lineEndingIfNeeded()
tag('</blockquote>')
setData('slurpAllLineEndings')
}
function onenterparagraph() {
if (!tightStack[tightStack.length - 1]) {
lineEndingIfNeeded()
tag('<p>')
}
setData('slurpAllLineEndings')
}
function onexitparagraph() {
if (tightStack[tightStack.length - 1]) {
setData('slurpAllLineEndings', true)
} else {
tag('</p>')
}
}
function onentercodefenced() {
lineEndingIfNeeded()
tag('<pre><code')
setData('fencesCount', 0)
}
function onexitcodefencedfenceinfo() {
var value = resume()
tag(' class="language-' + value + '"')
}
function onexitcodefencedfence() {
if (!getData('fencesCount')) {
tag('>')
setData('fencedCodeInside', true)
setData('slurpOneLineEnding', true)
}
setData('fencesCount', getData('fencesCount') + 1)
}
function onentercodeindented() {
lineEndingIfNeeded()
tag('<pre><code>')
}
function onexitflowcode() {
// Send an extra line feed if we saw data.
if (getData('flowCodeSeenData')) lineEndingIfNeeded()
tag('</code></pre>')
if (getData('fencesCount') < 2) lineEndingIfNeeded()
setData('flowCodeSeenData')
setData('fencesCount')
setData('slurpOneLineEnding')
}
function onenterimage() {
mediaStack.push({image: true})
tags = undefined // Disallow tags.
}
function onenterlink() {
mediaStack.push({})
}
function onexitlabeltext(token) {
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token)
}
function onexitlabel() {
mediaStack[mediaStack.length - 1].label = resume()
}
function onexitreferencestring(token) {
mediaStack[mediaStack.length - 1].referenceId = this.sliceSerialize(token)
}
function onenterresource() {
buffer() // We can have line endings in the resource, ignore them.
mediaStack[mediaStack.length - 1].destination = ''
}
function onenterresourcedestinationstring() {
buffer()
// Ignore encoding the result, as well first percent encode the url and
// encode manually after.
setData('ignoreEncode', true)
}
function onexitresourcedestinationstring() {
mediaStack[mediaStack.length - 1].destination = resume()
setData('ignoreEncode')
}
function onexitresourcetitlestring() {
mediaStack[mediaStack.length - 1].title = resume()
}
function onexitmedia() {
var index = mediaStack.length - 1 // Skip current.
var media = mediaStack[index]
var context =
media.destination === undefined
? definitions[normalizeIdentifier(media.referenceId || media.labelId)]
: media
tags = true
while (index--) {
if (mediaStack[index].image) {
tags = undefined
break
}
}
if (media.image) {
tag('<img src="' + url(context.destination, protocolSrc) + '" alt="')
raw(media.label)
tag('"')
} else {
tag('<a href="' + url(context.destination, protocolHref) + '"')
}
tag(context.title ? ' title="' + context.title + '"' : '')
if (media.image) {
tag(' />')
} else {
tag('>')
raw(media.label)
tag('</a>')
}
mediaStack.pop()
}
function onenterdefinition() {
buffer()
mediaStack.push({})
}
function onexitdefinitionlabelstring(token) {
// Discard label, use the source content instead.
resume()
mediaStack[mediaStack.length - 1].labelId = this.sliceSerialize(token)
}
function onenterdefinitiondestinationstring() {
buffer()
setData('ignoreEncode', true)
}
function onexitdefinitiondestinationstring() {
mediaStack[mediaStack.length - 1].destination = resume()
setData('ignoreEncode')
}
function onexitdefinitiontitlestring() {
mediaStack[mediaStack.length - 1].title = resume()
}
function onexitdefinition() {
var id = normalizeIdentifier(mediaStack[mediaStack.length - 1].labelId)
resume()
if (!hasOwnProperty.call(definitions, id)) {
definitions[id] = mediaStack[mediaStack.length - 1]
}
mediaStack.pop()
}
function onentercontent() {
setData('slurpAllLineEndings', true)
}
function onexitatxheadingsequence(token) {
// Exit for further sequences.
if (getData('headingRank')) return
setData('headingRank', this.sliceSerialize(token).length)
lineEndingIfNeeded()
tag('<h' + getData('headingRank') + '>')
}
function onentersetextheading() {
buffer()
setData('slurpAllLineEndings')
}
function onexitsetextheadingtext() {
setData('slurpAllLineEndings', true)
}
function onexitatxheading() {
tag('</h' + getData('headingRank') + '>')
setData('headingRank')
}
function onexitsetextheadinglinesequence(token) {
setData(
'headingRank',
this.sliceSerialize(token).charCodeAt(0) === codes.equalsTo ? 1 : 2
)
}
function onexitsetextheading() {
var value = resume()
lineEndingIfNeeded()
tag('<h' + getData('headingRank') + '>')
raw(value)
tag('</h' + getData('headingRank') + '>')
setData('slurpAllLineEndings')
setData('headingRank')
}
function onexitdata(token) {
raw(encode(this.sliceSerialize(token)))
}
function onexitlineending(token) {
if (getData('slurpAllLineEndings')) {
return
}
if (getData('slurpOneLineEnding')) {
setData('slurpOneLineEnding')
return
}
if (getData('inCodeText')) {
raw(' ')
return
}
raw(encode(this.sliceSerialize(token)))
}
function onexitcodeflowvalue(token) {
raw(encode(this.sliceSerialize(token)))
setData('flowCodeSeenData', true)
}
function onexithardbreak() {
tag('<br />')
}
function onenterhtmlflow() {
lineEndingIfNeeded()
onenterhtml()
}
function onexithtml() {
setData('ignoreEncode')
}
function onenterhtml() {
if (settings.allowDangerousHtml) {
setData('ignoreEncode', true)
}
}
function onenteremphasis() {
tag('<em>')
}
function onenterstrong() {
tag('<strong>')
}
function onentercodetext() {
setData('inCodeText', true)
tag('<code>')
}
function onexitcodetext() {
setData('inCodeText')
tag('</code>')
}
function onexitemphasis() {
tag('</em>')
}
function onexitstrong() {
tag('</strong>')
}
function onexitthematicbreak() {
lineEndingIfNeeded()
tag('<hr />')
}
function onexitcharacterreferencemarker(token) {
setData('characterReferenceType', token.type)
}
function onexitcharacterreferencevalue(token) {
var value = this.sliceSerialize(token)
value = getData('characterReferenceType')
? safeFromInt(
value,
getData('characterReferenceType') ===
types.characterReferenceMarkerNumeric
? constants.numericBaseDecimal
: constants.numericBaseHexadecimal
)
: decodeEntity__default['default'](value)
raw(encode(value))
setData('characterReferenceType')
}
function onexitautolinkprotocol(token) {
var uri = this.sliceSerialize(token)
tag('<a href="' + url(uri, protocolHref) + '">')
raw(encode(uri))
tag('</a>')
}
function onexitautolinkemail(token) {
var uri = this.sliceSerialize(token)
tag('<a href="' + url('mailto:' + uri, protocolHref) + '">')
raw(encode(uri))
tag('</a>')
}
}
module.exports = compileHtml