mirror of
https://git.proxmox.com/git/rustc
synced 2025-08-14 10:37:51 +00:00
336 lines
9.9 KiB
Rust
336 lines
9.9 KiB
Rust
// Copyright 2014-2017 The html5ever Project Developers. See the
|
|
// COPYRIGHT file at the top-level directory of this distribution.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
extern crate html5ever;
|
|
extern crate typed_arena;
|
|
|
|
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
|
|
use html5ever::tendril::{StrTendril, TendrilSink};
|
|
use html5ever::{parse_document, Attribute, ExpandedName, QualName};
|
|
use std::borrow::Cow;
|
|
use std::cell::{Cell, RefCell};
|
|
use std::collections::HashSet;
|
|
use std::io::{self, Read};
|
|
use std::ptr;
|
|
|
|
fn main() {
|
|
let mut bytes = Vec::new();
|
|
io::stdin().read_to_end(&mut bytes).unwrap();
|
|
let arena = typed_arena::Arena::new();
|
|
html5ever_parse_slice_into_arena(&bytes, &arena);
|
|
}
|
|
|
|
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
|
|
let sink = Sink {
|
|
arena: arena,
|
|
document: arena.alloc(Node::new(NodeData::Document)),
|
|
quirks_mode: QuirksMode::NoQuirks,
|
|
};
|
|
parse_document(sink, Default::default())
|
|
.from_utf8()
|
|
.one(bytes)
|
|
}
|
|
|
|
/// Shared reference to the `typed_arena::Arena` from which `Node`s are allocated.
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
|
|
|
|
/// Shared reference to a `Node`, valid for the `'arena` lifetime.
type Ref<'arena> = &'arena Node<'arena>;
|
|
|
|
/// Optional link to another node that can be rewritten through a shared reference.
type Link<'arena> = Cell<Option<Ref<'arena>>>;
|
|
|
|
struct Sink<'arena> {
|
|
arena: Arena<'arena>,
|
|
document: Ref<'arena>,
|
|
quirks_mode: QuirksMode,
|
|
}
|
|
|
|
pub struct Node<'arena> {
|
|
parent: Link<'arena>,
|
|
next_sibling: Link<'arena>,
|
|
previous_sibling: Link<'arena>,
|
|
first_child: Link<'arena>,
|
|
last_child: Link<'arena>,
|
|
data: NodeData<'arena>,
|
|
}
|
|
|
|
pub enum NodeData<'arena> {
|
|
Document,
|
|
Doctype {
|
|
name: StrTendril,
|
|
public_id: StrTendril,
|
|
system_id: StrTendril,
|
|
},
|
|
Text {
|
|
contents: RefCell<StrTendril>,
|
|
},
|
|
Comment {
|
|
contents: StrTendril,
|
|
},
|
|
Element {
|
|
name: QualName,
|
|
attrs: RefCell<Vec<Attribute>>,
|
|
template_contents: Option<Ref<'arena>>,
|
|
mathml_annotation_xml_integration_point: bool,
|
|
},
|
|
ProcessingInstruction {
|
|
target: StrTendril,
|
|
contents: StrTendril,
|
|
},
|
|
}
|
|
|
|
impl<'arena> Node<'arena> {
|
|
fn new(data: NodeData<'arena>) -> Self {
|
|
Node {
|
|
parent: Cell::new(None),
|
|
previous_sibling: Cell::new(None),
|
|
next_sibling: Cell::new(None),
|
|
first_child: Cell::new(None),
|
|
last_child: Cell::new(None),
|
|
data: data,
|
|
}
|
|
}
|
|
|
|
fn detach(&self) {
|
|
let parent = self.parent.take();
|
|
let previous_sibling = self.previous_sibling.take();
|
|
let next_sibling = self.next_sibling.take();
|
|
|
|
if let Some(next_sibling) = next_sibling {
|
|
next_sibling.previous_sibling.set(previous_sibling);
|
|
} else if let Some(parent) = parent {
|
|
parent.last_child.set(previous_sibling);
|
|
}
|
|
|
|
if let Some(previous_sibling) = previous_sibling {
|
|
previous_sibling.next_sibling.set(next_sibling);
|
|
} else if let Some(parent) = parent {
|
|
parent.first_child.set(next_sibling);
|
|
}
|
|
}
|
|
|
|
fn append(&'arena self, new_child: &'arena Self) {
|
|
new_child.detach();
|
|
new_child.parent.set(Some(self));
|
|
if let Some(last_child) = self.last_child.take() {
|
|
new_child.previous_sibling.set(Some(last_child));
|
|
debug_assert!(last_child.next_sibling.get().is_none());
|
|
last_child.next_sibling.set(Some(new_child));
|
|
} else {
|
|
debug_assert!(self.first_child.get().is_none());
|
|
self.first_child.set(Some(new_child));
|
|
}
|
|
self.last_child.set(Some(new_child));
|
|
}
|
|
|
|
fn insert_before(&'arena self, new_sibling: &'arena Self) {
|
|
new_sibling.detach();
|
|
new_sibling.parent.set(self.parent.get());
|
|
new_sibling.next_sibling.set(Some(self));
|
|
if let Some(previous_sibling) = self.previous_sibling.take() {
|
|
new_sibling.previous_sibling.set(Some(previous_sibling));
|
|
debug_assert!(ptr::eq::<Node>(
|
|
previous_sibling.next_sibling.get().unwrap(),
|
|
self
|
|
));
|
|
previous_sibling.next_sibling.set(Some(new_sibling));
|
|
} else if let Some(parent) = self.parent.get() {
|
|
debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
|
|
parent.first_child.set(Some(new_sibling));
|
|
}
|
|
self.previous_sibling.set(Some(new_sibling));
|
|
}
|
|
}
|
|
|
|
impl<'arena> Sink<'arena> {
|
|
fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
|
|
self.arena.alloc(Node::new(data))
|
|
}
|
|
|
|
fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
|
|
where
|
|
P: FnOnce() -> Option<Ref<'arena>>,
|
|
A: FnOnce(Ref<'arena>),
|
|
{
|
|
let new_node = match child {
|
|
NodeOrText::AppendText(text) => {
|
|
// Append to an existing Text node if we have one.
|
|
if let Some(&Node {
|
|
data: NodeData::Text { ref contents },
|
|
..
|
|
}) = previous()
|
|
{
|
|
contents.borrow_mut().push_tendril(&text);
|
|
return;
|
|
}
|
|
self.new_node(NodeData::Text {
|
|
contents: RefCell::new(text),
|
|
})
|
|
},
|
|
NodeOrText::AppendNode(node) => node,
|
|
};
|
|
|
|
append(new_node)
|
|
}
|
|
}
|
|
|
|
impl<'arena> TreeSink for Sink<'arena> {
|
|
type Handle = Ref<'arena>;
|
|
type Output = Ref<'arena>;
|
|
|
|
fn finish(self) -> Ref<'arena> {
|
|
self.document
|
|
}
|
|
|
|
fn parse_error(&mut self, _: Cow<'static, str>) {}
|
|
|
|
fn get_document(&mut self) -> Ref<'arena> {
|
|
self.document
|
|
}
|
|
|
|
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
|
self.quirks_mode = mode;
|
|
}
|
|
|
|
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
|
|
ptr::eq::<Node>(*x, *y)
|
|
}
|
|
|
|
fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
|
|
match target.data {
|
|
NodeData::Element { ref name, .. } => name.expanded(),
|
|
_ => panic!("not an element!"),
|
|
}
|
|
}
|
|
|
|
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
|
|
if let NodeData::Element {
|
|
template_contents: Some(ref contents),
|
|
..
|
|
} = target.data
|
|
{
|
|
contents
|
|
} else {
|
|
panic!("not a template element!")
|
|
}
|
|
}
|
|
|
|
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
|
|
if let NodeData::Element {
|
|
mathml_annotation_xml_integration_point,
|
|
..
|
|
} = target.data
|
|
{
|
|
mathml_annotation_xml_integration_point
|
|
} else {
|
|
panic!("not an element!")
|
|
}
|
|
}
|
|
|
|
fn create_element(
|
|
&mut self,
|
|
name: QualName,
|
|
attrs: Vec<Attribute>,
|
|
flags: ElementFlags,
|
|
) -> Ref<'arena> {
|
|
self.new_node(NodeData::Element {
|
|
name: name,
|
|
attrs: RefCell::new(attrs),
|
|
template_contents: if flags.template {
|
|
Some(self.new_node(NodeData::Document))
|
|
} else {
|
|
None
|
|
},
|
|
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
|
|
})
|
|
}
|
|
|
|
fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
|
|
self.new_node(NodeData::Comment { contents: text })
|
|
}
|
|
|
|
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
|
|
self.new_node(NodeData::ProcessingInstruction {
|
|
target: target,
|
|
contents: data,
|
|
})
|
|
}
|
|
|
|
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
|
|
self.append_common(
|
|
child,
|
|
|| parent.last_child.get(),
|
|
|new_node| parent.append(new_node),
|
|
)
|
|
}
|
|
|
|
fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
|
|
self.append_common(
|
|
child,
|
|
|| sibling.previous_sibling.get(),
|
|
|new_node| sibling.insert_before(new_node),
|
|
)
|
|
}
|
|
|
|
fn append_based_on_parent_node(
|
|
&mut self,
|
|
element: &Ref<'arena>,
|
|
prev_element: &Ref<'arena>,
|
|
child: NodeOrText<Ref<'arena>>,
|
|
) {
|
|
if element.parent.get().is_some() {
|
|
self.append_before_sibling(element, child)
|
|
} else {
|
|
self.append(prev_element, child)
|
|
}
|
|
}
|
|
|
|
fn append_doctype_to_document(
|
|
&mut self,
|
|
name: StrTendril,
|
|
public_id: StrTendril,
|
|
system_id: StrTendril,
|
|
) {
|
|
self.document.append(self.new_node(NodeData::Doctype {
|
|
name: name,
|
|
public_id: public_id,
|
|
system_id: system_id,
|
|
}))
|
|
}
|
|
|
|
fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
|
|
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
|
|
attrs.borrow_mut()
|
|
} else {
|
|
panic!("not an element")
|
|
};
|
|
|
|
let existing_names = existing
|
|
.iter()
|
|
.map(|e| e.name.clone())
|
|
.collect::<HashSet<_>>();
|
|
existing.extend(
|
|
attrs
|
|
.into_iter()
|
|
.filter(|attr| !existing_names.contains(&attr.name)),
|
|
);
|
|
}
|
|
|
|
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
|
|
target.detach()
|
|
}
|
|
|
|
fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
|
|
let mut next_child = node.first_child.get();
|
|
while let Some(child) = next_child {
|
|
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
|
|
next_child = child.next_sibling.get();
|
|
new_parent.append(child)
|
|
}
|
|
}
|
|
}
|