// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. extern crate phf_codegen; extern crate string_cache_codegen; #[macro_use] extern crate serde_derive; extern crate serde_json; use std::collections::HashMap; use std::env; use std::fs::File; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::path::Path; static NAMESPACES: &'static [(&'static str, &'static str)] = &[ ("", ""), ("*", "*"), ("html", "http://www.w3.org/1999/xhtml"), ("xml", "http://www.w3.org/XML/1998/namespace"), ("xmlns", "http://www.w3.org/2000/xmlns/"), ("xlink", "http://www.w3.org/1999/xlink"), ("svg", "http://www.w3.org/2000/svg"), ("mathml", "http://www.w3.org/1998/Math/MathML"), ]; fn main() { let generated = Path::new(&env::var("OUT_DIR").unwrap()).join("generated.rs"); let mut generated = BufWriter::new(File::create(&generated).unwrap()); let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); named_entities_to_phf( &Path::new(&manifest_dir).join("data").join("entities.json"), &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"), ); // Create a string cache for local names let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt"); let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!"); for line in BufReader::new(File::open(&local_names).unwrap()).lines() { let local_name = line.unwrap(); local_names_atom.atom(&local_name); local_names_atom.atom(&local_name.to_ascii_lowercase()); } local_names_atom .with_macro_doc("Takes a local name as a string and returns its key in the string cache.") .write_to(&mut generated) .unwrap(); // Create a string cache for namespace prefixes string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!") .with_macro_doc("Takes a namespace prefix string and returns its key in a string cache.") .atoms(NAMESPACES.iter().map(|&(prefix, _url)| prefix)) .write_to(&mut generated) .unwrap(); // Create a string cache for namespace urls string_cache_codegen::AtomType::new("Namespace", "namespace_url!") .with_macro_doc("Takes a namespace url string and returns its key in a string cache.") .atoms(NAMESPACES.iter().map(|&(_prefix, url)| url)) .write_to(&mut generated) .unwrap(); writeln!( generated, r#" /// Maps the input of `namespace_prefix!` to the output of `namespace_url!`. #[macro_export] macro_rules! ns {{ "# ) .unwrap(); for &(prefix, url) in NAMESPACES { writeln!( generated, "({}) => {{ namespace_url!({:?}) }};", prefix, url ) .unwrap(); } writeln!(generated, "}}").unwrap(); } fn named_entities_to_phf(from: &Path, to: &Path) { // A struct matching the entries in entities.json. #[derive(Deserialize, Debug)] struct CharRef { codepoints: Vec, //characters: String, // Present in the file but we don't need it } let entities: HashMap = serde_json::from_reader(&mut File::open(from).unwrap()).unwrap(); let mut entities: HashMap<&str, (u32, u32)> = entities .iter() .map(|(name, char_ref)| { assert!(name.starts_with("&")); assert!(char_ref.codepoints.len() <= 2); ( &name[1..], ( char_ref.codepoints[0], *char_ref.codepoints.get(1).unwrap_or(&0), ), ) }) .collect(); // Add every missing prefix of those keys, mapping to NULL characters. for key in entities.keys().cloned().collect::>() { for n in 1..key.len() { entities.entry(&key[..n]).or_insert((0, 0)); } } entities.insert("", (0, 0)); let mut phf_map = phf_codegen::Map::new(); for (key, value) in entities { phf_map.entry(key, &format!("{:?}", value)); } let mut file = File::create(to).unwrap(); writeln!( &mut file, r#" /// A map of entity names to their codepoints. The second codepoint will /// be 0 if the entity contains a single codepoint. Entities have their preceeding '&' removed. /// /// # Examples /// /// ``` /// use markup5ever::data::NAMED_ENTITIES; /// /// assert_eq!(NAMED_ENTITIES.get("gt;").unwrap(), &(62, 0)); /// ``` "# ) .unwrap(); write!( &mut file, "pub static NAMED_ENTITIES: Map<&'static str, (u32, u32)> = " ) .unwrap(); phf_map.build(&mut file).unwrap(); write!(&mut file, ";\n").unwrap(); }