mirror of
https://github.com/lise-henry/crowbook
synced 2024-05-28 18:16:32 +02:00
Rewrote the escape_html function instead of using the pasted one, and added an escape_tex function
This commit is contained in:
parent
c668efb9d3
commit
2e63a46546
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "crowbook"
|
||||
version = "0.1.0"
|
||||
version = "0.1.0-unreleased"
|
||||
authors = ["Elisabeth Henry <liz.henry@ouvaton.org>"]
|
||||
description = "Yet another converter from Markdown file to (HTML, LaTeX, Epub)"
|
||||
repository = "https://github.com/lise-henry/crowbook"
|
||||
|
|
|
@ -1,129 +1,36 @@
|
|||
// File taken from pulldown-cmark: https://github.com/google/pulldown-cmark
|
||||
// The only modification I made is that escape_href and escape_html return
|
||||
// a string by value and not by reference in the arguments function.
|
||||
// Lise
|
||||
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//! Utility functions for HTML escaping
|
||||
|
||||
use std::str::from_utf8;
|
||||
|
||||
static HREF_SAFE: [u8; 128] = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
];
|
||||
|
||||
static HEX_CHARS: &'static [u8] = b"0123456789ABCDEF";
|
||||
|
||||
pub fn escape_href(s: &str) -> String {
|
||||
let mut ob = String::new();
|
||||
let mut mark = 0;
|
||||
for i in 0..s.len() {
|
||||
let c = s.as_bytes()[i];
|
||||
if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
|
||||
// character needing escape
|
||||
|
||||
// write partial substring up to mark
|
||||
if mark < i {
|
||||
ob.push_str(&s[mark..i]);
|
||||
}
|
||||
match c {
|
||||
b'&' => {
|
||||
ob.push_str("&");
|
||||
},
|
||||
b'\'' => {
|
||||
ob.push_str("'");
|
||||
},
|
||||
_ => {
|
||||
let mut buf = [0u8; 3];
|
||||
buf[0] = b'%';
|
||||
buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
|
||||
buf[2] = HEX_CHARS[(c as usize) & 0xF];
|
||||
ob.push_str(from_utf8(&buf).unwrap());
|
||||
}
|
||||
}
|
||||
mark = i + 1; // all escaped characters are ASCII
|
||||
/// Escapes the characters `<`, `>` and `&` so that `input` can be inserted
/// into HTML text content.
///
/// Each of the three characters is replaced by its named entity (`&lt;`,
/// `&gt;`, `&amp;`); every other character is copied through unchanged.
/// Quotes are *not* escaped, so the result is not suitable for use inside an
/// attribute value.
///
/// The function does not detect existing entities: an input of `&amp;` is
/// escaped again to `&amp;amp;`.
pub fn escape_html(input: &str) -> String {
    // The escaped text is at least as long as the input.
    let mut output = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '<' => output.push_str("&lt;"),
            '>' => output.push_str("&gt;"),
            '&' => output.push_str("&amp;"),
            _ => output.push(c),
        }
    }
    output
}
|
||||
|
||||
static HTML_ESCAPE_TABLE: [u8; 256] = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
];
|
||||
|
||||
static HTML_ESCAPES: [&'static str; 6] = [
|
||||
"",
|
||||
""",
|
||||
"&",
|
||||
"/",
|
||||
"<",
|
||||
">"
|
||||
];
|
||||
|
||||
pub fn escape_html(s: &str) -> String {
|
||||
let mut ob = String::new();
|
||||
let size = s.len();
|
||||
let bytes = s.as_bytes();
|
||||
let mut mark = 0;
|
||||
let mut i = 0;
|
||||
while i < size {
|
||||
match bytes[i..].iter().position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0) {
|
||||
Some(pos) => {
|
||||
i += pos;
|
||||
}
|
||||
None => break
|
||||
/// Escapes the characters that have a special meaning in LaTeX so that
/// `input` is typeset literally.
///
/// * `&`, `%`, `$`, `#`, `_`, `{` and `}` are prefixed with a backslash.
/// * `~`, `^` and `\` are replaced by the text commands `\textasciitilde`,
///   `\textasciicircum` and `\textbackslash`.
///
/// Every other character is copied through unchanged.
pub fn escape_tex(input: &str) -> String {
    // The escaped text is at least as long as the input.
    let mut output = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '&' => output.push_str(r"\&"),
            '%' => output.push_str(r"\%"),
            '$' => output.push_str(r"\$"),
            '#' => output.push_str(r"\#"),
            '_' => output.push_str(r"\_"),
            '{' => output.push_str(r"\{"),
            '}' => output.push_str(r"\}"),
            // The three commands below are control words: without the empty
            // group, TeX would merge a following letter into the command name
            // (`a\b` -> `\textbackslashb`, an undefined control sequence) and
            // would swallow a following space.
            '~' => output.push_str(r"\textasciitilde{}"),
            '^' => output.push_str(r"\textasciicircum{}"),
            '\\' => output.push_str(r"\textbackslash{}"),
            _ => output.push(c),
        }
    }
    output
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use error::{Error,Result};
|
|||
use token::Token;
|
||||
use zipper::Zipper;
|
||||
use templates::latex::*;
|
||||
use escape::escape_tex;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
|
@ -42,7 +43,7 @@ impl<'a> LatexRenderer<'a> {
|
|||
let mut content = String::from("");
|
||||
for &(n, ref v) in &self.book.chapters {
|
||||
self.current_chapter = n;
|
||||
content.push_str(&self.render_vec(v));
|
||||
content.push_str(&self.render_vec(v, true));
|
||||
}
|
||||
|
||||
|
||||
|
@ -70,20 +71,20 @@ impl<'a> LatexRenderer<'a> {
|
|||
|
||||
|
||||
/// Transform a vector of `Token`s to LaTeX
|
||||
fn render_vec(&mut self, tokens: &[Token]) -> String {
|
||||
fn render_vec(&mut self, tokens: &[Token], escape: bool) -> String {
|
||||
let mut res = String::new();
|
||||
|
||||
for token in tokens {
|
||||
res.push_str(&self.parse_token(&token));
|
||||
res.push_str(&self.parse_token(&token, escape));
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn parse_token(&mut self, token: &Token) -> String {
|
||||
fn parse_token(&mut self, token: &Token, escape: bool) -> String {
|
||||
match *token {
|
||||
Token::Str(ref text) => text.clone(),
|
||||
Token::Str(ref text) => if escape {escape_tex(text)} else {text.clone()},
|
||||
Token::Paragraph(ref vec) => format!("{}\n\n",
|
||||
self.render_vec(vec)),
|
||||
self.render_vec(vec, escape)),
|
||||
Token::Header(n, ref vec) => {
|
||||
let mut content = String::new();
|
||||
if n == 1 {
|
||||
|
@ -104,22 +105,22 @@ impl<'a> LatexRenderer<'a> {
|
|||
content.push_str("*");
|
||||
}
|
||||
content.push_str(r"{");
|
||||
content.push_str(&self.render_vec(vec));
|
||||
content.push_str(&self.render_vec(vec, true));
|
||||
content.push_str("}\n");
|
||||
content
|
||||
},
|
||||
Token::Emphasis(ref vec) => format!("\\emph{{{}}}", self.render_vec(vec)),
|
||||
Token::Strong(ref vec) => format!("\\textbf{{{}}}", self.render_vec(vec)),
|
||||
Token::Code(ref vec) => format!("\\texttt{{{}}}", self.render_vec(vec)),
|
||||
Token::BlockQuote(ref vec) => format!("\\begin{{quotation}}\n{}\\end{{quotation}}\n", self.render_vec(vec)),
|
||||
Token::CodeBlock(_, ref vec) => format!("\\begin{{verbatim}}\n{}\\end{{verbatim}}\n", self.render_vec(vec)),
|
||||
Token::Emphasis(ref vec) => format!("\\emph{{{}}}", self.render_vec(vec, escape)),
|
||||
Token::Strong(ref vec) => format!("\\textbf{{{}}}", self.render_vec(vec, escape)),
|
||||
Token::Code(ref vec) => format!("\\texttt{{{}}}", self.render_vec(vec, escape)),
|
||||
Token::BlockQuote(ref vec) => format!("\\begin{{quotation}}\n{}\\end{{quotation}}\n", self.render_vec(vec, escape)),
|
||||
Token::CodeBlock(_, ref vec) => format!("\\begin{{verbatim}}\n{}\\end{{verbatim}}\n", self.render_vec(vec, false)),
|
||||
Token::Rule => String::from("\\HRule\n"),
|
||||
Token::SoftBreak => String::from(" "),
|
||||
Token::HardBreak => String::from("\n"),
|
||||
Token::List(ref vec) => format!("\\begin{{itemize}}\n{}\\end{{itemize}}", self.render_vec(vec)),
|
||||
Token::OrderedList(_, ref vec) => format!("\\begin{{enumerate}}\n{}\\end{{enumerate}}\n", self.render_vec(vec)),
|
||||
Token::Item(ref vec) => format!("\\item {}\n", self.render_vec(vec)),
|
||||
Token::Link(_, _, ref vec) => self.render_vec(vec), //todo
|
||||
Token::List(ref vec) => format!("\\begin{{itemize}}\n{}\\end{{itemize}}", self.render_vec(vec, escape)),
|
||||
Token::OrderedList(_, ref vec) => format!("\\begin{{enumerate}}\n{}\\end{{enumerate}}\n", self.render_vec(vec, escape)),
|
||||
Token::Item(ref vec) => format!("\\item {}\n", self.render_vec(vec, escape)),
|
||||
Token::Link(_, _, ref vec) => self.render_vec(vec, escape), //todo
|
||||
Token::Image(_, _, _) => panic!("Not yet implemented"),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue