1
0
Fork 0
mirror of https://github.com/lise-henry/crowbook synced 2024-05-28 18:16:32 +02:00

Rewrote the escape_html function instead of using the pasted one, and added an escape_tex function

This commit is contained in:
Elisabeth Henry 2016-02-20 18:35:48 +01:00
parent c668efb9d3
commit 2e63a46546
3 changed files with 47 additions and 139 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "crowbook"
version = "0.1.0"
version = "0.1.0-unreleased"
authors = ["Elisabeth Henry <liz.henry@ouvaton.org>"]
description = "Yet another converter from Markdown file to (HTML, LaTeX, Epub)"
repository = "https://github.com/lise-henry/crowbook"

View File

@ -1,129 +1,36 @@
// File taken from pulldown-cmark: https://github.com/google/pulldown-cmark
// The only modification I made is that escape_href and escape_html return
// a String by value instead of writing to one passed by reference as a function argument.
// Lise
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//! Utility functions for HTML escaping
use std::str::from_utf8;
/// Lookup table for `escape_href`, indexed by ASCII byte value (0..128).
///
/// An entry of `1` marks a byte that is copied to the output unchanged; an
/// entry of `0` marks a byte that `escape_href` must escape. Bytes `>= 0x80`
/// are not covered by this table: `escape_href` always escapes them without
/// consulting it.
static HREF_SAFE: [u8; 128] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
];
/// Uppercase hexadecimal digits, indexed by nibble value (0..16).
/// `escape_href` uses it to build the two digits of a `%XX` escape.
static HEX_CHARS: &'static [u8] = b"0123456789ABCDEF";
pub fn escape_href(s: &str) -> String {
let mut ob = String::new();
let mut mark = 0;
for i in 0..s.len() {
let c = s.as_bytes()[i];
if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
// character needing escape
// write partial substring up to mark
if mark < i {
ob.push_str(&s[mark..i]);
}
match c {
b'&' => {
ob.push_str("&amp;");
},
b'\'' => {
ob.push_str("&#x27;");
},
_ => {
let mut buf = [0u8; 3];
buf[0] = b'%';
buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
buf[2] = HEX_CHARS[(c as usize) & 0xF];
ob.push_str(from_utf8(&buf).unwrap());
}
}
mark = i + 1; // all escaped characters are ASCII
/// Escape the characters `<`, `>` and `&` so that `input` can be embedded
/// as text in an HTML document.
///
/// Every other character, including single and double quotes, is copied to
/// the output unchanged.
///
/// Returns a newly allocated `String`; `input` is not modified.
pub fn escape_html(input: &str) -> String {
    // The escaped text is never shorter than the input, so reserving
    // `input.len()` bytes avoids the first few reallocations.
    let mut output = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '<' => output.push_str("&lt;"),
            '>' => output.push_str("&gt;"),
            '&' => output.push_str("&amp;"),
            _ => output.push(c),
        }
    }
    output
}
/// Lookup table for HTML escaping, indexed by byte value (0..256).
///
/// A zero entry means the byte needs no escaping; a non-zero entry is the
/// index of its replacement string in `HTML_ESCAPES`. The non-zero entries
/// are `"` (1), `&` (2), `/` (3), `<` (4) and `>` (5).
static HTML_ESCAPE_TABLE: [u8; 256] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
/// Replacement strings for HTML escaping, indexed by the non-zero values
/// stored in `HTML_ESCAPE_TABLE`.
///
/// Index 0 is an empty placeholder, because 0 means "no escape" in the table.
/// The order of the remaining entries must match the values used there.
static HTML_ESCAPES: [&'static str; 6] = [
"",
"&quot;",
"&amp;",
"&#47;",
"&lt;",
"&gt;"
];
pub fn escape_html(s: &str) -> String {
let mut ob = String::new();
let size = s.len();
let bytes = s.as_bytes();
let mut mark = 0;
let mut i = 0;
while i < size {
match bytes[i..].iter().position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0) {
Some(pos) => {
i += pos;
}
None => break
/// Escape the characters that have a special meaning in LaTeX so that
/// `input` can be embedded as ordinary text in a `.tex` file.
///
/// * `&`, `%`, `$`, `#`, `_`, `{` and `}` are prefixed with a backslash.
/// * `~`, `^` and `\` are replaced by `\textasciitilde{}`,
///   `\textasciicircum{}` and `\textbackslash{}` respectively.
///
/// Every other character is copied to the output unchanged.
///
/// Returns a newly allocated `String`; `input` is not modified.
pub fn escape_tex(input: &str) -> String {
    // The escaped text is never shorter than the input, so reserving
    // `input.len()` bytes avoids the first few reallocations.
    let mut output = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '&' => output.push_str(r"\&"),
            '%' => output.push_str(r"\%"),
            '$' => output.push_str(r"\$"),
            '#' => output.push_str(r"\#"),
            '_' => output.push_str(r"\_"),
            '{' => output.push_str(r"\{"),
            '}' => output.push_str(r"\}"),
            // The trailing `{}` terminates the control word. Without it a
            // following letter would be read as part of the macro name
            // (`a~b` -> `\textasciitildeb`, an undefined command) and a
            // following space would be swallowed.
            '~' => output.push_str(r"\textasciitilde{}"),
            '^' => output.push_str(r"\textasciicircum{}"),
            '\\' => output.push_str(r"\textbackslash{}"),
            _ => output.push(c),
        }
    }
    output
}

View File

@ -3,6 +3,7 @@ use error::{Error,Result};
use token::Token;
use zipper::Zipper;
use templates::latex::*;
use escape::escape_tex;
use std::path::Path;
@ -42,7 +43,7 @@ impl<'a> LatexRenderer<'a> {
let mut content = String::from("");
for &(n, ref v) in &self.book.chapters {
self.current_chapter = n;
content.push_str(&self.render_vec(v));
content.push_str(&self.render_vec(v, true));
}
@ -70,20 +71,20 @@ impl<'a> LatexRenderer<'a> {
/// Transform a vector of `Token`s to LaTeX
fn render_vec(&mut self, tokens: &[Token]) -> String {
fn render_vec(&mut self, tokens: &[Token], escape: bool) -> String {
let mut res = String::new();
for token in tokens {
res.push_str(&self.parse_token(&token));
res.push_str(&self.parse_token(&token, escape));
}
res
}
fn parse_token(&mut self, token: &Token) -> String {
fn parse_token(&mut self, token: &Token, escape: bool) -> String {
match *token {
Token::Str(ref text) => text.clone(),
Token::Str(ref text) => if escape {escape_tex(text)} else {text.clone()},
Token::Paragraph(ref vec) => format!("{}\n\n",
self.render_vec(vec)),
self.render_vec(vec, escape)),
Token::Header(n, ref vec) => {
let mut content = String::new();
if n == 1 {
@ -104,22 +105,22 @@ impl<'a> LatexRenderer<'a> {
content.push_str("*");
}
content.push_str(r"{");
content.push_str(&self.render_vec(vec));
content.push_str(&self.render_vec(vec, true));
content.push_str("}\n");
content
},
Token::Emphasis(ref vec) => format!("\\emph{{{}}}", self.render_vec(vec)),
Token::Strong(ref vec) => format!("\\textbf{{{}}}", self.render_vec(vec)),
Token::Code(ref vec) => format!("\\texttt{{{}}}", self.render_vec(vec)),
Token::BlockQuote(ref vec) => format!("\\begin{{quotation}}\n{}\\end{{quotation}}\n", self.render_vec(vec)),
Token::CodeBlock(_, ref vec) => format!("\\begin{{verbatim}}\n{}\\end{{verbatim}}\n", self.render_vec(vec)),
Token::Emphasis(ref vec) => format!("\\emph{{{}}}", self.render_vec(vec, escape)),
Token::Strong(ref vec) => format!("\\textbf{{{}}}", self.render_vec(vec, escape)),
Token::Code(ref vec) => format!("\\texttt{{{}}}", self.render_vec(vec, escape)),
Token::BlockQuote(ref vec) => format!("\\begin{{quotation}}\n{}\\end{{quotation}}\n", self.render_vec(vec, escape)),
Token::CodeBlock(_, ref vec) => format!("\\begin{{verbatim}}\n{}\\end{{verbatim}}\n", self.render_vec(vec, false)),
Token::Rule => String::from("\\HRule\n"),
Token::SoftBreak => String::from(" "),
Token::HardBreak => String::from("\n"),
Token::List(ref vec) => format!("\\begin{{itemize}}\n{}\\end{{itemize}}", self.render_vec(vec)),
Token::OrderedList(_, ref vec) => format!("\\begin{{enumerate}}\n{}\\end{{enumerate}}\n", self.render_vec(vec)),
Token::Item(ref vec) => format!("\\item {}\n", self.render_vec(vec)),
Token::Link(_, _, ref vec) => self.render_vec(vec), //todo
Token::List(ref vec) => format!("\\begin{{itemize}}\n{}\\end{{itemize}}", self.render_vec(vec, escape)),
Token::OrderedList(_, ref vec) => format!("\\begin{{enumerate}}\n{}\\end{{enumerate}}\n", self.render_vec(vec, escape)),
Token::Item(ref vec) => format!("\\item {}\n", self.render_vec(vec, escape)),
Token::Link(_, _, ref vec) => self.render_vec(vec, escape), //todo
Token::Image(_, _, _) => panic!("Not yet implemented"),
}
}