mirror of
https://github.com/lise-henry/crowbook
synced 2024-06-06 10:36:10 +02:00
initial commit
This commit is contained in:
parent
a1f16ee8d0
commit
940b5c99fa
|
@ -0,0 +1,15 @@
|
|||
# Compiled files
|
||||
Cargo.lock
|
||||
*.o
|
||||
*.so
|
||||
*.rlib
|
||||
*.dll
|
||||
*~
|
||||
.*
|
||||
\#*
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
|
||||
# Generated by Cargo
|
||||
/target/
|
|
@ -0,0 +1,8 @@
|
|||
language: rust
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
matrix:
|
||||
allow_failures:
|
||||
- rust: nightly
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "crowbook"
|
||||
version = "0.1.0"
|
||||
authors = ["Elisabeth Henry <liz.henry@ouvaton.org>"]
|
||||
|
||||
[lib]
|
||||
name = "crowbook"
|
||||
path = "src/lib/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "crowbook"
|
||||
path = "src/bin/main.rs"
|
||||
doc = false
|
||||
|
||||
[dependencies]
|
||||
pulldown-cmark = "0.0.7"
|
|
@ -0,0 +1,42 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use crowbook::{ast_to_html, Parser, French};
|
||||
|
||||
|
||||
// Demo binary: parses a sample markdown document with the French cleaner
// and prints the AST (Debug) followed by the rendered HTML.
fn main() {
    // Sample document exercising headers, French quotes, a fenced code
    // block, nested/ordered lists and a link.
    // NOTE(review): leading indentation inside this literal was lost when the
    // file was scraped — the nested list item was presumably indented.
    let doc = "
Foo
===

« Oh la chevalier que voulez vous ? »


```rust
fn min(x : &u32, y : u32) -> &u32 {
if x < y { x } else { y }
}
```

Bar
---

Some paragraph

* a list
* inside a list
* another item

3. three
4. four
5. five

[& some link](http://foo/bar?baz=42&coin=plop)
";

    // French cleaner: '~' stands in for the non-breaking space.
    let french = French::new('~');
    let mut parser = Parser::new().with_cleaner(Box::new(french));
    let v = parser.parse(doc).unwrap();
    // Dump the AST for inspection...
    println!("{:?}", &v);

    // ...then the HTML rendering of the same AST.
    println!("{}", ast_to_html(v));
}
|
|
@ -0,0 +1,52 @@
|
|||
use std::borrow::Cow;
|
||||
use escape::escape_html;
|
||||
use token::Token;
|
||||
|
||||
/// Renders a single `Token` as an HTML fragment, recursing through
/// `ast_to_html` for every token variant that contains child tokens.
/// Returns `Cow::Borrowed` for fixed fragments, `Cow::Owned` otherwise.
fn parse_token<'a>(token: Token<'a>) -> Cow<'a, str> {
    match token {
        // Plain text is HTML-escaped here; everything else is markup we emit.
        Token::Str(text) => Cow::Owned(escape_html(&*text)),
        Token::Paragraph(vec) => Cow::Owned(format!("<p>{}</p>\n", ast_to_html(vec))),
        Token::Header(n, vec) => Cow::Owned(format!("<h{}>{}</h{}>\n", n, ast_to_html(vec), n)),
        Token::Emphasis(vec) => Cow::Owned(format!("<em>{}</em>", ast_to_html(vec))),
        Token::Strong(vec) => Cow::Owned(format!("<b>{}</b>", ast_to_html(vec))),
        Token::Code(vec) => Cow::Owned(format!("<code>{}</code>", ast_to_html(vec))),
        Token::BlockQuote(vec) => Cow::Owned(format!("<blockquote>{}</blockquote>\n", ast_to_html(vec))),
        Token::CodeBlock(language, vec) => {
            let s = ast_to_html(vec);
            if language.is_empty() {
                Cow::Owned(format!("<pre><code>\n{}</code></pre>\n", s))
            } else {
                // highlight.js-style class name carrying the fence info string
                Cow::Owned(format!("<pre><code class = \"language-{}\">{}</code></pre>\n", language, s))
            }
        },
        Token::Rule => Cow::Borrowed("<p class = \"rule\">***</p>\n"),
        Token::SoftBreak => Cow::Borrowed(" "),
        Token::HardBreak => Cow::Borrowed("<br />\n"),
        Token::List(vec) => Cow::Owned(format!("<ul>\n{}</ul>\n", ast_to_html(vec))),
        Token::OrderedList(n, vec) => Cow::Owned(format!("<ol start = \"{}\">\n{}</ol>\n", n, ast_to_html(vec))),
        Token::Item(vec) => Cow::Owned(format!("<li>{}</li>\n", ast_to_html(vec))),
        // NOTE(review): url and title are interpolated without any escaping —
        // a quote or '&' in either produces invalid/unsafe HTML; confirm
        // whether the inputs are trusted.
        Token::Link(url, title, vec) => Cow::Owned(format!("<a href = \"{}\"{}>{}</a>",
                                                           url,
                                                           if title.is_empty() {
                                                               String::new()
                                                           } else {
                                                               format!(" title = \"{}\"", title)
                                                           },
                                                           ast_to_html(vec))),
        Token::Image(url, title, alt) => Cow::Owned(format!("<img src = \"{}\" title = \"{}\" alt = \"{}\" />",
                                                            url,
                                                            title,
                                                            ast_to_html(alt)))

    }
}
|
||||
|
||||
/// Transform a vector of `Token`s to HTML format.
|
||||
pub fn ast_to_html(tokens: Vec<Token>) -> String {
|
||||
let mut res = String::new();
|
||||
|
||||
for token in tokens {
|
||||
res.push_str(&parse_token(token));
|
||||
}
|
||||
res
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
use token::Token;
|
||||
use std::borrow::Cow;
|
||||
|
||||
fn parse_token<'a>(token: Token<'a>) -> Cow<'a, str> {
|
||||
match token {
|
||||
Token::Str(text) => text,
|
||||
Token::Paragraph(vec) => {
|
||||
let mut s = ast_to_md(vec);
|
||||
s.push_str("\n\n");
|
||||
Cow::Owned(s)
|
||||
},
|
||||
Token::Header(n, vec) => {
|
||||
let s = ast_to_md(vec);
|
||||
let mut hashes = String::new();
|
||||
if n > 0 && n < 6 {
|
||||
for _ in 0..n {
|
||||
hashes.push('#');
|
||||
}
|
||||
} else {
|
||||
panic!("Error: wrong title level");
|
||||
}
|
||||
Cow::Owned(format!("{} {} {}\n", hashes, s, hashes))
|
||||
},
|
||||
Token::Emphasis(vec) => Cow::Owned(format!("*{}*", ast_to_md(vec))),
|
||||
Token::Strong(vec) => Cow::Owned(format!("**{}**", ast_to_md(vec))),
|
||||
Token::Code(vec) => Cow::Owned(format!("`{}`", ast_to_md(vec))),
|
||||
Token::BlockQuote(vec) => Cow::Owned(format!("> {}", ast_to_md(vec))),
|
||||
Token::CodeBlock(language, vec) => Cow::Owned(format!("```{}\n{}\n```\n", language, ast_to_md(vec))),
|
||||
Token::Rule => Cow::Borrowed("***"),
|
||||
Token::SoftBreak => Cow::Borrowed(" "),
|
||||
Token::HardBreak => Cow::Borrowed("\n"),
|
||||
_ => Cow::Borrowed("???")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn ast_to_md(tokens: Vec<Token>) -> String {
|
||||
let mut res = String::new();
|
||||
|
||||
for token in tokens {
|
||||
res.push_str(&parse_token(token));
|
||||
}
|
||||
res
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
/// Custom function because we don't really want to touch \t or \n
fn is_whitespace(c: char) -> bool {
    // NOTE(review): the original source compared three visually identical
    // characters — the non-ASCII ones (presumably U+00A0 NO-BREAK SPACE and
    // U+202F NARROW NO-BREAK SPACE, both common in French typography) were
    // mangled in transit. Restored here; confirm the exact set upstream.
    c == ' ' || c == '\u{A0}' || c == '\u{202F}'
}

/// Trait for cleaning a string.
/// This trait should be called for text that is e.g. in a paragraph, a title,
/// NOT for code blocks, hyperlinks and so on!
pub trait Cleaner {
    /// Cleans a string, collapsing runs of consecutive whitespace (as
    /// defined by `is_whitespace`) down to their first character.
    /// Tabs and newlines are deliberately left untouched.
    fn clean<'a>(&self, s: &mut Cow<'a, str>) {
        if s.contains(is_whitespace) { // if not, no need to do anything
            let mut new_s = String::with_capacity(s.len());
            let mut previous_space = false;
            let mut modified = false;
            for c in s.chars() {
                if is_whitespace(c) {
                    if previous_space {
                        // previous char already a space, don't copy it
                        // but signal the new string is different
                        modified = true;
                    } else {
                        new_s.push(c);
                        previous_space = true;
                    }
                } else {
                    previous_space = false;
                    new_s.push(c);
                }
            }

            if modified {
                // only copy new string if it is modified
                // (otherwise the Cow can stay borrowed and we allocate nothing)
                let old_s = s.to_mut();
                *old_s = new_s
            }
        }
    }
}

// The unit type provides the default, language-agnostic cleaning.
impl Cleaner for () {}
|
||||
|
||||
/// Implementation for french 'cleaning'
pub struct French {
    // Character substituted for the breakable space adjacent to «»?!;: —
    // typically a non-breaking space (or a marker like '~' in tests).
    nb_char: char,
}


impl French {
    /// Creates a new french cleaner, which will replace spaces with nb_char when appropriate.
    pub fn new(nb_char: char) -> French {
        French { nb_char: nb_char }
    }
}
|
||||
|
||||
|
||||
impl Cleaner for French {
    // puts non breaking spaces between :, ;, ?, !, «, »
    fn clean<'a>(&self, s: &mut Cow<'a, str>) {
        // Characters that require French non-breaking-space treatment.
        fn is_trouble(c: char) -> bool {
            match c {
                '?'|'!'|';'|':'|'»'|'«' => true,
                _ => false
            }
        }


        if !s.contains(is_trouble) { // if not, no need to do anything
            return;
        }
        ().clean(s); // first pass with default impl (collapse space runs)
        let mut new_s = String::with_capacity(s.len());
        {
            // Walk char pairs (current, next): a space *before* ?»!;: and a
            // space *after* « become self.nb_char; everything else is copied.
            let mut chars = s.chars();
            if let Some(mut current) = chars.next() {
                while let Some(next) = chars.next() {
                    if is_whitespace(current) {
                        match next {
                            '?' | '»' | '!' | ';' | ':' => new_s.push(self.nb_char),
                            _ => new_s.push(current)
                        }
                    } else {
                        new_s.push(current);
                        if current == '«' {
                            if is_whitespace(next) {
                                // Replace the space following « and skip it.
                                // NOTE(review): if the string ends right after
                                // this space, `current` below still pushes the
                                // original space — confirm that edge is wanted.
                                new_s.push(self.nb_char);
                                if let Some(next) = chars.next() {
                                    current = next;
                                    continue;
                                }
                            }
                        }
                    }
                    current = next;
                }
                // Last character has no successor; copy it verbatim.
                new_s.push(current);
            }
        }

        // Unconditional copy-back: unlike the default impl, reaching this
        // point means the string definitely changed (it contained trouble).
        let old_s = s.to_mut();
        *old_s = new_s
    }
}
|
||||
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
use std::error;
|
||||
use std::result;
|
||||
use std::fmt;
|
||||
|
||||
/// Crowbook error type
#[derive(Debug)]
pub enum Error {
    // Parsing failure, carrying a static description of the problem.
    Parser(&'static str),
}

impl error::Error for Error {
    fn description(&self) -> &str {
        match *self {
            Error::Parser(msg) => msg,
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Parser(msg) => write!(f, "Parser error: {}", msg),
        }
    }
}

/// Crate-wide result alias specialised on [`Error`].
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
// File taken from pulldown-cmark: https://github.com/google/pulldown-cmark
|
||||
// The only modification I made is that escape_href and escape_html return
|
||||
// a string by value and not by reference in the arguments function.
|
||||
// Lise
|
||||
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//! Utility functions for HTML escaping
|
||||
|
||||
use std::str::from_utf8;
|
||||
|
||||
// 1 = safe to emit verbatim in an href attribute, 0 = must be escaped.
static HREF_SAFE: [u8; 128] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
];

static HEX_CHARS: &'static [u8] = b"0123456789ABCDEF";

/// Escapes a URL for inclusion in an `href` attribute: `&` and `'` become
/// HTML entities, every other unsafe or non-ASCII byte is %-encoded.
pub fn escape_href(s: &str) -> String {
    let mut ob = String::new();
    let mut mark = 0;
    for i in 0..s.len() {
        let c = s.as_bytes()[i];
        if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
            // character needing escape

            // write partial substring up to mark
            if mark < i {
                ob.push_str(&s[mark..i]);
            }
            match c {
                b'&' => {
                    // was the no-op `push_str("&")` — the entity had been
                    // HTML-unescaped in transit; restored
                    ob.push_str("&amp;");
                },
                b'\'' => {
                    // likewise restored from a bare "'"
                    ob.push_str("&#x27;");
                },
                _ => {
                    // percent-encode the byte as %XY
                    let mut buf = [0u8; 3];
                    buf[0] = b'%';
                    buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
                    buf[2] = HEX_CHARS[(c as usize) & 0xF];
                    ob.push_str(from_utf8(&buf).unwrap());
                }
            }
            mark = i + 1; // all escaped characters are ASCII
        }
    }
    ob.push_str(&s[mark..]);
    ob
}
|
||||
|
||||
// Non-zero entries index into HTML_ESCAPES: 1 = '"', 2 = '&', 3 = '/',
// 4 = '<', 5 = '>'.
static HTML_ESCAPE_TABLE: [u8; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

// The replacement entities. The scraped source had these HTML-unescaped
// (e.g. an invalid `"""` literal) — restored to the upstream
// pulldown-cmark values.
static HTML_ESCAPES: [&'static str; 6] = [
    "",
    "&quot;",
    "&amp;",
    "&#47;",
    "&lt;",
    "&gt;"
];

/// HTML-escapes `"`, `&`, `<` and `>` in `s` (the `/` table entry is
/// deliberately skipped below, matching upstream pulldown-cmark).
pub fn escape_html(s: &str) -> String {
    let mut ob = String::new();
    let size = s.len();
    let bytes = s.as_bytes();
    let mut mark = 0;
    let mut i = 0;
    while i < size {
        // Jump straight to the next byte that may need escaping.
        match bytes[i..].iter().position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0) {
            Some(pos) => {
                i += pos;
            }
            None => break
        }
        let c = bytes[i];
        let escape = HTML_ESCAPE_TABLE[c as usize];
        if escape != 0 && c != b'/' {
            // Copy the safe run, then the entity for this byte.
            ob.push_str(&s[mark..i]);
            ob.push_str(HTML_ESCAPES[escape as usize]);
            mark = i + 1; // all escaped characters are ASCII
        }
        i += 1;
    }
    ob.push_str(&s[mark..]);
    ob
}
|
|
@ -0,0 +1,16 @@
|
|||
// crowbook library root: markdown is parsed with pulldown-cmark into a
// `Token` AST, which the ast_to_* modules render back out.
extern crate pulldown_cmark as cmark;

// Crate modules
pub mod escape;
pub mod parser;
pub mod ast_to_md;
pub mod ast_to_html;
pub mod cleaner;
pub mod token;
pub mod error;

// Convenience re-exports at the crate root
pub use ast_to_html::ast_to_html;
pub use parser::Parser;
pub use token::Token;
pub use cleaner::Cleaner;
pub use cleaner::French;
pub use error::{Result, Error};
|
|
@ -0,0 +1,107 @@
|
|||
use cmark::{Parser as CMParser, Event, Tag};
|
||||
use token::Token;
|
||||
use cleaner::Cleaner;
|
||||
use error::{Result,Error};
|
||||
|
||||
/// A parser that reads markdown and convert it to AST (a vector of `Token`s)
pub struct Parser {
    // NOTE(review): `numbering` is never read in this file yet — presumably
    // reserved for chapter numbering; confirm before removing.
    numbering: Option<String>, // None for no numbering, or a String with the name you want
    cleaner: Option<Box<Cleaner>>, // An optional parameter to clean source code

    verbatim: bool, // set to true when in e.g. a code block
}
|
||||
|
||||
impl Parser {
    /// Creates a parser with the default options
    pub fn new() -> Parser {
        Parser {
            verbatim: false,
            numbering: None,
            // Default cleaner: the unit type, which only collapses runs of
            // whitespace (see cleaner.rs).
            cleaner: Some(Box::new(())),
        }
    }

    /// Builder-style setter for the cleaner applied to text events.
    pub fn with_cleaner(mut self, cleaner: Box<Cleaner>) -> Parser {
        self.cleaner = Some(cleaner);
        self
    }

    /// Parse a string and returns an AST, that is a vector of `Token`s
    ///
    /// Returns a result, at this method might fail.
    pub fn parse<'a>(&mut self, s: &'a str) -> Result<Vec<Token<'a>>> {
        let mut p = CMParser::new(s);

        let mut res = vec!();
        try!(self.parse_events(&mut p, &mut res, None));
        Ok(res)
    }

    /// Consumes events from `p` into `v` until it sees the `End` event
    /// matching `current_tag` (or runs out of events when `current_tag`
    /// is `None`, i.e. at the top level).
    fn parse_events<'a>(&mut self, p: &mut CMParser<'a>, v: &mut Vec<Token<'a>>, current_tag: Option<&Tag>) -> Result<()> {
        while let Some(event) = p.next() {
            match event {
                Event::Text(mut text) => {
                    // Clean the text (space collapsing, French typography...)
                    // unless we are inside a verbatim context (code).
                    if let Some(ref cleaner) = self.cleaner {
                        if !self.verbatim {
                            cleaner.clean(&mut text);
                        }
                    }
                    v.push(Token::Str(text));
                },
                Event::Start(tag) => try!(self.parse_tag(p, v, tag)),
                Event::End(tag) => {
                    // Debug-only sanity check; the tags are compared through
                    // their Debug formatting.
                    debug_assert!(format!("{:?}", Some(&tag)) == format!("{:?}", current_tag),
                                  format!("Error: opening and closing tags mismatch!\n{:?} ≠ {:?}",
                                          tag,
                                          current_tag));
                    break;
                },
                Event::SoftBreak => v.push(Token::SoftBreak),
                Event::HardBreak => v.push(Token::HardBreak),
                // Unsupported constructs abort the whole parse.
                Event::Html(_) | Event::InlineHtml(_) => return Err(Error::Parser("No support for HTML code inside of Markdown, sorry.")),
                Event::FootnoteReference(_) => return Err(Error::Parser("No support for footnotes yet."))
            }
        }
        Ok(())
    }

    /// Handles one opened tag: recursively collects its children into a
    /// fresh vector, then wraps them in the matching `Token` variant.
    fn parse_tag<'a>(&mut self, p: &mut CMParser<'a>, v: &mut Vec<Token<'a>>, tag: Tag<'a>) -> Result<()> {
        let mut res = vec!();

        // Entering a code span/block disables text cleaning.
        match tag {
            Tag::Code | Tag::CodeBlock(_) => self.verbatim = true,
            _ => (),
        }

        try!(self.parse_events(p, &mut res, Some(&tag)));

        // NOTE(review): cleared unconditionally — a code span nested inside
        // another verbatim context would re-enable cleaning early; confirm
        // this cannot occur in practice.
        self.verbatim = false;

        let token = match tag {
            Tag::Paragraph => Token::Paragraph(res),
            Tag::Emphasis => Token::Emphasis(res),
            Tag::Strong => Token::Strong(res),
            Tag::Code => Token::Code(res),
            Tag::Header(x) => Token::Header(x, res),
            Tag::Link(url, title) => Token::Link(url, title, res),
            Tag::Image(url, title) => Token::Image(url, title, res),
            Tag::Rule => Token::Rule,
            Tag::List(opt) => {
                // Ordered lists carry their starting number.
                if let Some(n) = opt {
                    Token::OrderedList(n, res)
                } else {
                    Token::List(res)
                }},
            Tag::Item => Token::Item(res),
            Tag::BlockQuote => Token::BlockQuote(res),
            Tag::CodeBlock(language) => Token::CodeBlock(language, res),
            Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell => return Err(Error::Parser("No support for tables yet")),
            Tag::FootnoteDefinition(_) => return Err(Error::Parser("No support for footnotes")),
        };
        v.push(token);
        Ok(())
    }
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
/// A node of the markdown AST produced by `Parser`. Text is borrowed from
/// the source string where possible (hence the `Cow`s and the lifetime).
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    Str(Cow<'a, str>),
    Paragraph(Vec<Token<'a>>),
    Header(i32, Vec<Token<'a>>), //title level, list of tokens
    Emphasis(Vec<Token<'a>>),
    Strong(Vec<Token<'a>>),
    Code(Vec<Token<'a>>),
    BlockQuote(Vec<Token<'a>>),
    CodeBlock(Cow<'a, str>, Vec<Token<'a>>), //language, content of the block

    // List containers hold `Item` tokens.
    List(Vec<Token<'a>>),
    OrderedList(usize, Vec<Token<'a>>), //starting number, list
    Item(Vec<Token<'a>>),

    // Leaf tokens with no children.
    Rule,
    SoftBreak,
    HardBreak,

    Link(Cow<'a, str>, Cow<'a, str>, Vec<Token<'a>>), //url, title, list
    Image(Cow<'a, str>, Cow<'a, str>, Vec<Token<'a>>), //url, title, alt text
}
|
|
@ -0,0 +1,21 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::cleaner::{French, Cleaner};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
fn default() {
    // The default cleaner must collapse runs of spaces without trimming the
    // string's ends.
    // NOTE(review): the scraped source had input == expected (space runs were
    // collapsed by the HTML it passed through), making the test vacuous; the
    // multi-space input is restored here.
    let mut res = Cow::Borrowed("  Remove  supplementary  spaces   but  don't  trim  either ");
    ().clean(&mut res);
    assert_eq!(&res, " Remove supplementary spaces but don't trim either ");
}
|
||||
|
||||
|
||||
#[test]
fn french() {
    // The French cleaner replaces the plain space before ?, !, :, » and
    // after « with the configured non-breaking character ('~' here).
    let mut res = Cow::Borrowed(" « Comment allez-vous ? » demanda-t-elle à son interlocutrice qui lui répondit : « Mais très bien ma chère ! »");
    let french = French::new('~');
    french.clean(&mut res);
    println!("{}", &res);
    assert_eq!(&res, " «~Comment allez-vous~?~» demanda-t-elle à son interlocutrice qui lui répondit~: «~Mais très bien ma chère~!~»");
}
|
|
@ -0,0 +1,58 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::{Parser, ast_to_html};
|
||||
|
||||
|
||||
#[test]
fn combination() {
    // End-to-end: markdown -> AST -> HTML.
    // NOTE(review): the scrape HTML-unescaped the expected string (losing
    // &amp;/&lt;/&gt;) and stripped the list-nesting indentation from `doc`;
    // both are reconstructed here from what `escape_html` actually emits
    // (the href stays raw — this renderer never escapes URLs).
    let doc = "
Foo
===

```rust
fn min(x: &u32, y: u32) -> &u32 {
    if x < y { x } else { y }
}
```

Bar
---

Some paragraph

* a list
    * inside a list

* another item

3. three
4. four
5. five

[& some link](http://foo/bar?baz=42&coin=plop)
";
    let expected = "<h1>Foo</h1>
<pre><code class = \"language-rust\">fn min(x: &amp;u32, y: u32) -&gt; &amp;u32 {
    if x &lt; y { x } else { y }
}
</code></pre>
<h2>Bar</h2>
<p>Some paragraph</p>
<ul>
<li><p>a list</p>
<ul>
<li>inside a list</li>
</ul>
</li>
<li><p>another item</p>
</li>
</ul>
<ol start = \"3\">
<li>three</li>
<li>four</li>
<li>five</li>
</ol>
<p><a href = \"http://foo/bar?baz=42&coin=plop\">&amp; some link</a></p>
";
    let actual = ast_to_html(Parser::new().parse(doc).unwrap());
    assert_eq!(actual, expected);
}
|
|
@ -0,0 +1,152 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::{Parser, Token};
|
||||
use std::borrow::Cow;
|
||||
|
||||
fn parse_from_str<'a>(doc: &'a str) -> Vec<Token<'a>> {
|
||||
let mut parser = Parser::new();
|
||||
parser.parse(doc).unwrap()
|
||||
}
|
||||
|
||||
#[test]
fn h_p_em() {
    // Setext header plus a paragraph containing inline emphasis.
    let doc = "
Test
====

some *emphasis* required
";
    let mut parser = Parser::new();
    let res = parser.parse(doc).unwrap();

    assert_eq!(res, vec!(
        Token::Header(1, vec!(
            Token::Str(Cow::Borrowed("Test")))),
        Token::Paragraph(vec!(
            Token::Str(Cow::Borrowed("some ")),
            Token::Emphasis(vec!(
                Token::Str(Cow::Borrowed("emphasis")))),
            Token::Str(Cow::Borrowed(" required"))))));
}
|
||||
|
||||
#[test]
fn link_inline() {
    // Inline-style link: URL captured verbatim, empty title.
    let doc = "[a link](http://foo.bar)";
    let mut parser = Parser::new();
    let res = parser.parse(doc).unwrap();

    assert_eq!(res,
               vec!(
                   Token::Paragraph(vec!(
                       Token::Link(Cow::Borrowed("http://foo.bar"),
                                   Cow::Borrowed(""),
                                   vec!(
                                       Token::Str(Cow::Borrowed("a link"))))))));
}
|
||||
|
||||
#[test]
fn reference_link() {
    // Reference-style links are resolved to the URL of their definition.
    let doc = "
[reference link][1]

[1]: http://foo.bar
";
    let expected = r#"[Paragraph([Link("http://foo.bar", "", [Str("reference link")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(&result, expected);
}
|
||||
|
||||
#[test]
fn rule() {
    // A thematic break between two paragraphs becomes a Rule token.
    let doc = "a paragraph
****
another one";
    let expected = r#"[Paragraph([Str("a paragraph")]), Rule, Paragraph([Str("another one")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(&result, expected);
}
|
||||
|
||||
#[test]
fn lists() {
    // Nested ordered + unordered lists inside the first item.
    // NOTE(review): the leading indentation of the two nested items was
    // stripped in transit; it is restored here — without it the expected
    // nested AST below is unreachable.
    let doc = "
* banana
    3. 3
    - 4
* apple
* orange
";
    let expected = r#"[List([Item([Str("banana"), OrderedList(3, [Item([Str("3")])]), List([Item([Str("4")])])]), Item([Str("apple")]), Item([Str("orange")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn blockquote() {
    // A two-line blockquote: the internal line break becomes SoftBreak.
    let doc = "
normal paragraph

> some
> blockquote
";
    let expected = "[Paragraph([Str(\"normal paragraph\")]), BlockQuote([Paragraph([Str(\"some\"), SoftBreak, Str(\"blockquote\")])])]";
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn code_block() {
    // Fenced code blocks keep their content (trailing newline included) and
    // their info string ("" when absent).
    let doc = "
normal paragraph

```
code block
```

```rust
rust code block
```
";
    let expected = r#"[Paragraph([Str("normal paragraph")]), CodeBlock("", [Str("code block\n")]), CodeBlock("rust", [Str("rust code block\n")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn strong_emphasis() {
    // Single stars -> Emphasis, double stars -> Strong.
    let doc = "
*normal emphasis*

**strong emphasis**
";
    let expected = r#"[Paragraph([Emphasis([Str("normal emphasis")])]), Paragraph([Strong([Str("strong emphasis")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn code() {
    // Inline code span inside a paragraph.
    let doc = "some `code` inlined";
    let expected = r#"[Paragraph([Str("some "), Code([Str("code")]), Str(" inlined")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn image_reference() {
    // Reference-style image: URL and title come from the definition.
    let doc = "
![alt text][logo]

[logo]: http://foo.bar/baz.png \"Title\"
";
    let expected = r#"[Paragraph([Image("http://foo.bar/baz.png", "Title", [Str("alt text")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn image_inline() {
    // Inline-style image with an explicit title.
    let doc = "![alt text](http://foo.bar/baz.png \"Title\")";
    let expected = r#"[Paragraph([Image("http://foo.bar/baz.png", "Title", [Str("alt text")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
Loading…
Reference in New Issue