mirror of
https://github.com/lise-henry/crowbook
synced 2024-06-06 10:36:10 +02:00
initial commit
This commit is contained in:
parent
a1f16ee8d0
commit
940b5c99fa
|
@ -0,0 +1,15 @@
|
|||
# Compiled files
|
||||
Cargo.lock
|
||||
*.o
|
||||
*.so
|
||||
*.rlib
|
||||
*.dll
|
||||
*~
|
||||
.*
|
||||
\#*
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
|
||||
# Generated by Cargo
|
||||
/target/
|
|
@ -0,0 +1,8 @@
|
|||
language: rust
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
matrix:
|
||||
allow_failures:
|
||||
- rust: nightly
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "crowbook"
|
||||
version = "0.1.0"
|
||||
authors = ["Elisabeth Henry <liz.henry@ouvaton.org>"]
|
||||
|
||||
[lib]
|
||||
name = "crowbook"
|
||||
path = "src/lib/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "crowbook"
|
||||
path = "src/bin/main.rs"
|
||||
doc = false
|
||||
|
||||
[dependencies]
|
||||
pulldown-cmark = "0.0.7"
|
|
@ -0,0 +1,42 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use crowbook::{ast_to_html, Parser, French};
|
||||
|
||||
|
||||
// Demo binary: parses a sample markdown document with the French cleaner
// and prints the AST (Debug) followed by the rendered HTML.
fn main() {
    // Sample document exercising headers, French quotes, a fenced code
    // block, nested/ordered lists and a link.
    // NOTE(review): leading indentation inside this literal was lost when the
    // file was scraped — the nested list item was presumably indented.
    let doc = "
Foo
===

« Oh la chevalier que voulez vous ? »


```rust
fn min(x : &u32, y : u32) -> &u32 {
if x < y { x } else { y }
}
```

Bar
---

Some paragraph

* a list
* inside a list
* another item

3. three
4. four
5. five

[& some link](http://foo/bar?baz=42&coin=plop)
";

    // French cleaner: '~' stands in for the non-breaking space.
    let french = French::new('~');
    let mut parser = Parser::new().with_cleaner(Box::new(french));
    let v = parser.parse(doc).unwrap();
    // Dump the AST for inspection...
    println!("{:?}", &v);

    // ...then the HTML rendering of the same AST.
    println!("{}", ast_to_html(v));
}
|
|
@ -0,0 +1,52 @@
|
|||
use std::borrow::Cow;
|
||||
use escape::escape_html;
|
||||
use token::Token;
|
||||
|
||||
/// Renders a single `Token` as an HTML fragment, recursing through
/// `ast_to_html` for every token variant that contains child tokens.
/// Returns `Cow::Borrowed` for fixed fragments, `Cow::Owned` otherwise.
fn parse_token<'a>(token: Token<'a>) -> Cow<'a, str> {
    match token {
        // Plain text is HTML-escaped here; everything else is markup we emit.
        Token::Str(text) => Cow::Owned(escape_html(&*text)),
        Token::Paragraph(vec) => Cow::Owned(format!("<p>{}</p>\n", ast_to_html(vec))),
        Token::Header(n, vec) => Cow::Owned(format!("<h{}>{}</h{}>\n", n, ast_to_html(vec), n)),
        Token::Emphasis(vec) => Cow::Owned(format!("<em>{}</em>", ast_to_html(vec))),
        Token::Strong(vec) => Cow::Owned(format!("<b>{}</b>", ast_to_html(vec))),
        Token::Code(vec) => Cow::Owned(format!("<code>{}</code>", ast_to_html(vec))),
        Token::BlockQuote(vec) => Cow::Owned(format!("<blockquote>{}</blockquote>\n", ast_to_html(vec))),
        Token::CodeBlock(language, vec) => {
            let s = ast_to_html(vec);
            if language.is_empty() {
                Cow::Owned(format!("<pre><code>\n{}</code></pre>\n", s))
            } else {
                // highlight.js-style class name carrying the fence info string
                Cow::Owned(format!("<pre><code class = \"language-{}\">{}</code></pre>\n", language, s))
            }
        },
        Token::Rule => Cow::Borrowed("<p class = \"rule\">***</p>\n"),
        Token::SoftBreak => Cow::Borrowed(" "),
        Token::HardBreak => Cow::Borrowed("<br />\n"),
        Token::List(vec) => Cow::Owned(format!("<ul>\n{}</ul>\n", ast_to_html(vec))),
        Token::OrderedList(n, vec) => Cow::Owned(format!("<ol start = \"{}\">\n{}</ol>\n", n, ast_to_html(vec))),
        Token::Item(vec) => Cow::Owned(format!("<li>{}</li>\n", ast_to_html(vec))),
        // NOTE(review): url and title are interpolated without any escaping —
        // a quote or '&' in either produces invalid/unsafe HTML; confirm
        // whether the inputs are trusted.
        Token::Link(url, title, vec) => Cow::Owned(format!("<a href = \"{}\"{}>{}</a>",
                                                           url,
                                                           if title.is_empty() {
                                                               String::new()
                                                           } else {
                                                               format!(" title = \"{}\"", title)
                                                           },
                                                           ast_to_html(vec))),
        Token::Image(url, title, alt) => Cow::Owned(format!("<img src = \"{}\" title = \"{}\" alt = \"{}\" />",
                                                            url,
                                                            title,
                                                            ast_to_html(alt)))

    }
}
|
||||
|
||||
/// Transform a vector of `Token`s to HTML format.
|
||||
pub fn ast_to_html(tokens: Vec<Token>) -> String {
|
||||
let mut res = String::new();
|
||||
|
||||
for token in tokens {
|
||||
res.push_str(&parse_token(token));
|
||||
}
|
||||
res
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
use token::Token;
|
||||
use std::borrow::Cow;
|
||||
|
||||
fn parse_token<'a>(token: Token<'a>) -> Cow<'a, str> {
|
||||
match token {
|
||||
Token::Str(text) => text,
|
||||
Token::Paragraph(vec) => {
|
||||
let mut s = ast_to_md(vec);
|
||||
s.push_str("\n\n");
|
||||
Cow::Owned(s)
|
||||
},
|
||||
Token::Header(n, vec) => {
|
||||
let s = ast_to_md(vec);
|
||||
let mut hashes = String::new();
|
||||
if n > 0 && n < 6 {
|
||||
for _ in 0..n {
|
||||
hashes.push('#');
|
||||
}
|
||||
} else {
|
||||
panic!("Error: wrong title level");
|
||||
}
|
||||
Cow::Owned(format!("{} {} {}\n", hashes, s, hashes))
|
||||
},
|
||||
Token::Emphasis(vec) => Cow::Owned(format!("*{}*", ast_to_md(vec))),
|
||||
Token::Strong(vec) => Cow::Owned(format!("**{}**", ast_to_md(vec))),
|
||||
Token::Code(vec) => Cow::Owned(format!("`{}`", ast_to_md(vec))),
|
||||
Token::BlockQuote(vec) => Cow::Owned(format!("> {}", ast_to_md(vec))),
|
||||
Token::CodeBlock(language, vec) => Cow::Owned(format!("```{}\n{}\n```\n", language, ast_to_md(vec))),
|
||||
Token::Rule => Cow::Borrowed("***"),
|
||||
Token::SoftBreak => Cow::Borrowed(" "),
|
||||
Token::HardBreak => Cow::Borrowed("\n"),
|
||||
_ => Cow::Borrowed("???")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn ast_to_md(tokens: Vec<Token>) -> String {
|
||||
let mut res = String::new();
|
||||
|
||||
for token in tokens {
|
||||
res.push_str(&parse_token(token));
|
||||
}
|
||||
res
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
/// Custom function because we don't really want to touch \t or \n
fn is_whitespace(c: char) -> bool {
    // NOTE(review): the original source compared three visually identical
    // characters — the non-ASCII ones (presumably U+00A0 NO-BREAK SPACE and
    // U+202F NARROW NO-BREAK SPACE, both common in French typography) were
    // mangled in transit. Restored here; confirm the exact set upstream.
    c == ' ' || c == '\u{A0}' || c == '\u{202F}'
}

/// Trait for cleaning a string.
/// This trait should be called for text that is e.g. in a paragraph, a title,
/// NOT for code blocks, hyperlinks and so on!
pub trait Cleaner {
    /// Cleans a string, collapsing runs of consecutive whitespace (as
    /// defined by `is_whitespace`) down to their first character.
    /// Tabs and newlines are deliberately left untouched.
    fn clean<'a>(&self, s: &mut Cow<'a, str>) {
        if s.contains(is_whitespace) { // if not, no need to do anything
            let mut new_s = String::with_capacity(s.len());
            let mut previous_space = false;
            let mut modified = false;
            for c in s.chars() {
                if is_whitespace(c) {
                    if previous_space {
                        // previous char already a space, don't copy it
                        // but signal the new string is different
                        modified = true;
                    } else {
                        new_s.push(c);
                        previous_space = true;
                    }
                } else {
                    previous_space = false;
                    new_s.push(c);
                }
            }

            if modified {
                // only copy new string if it is modified
                // (otherwise the Cow can stay borrowed and we allocate nothing)
                let old_s = s.to_mut();
                *old_s = new_s
            }
        }
    }
}

// The unit type provides the default, language-agnostic cleaning.
impl Cleaner for () {}
|
||||
|
||||
/// Implementation for french 'cleaning'
pub struct French {
    // Character substituted for the breakable space adjacent to «»?!;: —
    // typically a non-breaking space (or a marker like '~' in tests).
    nb_char: char,
}


impl French {
    /// Creates a new french cleaner, which will replace spaces with nb_char when appropriate.
    pub fn new(nb_char: char) -> French {
        French { nb_char: nb_char }
    }
}
|
||||
|
||||
|
||||
impl Cleaner for French {
    // puts non breaking spaces between :, ;, ?, !, «, »
    fn clean<'a>(&self, s: &mut Cow<'a, str>) {
        // Characters that require French non-breaking-space treatment.
        fn is_trouble(c: char) -> bool {
            match c {
                '?'|'!'|';'|':'|'»'|'«' => true,
                _ => false
            }
        }


        if !s.contains(is_trouble) { // if not, no need to do anything
            return;
        }
        ().clean(s); // first pass with default impl (collapse space runs)
        let mut new_s = String::with_capacity(s.len());
        {
            // Walk char pairs (current, next): a space *before* ?»!;: and a
            // space *after* « become self.nb_char; everything else is copied.
            let mut chars = s.chars();
            if let Some(mut current) = chars.next() {
                while let Some(next) = chars.next() {
                    if is_whitespace(current) {
                        match next {
                            '?' | '»' | '!' | ';' | ':' => new_s.push(self.nb_char),
                            _ => new_s.push(current)
                        }
                    } else {
                        new_s.push(current);
                        if current == '«' {
                            if is_whitespace(next) {
                                // Replace the space following « and skip it.
                                // NOTE(review): if the string ends right after
                                // this space, `current` below still pushes the
                                // original space — confirm that edge is wanted.
                                new_s.push(self.nb_char);
                                if let Some(next) = chars.next() {
                                    current = next;
                                    continue;
                                }
                            }
                        }
                    }
                    current = next;
                }
                // Last character has no successor; copy it verbatim.
                new_s.push(current);
            }
        }

        // Unconditional copy-back: unlike the default impl, reaching this
        // point means the string definitely changed (it contained trouble).
        let old_s = s.to_mut();
        *old_s = new_s
    }
}
|
||||
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
use std::error;
|
||||
use std::result;
|
||||
use std::fmt;
|
||||
|
||||
/// Crowbook error type
#[derive(Debug)]
pub enum Error {
    // Parsing failure, carrying a static description of the problem.
    Parser(&'static str),
}

impl error::Error for Error {
    fn description(&self) -> &str {
        match *self {
            Error::Parser(msg) => msg,
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Parser(msg) => write!(f, "Parser error: {}", msg),
        }
    }
}

/// Crate-wide result alias specialised on [`Error`].
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
// File taken from pulldown-cmark: https://github.com/google/pulldown-cmark
|
||||
// The only modification I made is that escape_href and escape_html return
|
||||
// a string by value and not by reference in the arguments function.
|
||||
// Lise
|
||||
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
//! Utility functions for HTML escaping
|
||||
|
||||
use std::str::from_utf8;
|
||||
|
||||
// 1 = safe to emit verbatim in an href attribute, 0 = must be escaped.
static HREF_SAFE: [u8; 128] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
];

static HEX_CHARS: &'static [u8] = b"0123456789ABCDEF";

/// Escapes a URL for inclusion in an `href` attribute: `&` and `'` become
/// HTML entities, every other unsafe or non-ASCII byte is %-encoded.
pub fn escape_href(s: &str) -> String {
    let mut ob = String::new();
    let mut mark = 0;
    for i in 0..s.len() {
        let c = s.as_bytes()[i];
        if c >= 0x80 || HREF_SAFE[c as usize] == 0 {
            // character needing escape

            // write partial substring up to mark
            if mark < i {
                ob.push_str(&s[mark..i]);
            }
            match c {
                b'&' => {
                    // was the no-op `push_str("&")` — the entity had been
                    // HTML-unescaped in transit; restored
                    ob.push_str("&amp;");
                },
                b'\'' => {
                    // likewise restored from a bare "'"
                    ob.push_str("&#x27;");
                },
                _ => {
                    // percent-encode the byte as %XY
                    let mut buf = [0u8; 3];
                    buf[0] = b'%';
                    buf[1] = HEX_CHARS[((c as usize) >> 4) & 0xF];
                    buf[2] = HEX_CHARS[(c as usize) & 0xF];
                    ob.push_str(from_utf8(&buf).unwrap());
                }
            }
            mark = i + 1; // all escaped characters are ASCII
        }
    }
    ob.push_str(&s[mark..]);
    ob
}
|
||||
|
||||
// Non-zero entries index into HTML_ESCAPES: 1 = '"', 2 = '&', 3 = '/',
// 4 = '<', 5 = '>'.
static HTML_ESCAPE_TABLE: [u8; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];

// The replacement entities. The scraped source had these HTML-unescaped
// (e.g. an invalid `"""` literal) — restored to the upstream
// pulldown-cmark values.
static HTML_ESCAPES: [&'static str; 6] = [
    "",
    "&quot;",
    "&amp;",
    "&#47;",
    "&lt;",
    "&gt;"
];

/// HTML-escapes `"`, `&`, `<` and `>` in `s` (the `/` table entry is
/// deliberately skipped below, matching upstream pulldown-cmark).
pub fn escape_html(s: &str) -> String {
    let mut ob = String::new();
    let size = s.len();
    let bytes = s.as_bytes();
    let mut mark = 0;
    let mut i = 0;
    while i < size {
        // Jump straight to the next byte that may need escaping.
        match bytes[i..].iter().position(|&c| HTML_ESCAPE_TABLE[c as usize] != 0) {
            Some(pos) => {
                i += pos;
            }
            None => break
        }
        let c = bytes[i];
        let escape = HTML_ESCAPE_TABLE[c as usize];
        if escape != 0 && c != b'/' {
            // Copy the safe run, then the entity for this byte.
            ob.push_str(&s[mark..i]);
            ob.push_str(HTML_ESCAPES[escape as usize]);
            mark = i + 1; // all escaped characters are ASCII
        }
        i += 1;
    }
    ob.push_str(&s[mark..]);
    ob
}
|
|
@ -0,0 +1,16 @@
|
|||
// crowbook library root: markdown is parsed with pulldown-cmark into a
// `Token` AST, which the ast_to_* modules render back out.
extern crate pulldown_cmark as cmark;

// Crate modules
pub mod escape;
pub mod parser;
pub mod ast_to_md;
pub mod ast_to_html;
pub mod cleaner;
pub mod token;
pub mod error;

// Convenience re-exports at the crate root
pub use ast_to_html::ast_to_html;
pub use parser::Parser;
pub use token::Token;
pub use cleaner::Cleaner;
pub use cleaner::French;
pub use error::{Result, Error};
|
|
@ -0,0 +1,107 @@
|
|||
use cmark::{Parser as CMParser, Event, Tag};
|
||||
use token::Token;
|
||||
use cleaner::Cleaner;
|
||||
use error::{Result,Error};
|
||||
|
||||
/// A parser that reads markdown and convert it to AST (a vector of `Token`s)
pub struct Parser {
    // NOTE(review): `numbering` is never read in this file yet — presumably
    // reserved for chapter numbering; confirm before removing.
    numbering: Option<String>, // None for no numbering, or a String with the name you want
    cleaner: Option<Box<Cleaner>>, // An optional parameter to clean source code

    verbatim: bool, // set to true when in e.g. a code block
}
|
||||
|
||||
impl Parser {
    /// Creates a parser with the default options
    pub fn new() -> Parser {
        Parser {
            verbatim: false,
            numbering: None,
            // Default cleaner: the unit type, which only collapses runs of
            // whitespace (see cleaner.rs).
            cleaner: Some(Box::new(())),
        }
    }

    /// Builder-style setter for the cleaner applied to text events.
    pub fn with_cleaner(mut self, cleaner: Box<Cleaner>) -> Parser {
        self.cleaner = Some(cleaner);
        self
    }

    /// Parse a string and returns an AST, that is a vector of `Token`s
    ///
    /// Returns a result, at this method might fail.
    pub fn parse<'a>(&mut self, s: &'a str) -> Result<Vec<Token<'a>>> {
        let mut p = CMParser::new(s);

        let mut res = vec!();
        try!(self.parse_events(&mut p, &mut res, None));
        Ok(res)
    }

    /// Consumes events from `p` into `v` until it sees the `End` event
    /// matching `current_tag` (or runs out of events when `current_tag`
    /// is `None`, i.e. at the top level).
    fn parse_events<'a>(&mut self, p: &mut CMParser<'a>, v: &mut Vec<Token<'a>>, current_tag: Option<&Tag>) -> Result<()> {
        while let Some(event) = p.next() {
            match event {
                Event::Text(mut text) => {
                    // Clean the text (space collapsing, French typography...)
                    // unless we are inside a verbatim context (code).
                    if let Some(ref cleaner) = self.cleaner {
                        if !self.verbatim {
                            cleaner.clean(&mut text);
                        }
                    }
                    v.push(Token::Str(text));
                },
                Event::Start(tag) => try!(self.parse_tag(p, v, tag)),
                Event::End(tag) => {
                    // Debug-only sanity check; the tags are compared through
                    // their Debug formatting.
                    debug_assert!(format!("{:?}", Some(&tag)) == format!("{:?}", current_tag),
                                  format!("Error: opening and closing tags mismatch!\n{:?} ≠ {:?}",
                                          tag,
                                          current_tag));
                    break;
                },
                Event::SoftBreak => v.push(Token::SoftBreak),
                Event::HardBreak => v.push(Token::HardBreak),
                // Unsupported constructs abort the whole parse.
                Event::Html(_) | Event::InlineHtml(_) => return Err(Error::Parser("No support for HTML code inside of Markdown, sorry.")),
                Event::FootnoteReference(_) => return Err(Error::Parser("No support for footnotes yet."))
            }
        }
        Ok(())
    }

    /// Handles one opened tag: recursively collects its children into a
    /// fresh vector, then wraps them in the matching `Token` variant.
    fn parse_tag<'a>(&mut self, p: &mut CMParser<'a>, v: &mut Vec<Token<'a>>, tag: Tag<'a>) -> Result<()> {
        let mut res = vec!();

        // Entering a code span/block disables text cleaning.
        match tag {
            Tag::Code | Tag::CodeBlock(_) => self.verbatim = true,
            _ => (),
        }

        try!(self.parse_events(p, &mut res, Some(&tag)));

        // NOTE(review): cleared unconditionally — a code span nested inside
        // another verbatim context would re-enable cleaning early; confirm
        // this cannot occur in practice.
        self.verbatim = false;

        let token = match tag {
            Tag::Paragraph => Token::Paragraph(res),
            Tag::Emphasis => Token::Emphasis(res),
            Tag::Strong => Token::Strong(res),
            Tag::Code => Token::Code(res),
            Tag::Header(x) => Token::Header(x, res),
            Tag::Link(url, title) => Token::Link(url, title, res),
            Tag::Image(url, title) => Token::Image(url, title, res),
            Tag::Rule => Token::Rule,
            Tag::List(opt) => {
                // Ordered lists carry their starting number.
                if let Some(n) = opt {
                    Token::OrderedList(n, res)
                } else {
                    Token::List(res)
                }},
            Tag::Item => Token::Item(res),
            Tag::BlockQuote => Token::BlockQuote(res),
            Tag::CodeBlock(language) => Token::CodeBlock(language, res),
            Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell => return Err(Error::Parser("No support for tables yet")),
            Tag::FootnoteDefinition(_) => return Err(Error::Parser("No support for footnotes")),
        };
        v.push(token);
        Ok(())
    }
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
/// A node of the markdown AST produced by `Parser`. Text is borrowed from
/// the source string where possible (hence the `Cow`s and the lifetime).
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    Str(Cow<'a, str>),
    Paragraph(Vec<Token<'a>>),
    Header(i32, Vec<Token<'a>>), //title level, list of tokens
    Emphasis(Vec<Token<'a>>),
    Strong(Vec<Token<'a>>),
    Code(Vec<Token<'a>>),
    BlockQuote(Vec<Token<'a>>),
    CodeBlock(Cow<'a, str>, Vec<Token<'a>>), //language, content of the block

    // List containers hold `Item` tokens.
    List(Vec<Token<'a>>),
    OrderedList(usize, Vec<Token<'a>>), //starting number, list
    Item(Vec<Token<'a>>),

    // Leaf tokens with no children.
    Rule,
    SoftBreak,
    HardBreak,

    Link(Cow<'a, str>, Cow<'a, str>, Vec<Token<'a>>), //url, title, list
    Image(Cow<'a, str>, Cow<'a, str>, Vec<Token<'a>>), //url, title, alt text
}
|
|
@ -0,0 +1,21 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::cleaner::{French, Cleaner};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
fn default() {
    // The default cleaner must collapse runs of spaces without trimming the
    // string's ends.
    // NOTE(review): the scraped source had input == expected (space runs were
    // collapsed by the HTML it passed through), making the test vacuous; the
    // multi-space input is restored here.
    let mut res = Cow::Borrowed("  Remove  supplementary  spaces   but  don't  trim  either ");
    ().clean(&mut res);
    assert_eq!(&res, " Remove supplementary spaces but don't trim either ");
}
|
||||
|
||||
|
||||
#[test]
fn french() {
    // The French cleaner replaces the plain space before ?, !, :, » and
    // after « with the configured non-breaking character ('~' here).
    let mut res = Cow::Borrowed(" « Comment allez-vous ? » demanda-t-elle à son interlocutrice qui lui répondit : « Mais très bien ma chère ! »");
    let french = French::new('~');
    french.clean(&mut res);
    println!("{}", &res);
    assert_eq!(&res, " «~Comment allez-vous~?~» demanda-t-elle à son interlocutrice qui lui répondit~: «~Mais très bien ma chère~!~»");
}
|
|
@ -0,0 +1,58 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::{Parser, ast_to_html};
|
||||
|
||||
|
||||
#[test]
fn combination() {
    // End-to-end: markdown -> AST -> HTML.
    // NOTE(review): the scrape HTML-unescaped the expected string (losing
    // &amp;/&lt;/&gt;) and stripped the list-nesting indentation from `doc`;
    // both are reconstructed here from what `escape_html` actually emits
    // (the href stays raw — this renderer never escapes URLs).
    let doc = "
Foo
===

```rust
fn min(x: &u32, y: u32) -> &u32 {
    if x < y { x } else { y }
}
```

Bar
---

Some paragraph

* a list
    * inside a list

* another item

3. three
4. four
5. five

[& some link](http://foo/bar?baz=42&coin=plop)
";
    let expected = "<h1>Foo</h1>
<pre><code class = \"language-rust\">fn min(x: &amp;u32, y: u32) -&gt; &amp;u32 {
    if x &lt; y { x } else { y }
}
</code></pre>
<h2>Bar</h2>
<p>Some paragraph</p>
<ul>
<li><p>a list</p>
<ul>
<li>inside a list</li>
</ul>
</li>
<li><p>another item</p>
</li>
</ul>
<ol start = \"3\">
<li>three</li>
<li>four</li>
<li>five</li>
</ol>
<p><a href = \"http://foo/bar?baz=42&coin=plop\">&amp; some link</a></p>
";
    let actual = ast_to_html(Parser::new().parse(doc).unwrap());
    assert_eq!(actual, expected);
}
|
|
@ -0,0 +1,152 @@
|
|||
extern crate crowbook;
|
||||
|
||||
use self::crowbook::{Parser, Token};
|
||||
use std::borrow::Cow;
|
||||
|
||||
fn parse_from_str<'a>(doc: &'a str) -> Vec<Token<'a>> {
|
||||
let mut parser = Parser::new();
|
||||
parser.parse(doc).unwrap()
|
||||
}
|
||||
|
||||
#[test]
fn h_p_em() {
    // Setext header plus a paragraph containing inline emphasis.
    let doc = "
Test
====

some *emphasis* required
";
    let mut parser = Parser::new();
    let res = parser.parse(doc).unwrap();

    assert_eq!(res, vec!(
        Token::Header(1, vec!(
            Token::Str(Cow::Borrowed("Test")))),
        Token::Paragraph(vec!(
            Token::Str(Cow::Borrowed("some ")),
            Token::Emphasis(vec!(
                Token::Str(Cow::Borrowed("emphasis")))),
            Token::Str(Cow::Borrowed(" required"))))));
}
|
||||
|
||||
#[test]
fn link_inline() {
    // Inline-style link: URL captured verbatim, empty title.
    let doc = "[a link](http://foo.bar)";
    let mut parser = Parser::new();
    let res = parser.parse(doc).unwrap();

    assert_eq!(res,
               vec!(
                   Token::Paragraph(vec!(
                       Token::Link(Cow::Borrowed("http://foo.bar"),
                                   Cow::Borrowed(""),
                                   vec!(
                                       Token::Str(Cow::Borrowed("a link"))))))));
}
|
||||
|
||||
#[test]
fn reference_link() {
    // Reference-style links are resolved to the URL of their definition.
    let doc = "
[reference link][1]

[1]: http://foo.bar
";
    let expected = r#"[Paragraph([Link("http://foo.bar", "", [Str("reference link")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(&result, expected);
}
|
||||
|
||||
#[test]
fn rule() {
    // A thematic break between two paragraphs becomes a Rule token.
    let doc = "a paragraph
****
another one";
    let expected = r#"[Paragraph([Str("a paragraph")]), Rule, Paragraph([Str("another one")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(&result, expected);
}
|
||||
|
||||
#[test]
fn lists() {
    // Nested ordered + unordered lists inside the first item.
    // NOTE(review): the leading indentation of the two nested items was
    // stripped in transit; it is restored here — without it the expected
    // nested AST below is unreachable.
    let doc = "
* banana
    3. 3
    - 4
* apple
* orange
";
    let expected = r#"[List([Item([Str("banana"), OrderedList(3, [Item([Str("3")])]), List([Item([Str("4")])])]), Item([Str("apple")]), Item([Str("orange")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn blockquote() {
    // A two-line blockquote: the internal line break becomes SoftBreak.
    let doc = "
normal paragraph

> some
> blockquote
";
    let expected = "[Paragraph([Str(\"normal paragraph\")]), BlockQuote([Paragraph([Str(\"some\"), SoftBreak, Str(\"blockquote\")])])]";
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn code_block() {
    // Fenced code blocks keep their content (trailing newline included) and
    // their info string ("" when absent).
    let doc = "
normal paragraph

```
code block
```

```rust
rust code block
```
";
    let expected = r#"[Paragraph([Str("normal paragraph")]), CodeBlock("", [Str("code block\n")]), CodeBlock("rust", [Str("rust code block\n")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn strong_emphasis() {
    // Single stars -> Emphasis, double stars -> Strong.
    let doc = "
*normal emphasis*

**strong emphasis**
";
    let expected = r#"[Paragraph([Emphasis([Str("normal emphasis")])]), Paragraph([Strong([Str("strong emphasis")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn code() {
    // Inline code span inside a paragraph.
    let doc = "some `code` inlined";
    let expected = r#"[Paragraph([Str("some "), Code([Str("code")]), Str(" inlined")])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn image_reference() {
    // Reference-style image: URL and title come from the definition.
    let doc = "
![alt text][logo]

[logo]: http://foo.bar/baz.png \"Title\"
";
    let expected = r#"[Paragraph([Image("http://foo.bar/baz.png", "Title", [Str("alt text")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
||||
|
||||
#[test]
fn image_inline() {
    // Inline-style image with an explicit title.
    let doc = "![alt text](http://foo.bar/baz.png \"Title\")";
    let expected = r#"[Paragraph([Image("http://foo.bar/baz.png", "Title", [Str("alt text")])])]"#;
    let result = format!("{:?}", parse_from_str(doc));
    assert_eq!(result, expected);
}
|
Loading…
Reference in New Issue