1
0
Fork 0
mirror of https://github.com/lise-henry/crowbook synced 2024-05-23 04:56:17 +02:00

Adding more text statistics

Using the excellent 'punkt' crate for language dependent sentence counting
and the 'hyphenation' crate for syllable counting.
This commit is contained in:
hirschenberger 2018-01-20 17:32:20 +01:00
parent 8b2c0ecfd0
commit 01660bc7a0
5 changed files with 437 additions and 68 deletions

273
Cargo.lock generated
View File

@ -1,37 +1,3 @@
[root]
name = "crowbook"
version = "0.14.0"
dependencies = [
"caribon 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.27.1 (registry+https://github.com/rust-lang/crates.io-index)",
"console 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-intl 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-intl-runtime 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-text-processing 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"epub-builder 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"mime_guess 1.8.3 (registry+https://github.com/rust-lang/crates.io-index)",
"mustache 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
"numerals 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pulldown-cmark 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"simplelog 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"syntect 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"yaml-rust 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "aho-corasick"
version = "0.6.3"
@ -97,6 +63,17 @@ dependencies = [
"safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bincode"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bincode"
version = "0.8.0"
@ -231,6 +208,42 @@ dependencies = [
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crowbook"
version = "0.14.0"
dependencies = [
"caribon 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.27.1 (registry+https://github.com/rust-lang/crates.io-index)",
"console 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-intl 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-intl-runtime 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crowbook-text-processing 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"epub-builder 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)",
"hyphenation 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"mime_guess 1.8.3 (registry+https://github.com/rust-lang/crates.io-index)",
"mustache 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
"numerals 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pulldown-cmark 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"punkt 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"simplelog 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"syntect 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"yaml-rust 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crowbook-intl"
version = "0.2.1"
@ -358,6 +371,30 @@ dependencies = [
"url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hyphenation"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bincode 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"hyphenation_commons 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"pocket-resources 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-segmentation 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hyphenation_commons"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_codegen 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "idna"
version = "0.1.4"
@ -483,11 +520,34 @@ name = "num"
version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-bigint 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-complex 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"num-iter 0.1.34 (registry+https://github.com/rust-lang/crates.io-index)",
"num-rational 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-bigint"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-complex"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-integer"
version = "0.1.35"
@ -505,6 +565,17 @@ dependencies = [
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-rational"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-bigint 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.1.40"
@ -604,6 +675,15 @@ dependencies = [
"rand 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_macros"
version = "0.7.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_generator 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_shared"
version = "0.7.21"
@ -630,6 +710,11 @@ dependencies = [
"xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pocket-resources"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "podio"
version = "0.1.5"
@ -643,6 +728,18 @@ dependencies = [
"bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "punkt"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_macros 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
"rust-freqdist 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.3.15"
@ -708,6 +805,11 @@ name = "regex-syntax"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rust-freqdist"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rustc-demangle"
version = "0.1.5"
@ -746,11 +848,36 @@ name = "scopeguard"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde_codegen"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_codegen_internals 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.10.8 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex_syntax 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_codegen_internals"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"syn 0.10.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_derive"
version = "1.0.21"
@ -820,6 +947,15 @@ name = "strsim"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "syn"
version = "0.11.11"
@ -858,6 +994,51 @@ dependencies = [
"yaml-rust 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "syntex"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"syntex_errors 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex_syntax 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "syntex_errors"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex_pos 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "syntex_pos"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "syntex_syntax"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex_errors 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"syntex_pos 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tempdir"
version = "0.3.5"
@ -952,6 +1133,11 @@ name = "unicode-normalization"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-segmentation"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-width"
version = "0.1.4"
@ -1077,6 +1263,7 @@ dependencies = [
"checksum backtrace-sys 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "44585761d6161b0f57afc49482ab6bd067e4edef48c12a152c237eb0203f7661"
"checksum base64 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96434f987501f0ed4eb336a411e0631ecd1afa11574fe148587adc4ff96143c9"
"checksum base64 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7c4a342b450b268e1be8036311e2c613d7f8a7ed31214dff1cc3b60852a3168d"
"checksum bincode 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "55eb0b7fd108527b0c77860f75eca70214e11a8b4c6ef05148c54c05a25d48ad"
"checksum bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e103c8b299b28a9c6990458b7013dc4a8356a9b854c51b9883241f5866fac36e"
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5"
@ -1108,6 +1295,8 @@ dependencies = [
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
"checksum httparse 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "af2f2dd97457e8fb1ae7c5a420db346af389926e36f43768b96f101546b04a07"
"checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2"
"checksum hyphenation 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b1382f91156aeb35382eecd80b203186f89a0a73ad3f4b318a5ff9f5c20667b6"
"checksum hyphenation_commons 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43e24b9d58d845c91b7793c40e7dbf1b9838a331250aa3c16cafa7ddb5acba3c"
"checksum idna 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "014b298351066f1512874135335d62a789ffe78a9974f94b43ed5621951eaf7d"
"checksum indicatif 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ba24d01b1bc8bb67a28f71935c7545f2dff16e6c6a2b0b20af1514b6c9f096a"
"checksum itoa 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c"
@ -1125,8 +1314,11 @@ dependencies = [
"checksum msdos_time 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "65ba9d75bcea84e07812618fedf284a64776c2f2ea0cad6bca7f69739695a958"
"checksum mustache 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ddb004e419334fc9172d0a5ff91c0770bdd6239091b0b343eb5926101f0a7d13"
"checksum num 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "a311b77ebdc5dd4cf6449d81e4135d9f0e3b153839ac90e648a8ef538f923525"
"checksum num-bigint 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "bdc1494b5912f088f260b775799468d9b9209ac60885d8186a547a0476289e23"
"checksum num-complex 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "58de7b4bf7cf5dbecb635a5797d489864eadd03b107930cbccf9e0fd7428b47c"
"checksum num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "d1452e8b06e448a07f0e6ebb0bb1d92b8890eea63288c0b627331d53514d0fba"
"checksum num-iter 0.1.34 (registry+https://github.com/rust-lang/crates.io-index)" = "7485fcc84f85b4ecd0ea527b14189281cf27d60e583ae65ebc9c088b13dffe01"
"checksum num-rational 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "0b950f75e042fdd710460084d19c8efdcd72d65183ead8ecd04b90483f5a55d2"
"checksum num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "99843c856d68d8b4313b03a17e33c4bb42ae8f6610ea81b28abe076ac721b9b0"
"checksum num_cpus 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "514f0d73e64be53ff320680ca671b64fe3fb91da01e1ae2ddc99eb51d453b20d"
"checksum numerals 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6036c7048efd6d56ddf1ff3763cb5bbe54e270725c4c80c2e318198b4806a6a5"
@ -1139,11 +1331,14 @@ dependencies = [
"checksum phf 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)" = "cb325642290f28ee14d8c6201159949a872f220c62af6e110a56ea914fbe42fc"
"checksum phf_codegen 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)" = "d62594c0bb54c464f633175d502038177e90309daf2e0158be42ed5f023ce88f"
"checksum phf_generator 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)" = "6b07ffcc532ccc85e3afc45865469bf5d9e4ef5bfcf9622e3cfe80c2d275ec03"
"checksum phf_macros 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)" = "00edda362e3d89cbf8fb24099c9e8ca844d8ad2cc04587f73b027f063d734504"
"checksum phf_shared 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)" = "07e24b0ca9643bdecd0632f2b3da6b1b89bbb0030e0b992afc1113b23a7bc2f2"
"checksum pkg-config 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3a8b4c6b8165cd1a1cd4b9b120978131389f64bdaf456435caa41e630edba903"
"checksum plist 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c61ac2afed2856590ae79d6f358a24b85ece246d2aa134741a66d589519b7503"
"checksum pocket-resources 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c135f38778ad324d9e9ee68690bac2c1a51f340fdf96ca13e2ab3914eb2e51d8"
"checksum podio 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e5422a1ee1bc57cc47ae717b0137314258138f38fd5f3cea083f43a9725383a0"
"checksum pulldown-cmark 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a656fdb8b6848f896df5e478a0eb9083681663e37dcb77dd16981ff65329fe8b"
"checksum punkt 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c740bff6bd63432a03e44274c0557ff68deb0a5f0b7598e41be8a398c04f68fd"
"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
"checksum rand 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)" = "6475140dfd8655aeb72e1fd4b7a1cc1c202be65d71669476e392fe62532b9edd"
"checksum rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed02d09394c94ffbdfdc755ad62a132e94c3224a8354e78a1200ced34df12edf"
@ -1152,13 +1347,17 @@ dependencies = [
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"
"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
"checksum rust-freqdist 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ca136c6f6d53a2de7264bb392ea7c1f83357e00d131a24275b1661ea1c23c3af"
"checksum rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "aee45432acc62f7b9a108cc054142dac51f979e69e71ddce7d6fc7adf29e817e"
"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f"
"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7"
"checksum same-file 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70a18720d745fb9ca6a041b37cb36d0b21066006b6cff8b5b360142d4b81fb60"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)" = "9dad3f759919b92c3068c696c15c3d17238234498bbdcc80f2c469606f948ac8"
"checksum serde 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)" = "6eda663e865517ee783b0891a3f6eb3a253e0b0dabb46418969ee9635beadd9e"
"checksum serde_codegen 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c5d8a33087d8984f9535daa62a6498a08f6476050b00ab9339dd847e4c25cc"
"checksum serde_codegen_internals 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "afad7924a009f859f380e4a2e3a509a845c2ac66435fcead74a4d983b21ae806"
"checksum serde_derive 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)" = "652bc323d694dc925829725ec6c890156d8e70ae5202919869cb00fe2eff3788"
"checksum serde_derive_internals 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "32f1926285523b2db55df263d2aa4eb69ddcfa7a7eade6430323637866b513ab"
"checksum serde_json 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "e4586746d1974a030c48919731ecffd0ed28d0c40749d0d18d43b3a7d6c9b20e"
@ -1168,9 +1367,14 @@ dependencies = [
"checksum stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "15132e0e364248108c5e2c02e3ab539be8d6f5d52a01ca9bbf27ed657316f02b"
"checksum stemmer 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8bc851510b472ff407137208a23f4f58e0cb41fdb5c3e38c9fd4482ea03c46f1"
"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
"checksum syn 0.10.8 (registry+https://github.com/rust-lang/crates.io-index)" = "58fd09df59565db3399efbba34ba8a2fec1307511ebd245d0061ff9d42691673"
"checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad"
"checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6"
"checksum syntect 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ff5898205c88327ab1c99dfd25337e9f29e547e0596a5b098a6780a43801e3c5"
"checksum syntex 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb3f52553a966675982404dc34028291b347e0c9a9c0b0b34f2da6be8a0443f8"
"checksum syntex_errors 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dee2f6e49c075f71332bb775219d5982bee6732d26227fa1ae1b53cdb12f5cc5"
"checksum syntex_pos 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8df3921c7945dfb9ffc53aa35adb2cf4313b5ab5f079c3619b3d4eb82a0efc2b"
"checksum syntex_syntax 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dc960085bae44591e22d01f6c0e82a8aec832f8659aca556cdf8ecbdac2bb47b"
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
"checksum term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "fa63644f74ce96fbeb9b794f66aff2a52d601cbd5e80f4b97123e3899f4570f1"
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
@ -1183,6 +1387,7 @@ dependencies = [
"checksum unicase 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
"checksum unicode-segmentation 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c3bc443ded17b11305ffffe6b37e2076f328a5a8cb6aa877b1b98f77699e98b5"
"checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
"checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"

View File

@ -57,6 +57,8 @@ crowbook-intl-runtime = "0.1"
numerals = "0.1"
epub-builder = "0.3"
log = "0.3"
punkt = "1.0"
hyphenation = "0.6"
textwrap = { version = "0.9", optional = true }
serde = { version = "1", optional = true }
serde_json = { version = "1", optional = true }

View File

@ -46,7 +46,7 @@ pub enum Highlight {
Js,
Syntect,
}
/// Base structure for rendering HTML files
///
@ -206,7 +206,7 @@ impl<'a> HtmlRenderer<'a> {
},
} // _ => panic!("Parts are not supported yet"),
self.current_part = n.is_part();
self.filename = filename;
}
@ -234,10 +234,10 @@ impl<'a> HtmlRenderer<'a> {
n
};
self.inc_header(n);
let number = self.current_chapter[n as usize];
let c_title = self.render_vec(vec)?;
if n <= 1 && self.current_numbering >= 1 {
let header = if n == 0 {
Header::Part
@ -298,7 +298,7 @@ impl<'a> HtmlRenderer<'a> {
n,
self.link_number,
data.text,
n))
n))
}
}
@ -658,9 +658,9 @@ impl<'a> HtmlRenderer<'a> {
}
</script>"#;
</script>"#;
self.templatize(json)
}
@ -708,8 +708,8 @@ impl<'a> HtmlRenderer<'a> {
}
Err(err) => {
Err(Error::render(&this.as_ref().book.source,
(lformat!("rendering 'html.header' template:\n{error}",
error = err))))
lformat!("rendering 'html.header' template:\n{error}",
error = err)))
}
}
} else {

View File

@ -39,7 +39,7 @@
//! * read the chapters (written in Markdown) listed in this
//! configuration file and pass them to `Parser`, get back an AST and store it in memory
//! * call the various renderers according to the book's parameters
//! and generate the appropriate files.
//! and generate the appropriate files.
//!
//! ## Example
//!
@ -116,6 +116,8 @@ extern crate crowbook_intl_runtime;
extern crate numerals;
extern crate epub_builder;
extern crate uuid;
extern crate punkt;
extern crate hyphenation;
#[macro_use]
extern crate log;
#[macro_use]

View File

@ -20,11 +20,19 @@ use text_view::view_as_text;
use style;
use std::fmt;
use std::f64;
use punkt::{SentenceTokenizer, TrainingData};
use punkt::params::Standard;
use hyphenation;
use hyphenation::{Hyphenation, Language};
/// Text statistics collected for a single chapter of the book.
struct ChapterStats {
// File name of the chapter; used as the row label in the report.
pub name: String,
// Number of whitespace-separated words in the chapter's plain text.
pub word_count: usize,
// Number of Unicode characters (not bytes) in the chapter's plain text.
pub char_count: usize,
// Number of sentences found by the `punkt` sentence tokenizer.
pub sentence_count: usize,
// Estimated syllables: per word, hyphenation opportunities plus one.
pub syllable_count: usize,
// Flesch reading-ease score; NaN when no formula exists for the language.
pub flesch_score: f64,
}
pub struct Stats {
@ -33,42 +41,194 @@ pub struct Stats {
impl Stats {
pub fn new(book: &Book) -> Stats {
let mut stats = Stats{
chapters: vec!(),
};
let mut stats = Stats { chapters: vec![] };
let lang = book.options.get_str("lang").unwrap();
let (td, hy, flesch_func) = Stats::language_data(lang);
for c in &book.chapters {
let name = c.filename.clone();
let text = view_as_text(&c.content);
let wc = text.split_whitespace().count();
let cc = text.len();
stats.chapters.push(ChapterStats {
let words: Vec<_> = text.split_whitespace().collect();
let wc = words.len();
// Note: count the actual (multi-byte) characters, not the bytes that `len()` would return
let cc = text.chars().count();
let sc = SentenceTokenizer::<Standard>::new(&text, &td).count();
let corp = hyphenation::load(hy).unwrap();
// Count the number of syllables for each word.
let syl = words
.iter()
.fold(0, |acc, w| acc + w.opportunities(&corp).len() + 1);
let mut chapter_stats = ChapterStats {
name: name,
word_count: wc,
char_count: cc
});
char_count: cc,
sentence_count: sc,
syllable_count: syl,
flesch_score: f64::NAN,
};
if let Some(ref f) = flesch_func {
chapter_stats.flesch_score = f(&chapter_stats);
}
stats.chapters.push(chapter_stats);
}
stats
}
/// Selects the language-dependent resources used to compute text statistics.
///
/// For the language code `lang` this returns, in order:
///
/// * the `punkt` training data used to split the text into sentences,
/// * the `hyphenation` language used to count syllables,
/// * the Flesch reading-ease formula for that language, or `None` when no
///   formula is known (callers then keep the score at NaN).
///
/// An unknown code logs a warning and falls back to the English tokenizer and
/// hyphenation data, without a reading-ease formula.
///
/// The Flesch reading-ease formulae for the different languages are the ones
/// used by the `YoastSEO.js` text analysis library.
/// See: https://github.com/Yoast/YoastSEO.js/issues/267
///
/// Every formula has the shape
/// `base - sentence_weight * (words / sentences) - syllable_weight * (syllables / words)`.
/// A weight quoted "per 100 words" in the literature (e.g. 0.6) is therefore
/// written here as its per-word equivalent (60.0).
fn language_data(
    lang: &str,
) -> (
    TrainingData,
    Language,
    Option<Box<Fn(&ChapterStats) -> f64>>,
) {
    // Builds the boxed scoring closure from the three language-specific constants.
    fn flesch(
        base: f64,
        sentence_weight: f64,
        syllable_weight: f64,
    ) -> Option<Box<Fn(&ChapterStats) -> f64>> {
        Some(Box::new(move |s: &ChapterStats| {
            let words_per_sentence = s.word_count as f64 / s.sentence_count as f64;
            let syllables_per_word = s.syllable_count as f64 / s.word_count as f64;
            base - sentence_weight * words_per_sentence - syllable_weight * syllables_per_word
        }))
    }

    match lang {
        // "cs" is the ISO 639-1 code for Czech; "cz" is kept for compatibility.
        "cs" | "cz" => (TrainingData::czech(), Language::Czech, None),
        "da" => (TrainingData::danish(), Language::Danish, None),
        // Flesch-Douma
        "nl" => (
            TrainingData::dutch(),
            Language::Dutch,
            flesch(206.84, 0.93, 77.0),
        ),
        // Original Flesch reading ease
        "en" => (
            TrainingData::english(),
            Language::English_GB,
            flesch(206.835, 1.015, 84.6),
        ),
        "et" => (TrainingData::estonian(), Language::Estonian, None),
        "fi" => (TrainingData::finnish(), Language::Finnish, None),
        // Kandel-Moles
        "fr" => (
            TrainingData::french(),
            Language::French,
            flesch(207.0, 1.015, 73.6),
        ),
        // Amstad
        "de" => (
            TrainingData::german(),
            Language::German_1996,
            flesch(180.0, 1.0, 58.5),
        ),
        "el" => (TrainingData::greek(), Language::Greek_Poly, None),
        // Franchina-Vacca: 0.6 per 100 words == 60.0 per word
        "it" => (
            TrainingData::italian(),
            Language::Italian,
            flesch(217.0, 1.3, 60.0),
        ),
        "no" => (TrainingData::norwegian(), Language::Norwegian_Bokmal, None),
        "pl" => (TrainingData::polish(), Language::Polish, None),
        "pt" => (TrainingData::portuguese(), Language::Portuguese, None),
        "sl" => (TrainingData::slovene(), Language::Slovenian, None),
        // Fernandez-Huerta: 0.6 per 100 words == 60.0 per word
        "es" => (
            TrainingData::spanish(),
            Language::Spanish,
            flesch(206.84, 1.02, 60.0),
        ),
        "sv" => (TrainingData::swedish(), Language::Swedish, None),
        // "tr" is the ISO 639-1 code for Turkish; "tk" is kept for compatibility.
        "tr" | "tk" => (TrainingData::turkish(), Language::Turkish, None),
        _ => {
            warn!(
                "Unknown language: '{}' for text statistics, using 'en' default.",
                lang
            );
            (TrainingData::english(), Language::English_GB, None)
        }
    }
}
/// Converts a Flesch reading-ease score into a human-readable difficulty label.
///
/// A NaN score (no formula available for the language) yields
/// "Not available". Otherwise the label is chosen by the usual Flesch bands:
/// below 30 is "Very difficult", and each further band up to 90 is one step
/// easier; 90 and above is "Very Easy".
fn flesch_text(score: f64) -> String {
    String::from(match score {
        // NaN compares false with everything, so it must be tested first.
        s if s.is_nan() => "Not available",
        s if s < 30.0 => "Very difficult",
        s if s < 50.0 => "Difficult",
        s if s < 60.0 => "Fairly difficult",
        s if s < 70.0 => "Standard",
        s if s < 80.0 => "Fairly easy",
        s if s < 90.0 => "Easy",
        _ => "Very Easy",
    })
}
}
impl fmt::Display for Stats {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:<30} {:>6} {:>7}\n---------\n",
style::header(&lformat!("Chapter")),
style::header(&lformat!("Words")),
style::header(&lformat!("Chars")))?;
for c in &self.chapters {
write!(f, "{:<30} {:>6} {:>7}\n",
style::element(&c.name),
c.word_count,
c.char_count)?;
}
let total = self.chapters
let max_chapter_length = self.chapters
.iter()
.fold((0, 0), |acc, c| (acc.0 + c.word_count, acc.1 + c.char_count));
write!(f, "---------\n{:<30} {:>6} {:>7}\n",
style::element(&lformat!("TOTAL:")),
total.0,
total.1)
.max_by_key(|e| e.name.chars().count())
.unwrap()
.name
.chars()
.count() + 3;
write!(
f,
"{:<width$} {:>8} {:>10} {:>7} {:>11} {:>11} {:>16} {:>29}\n---------\n",
style::header(&lformat!("Chapter")),
style::header(&lformat!("Chars")),
style::header(&lformat!("Syllables")),
style::header(&lformat!("Words")),
style::header(&lformat!("Sentences")),
style::header(&lformat!("Chars/Word")),
style::header(&lformat!("Words/Sentence")),
style::header(&lformat!("Flesch reading ease index")),
width = max_chapter_length
)?;
for c in &self.chapters {
write!(
f,
"{:<width$} {:>8} {:>10} {:>7} {:>11} {:>11.2} {:>16.2} {:>8.1} => {:>17}\n",
style::element(&c.name),
c.char_count,
c.syllable_count,
c.word_count,
c.sentence_count,
c.char_count as f64 / c.word_count as f64,
c.word_count as f64 / c.sentence_count as f64,
c.flesch_score,
Self::flesch_text(c.flesch_score),
width = max_chapter_length
)?;
}
let total = self.chapters.iter().fold((0, 0, 0, 0, 0.0, 0), |acc, c| {
(
acc.0 + c.char_count,
acc.1 + c.syllable_count,
acc.2 + c.word_count,
acc.3 + c.sentence_count,
acc.4 + c.flesch_score,
acc.5 + 1,
)
});
write!(
f,
"---------\n{:<width$} {:>8} {:>10} {:>7} {:>11} {:>11.2} {:>16.2} {:>8.1} => {:>17}\n",
style::element(&lformat!("TOTAL:")),
total.0,
total.1,
total.2,
total.3,
total.0 as f64 / total.2 as f64,
total.2 as f64 / total.3 as f64,
total.4 / total.5 as f64,
Self::flesch_text(total.4 / total.5 as f64),
width = max_chapter_length
)
}
}