Merge pull request #145 from Kroisse/fix/develop/strip_formatting

Make stripping color codes efficiently
This commit is contained in:
Aaron Weiss 2018-09-07 11:00:06 -04:00 committed by GitHub
commit c8aed29e80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,163 +1,188 @@
//! An extension trait that provides the ability to strip IRC colors from a string
use std::borrow::Cow;
#[derive(PartialEq)]
enum ParserState {
Text,
ColorCode,
Foreground1,
Foreground1(char),
Foreground2,
Comma,
Background1,
Background1(char),
}
struct Parser {
state: ParserState,
}
/// An extension trait giving strings a function to strip IRC colors
pub trait FormattedStringExt {
pub trait FormattedStringExt<'a> {
/// Returns true if the string contains color, bold, underline or italics
fn is_formatted(&self) -> bool;
/// Returns the string with all color, bold, underline and italics stripped
fn strip_formatting(&self) -> Cow<str>;
fn strip_formatting(self) -> Cow<'a, str>;
}
const FORMAT_CHARACTERS: &[char] = &[
'\x02', // bold
'\x1F', // underline
'\x16', // reverse
'\x0F', // normal
'\x03', // color
];
impl FormattedStringExt for str {
impl<'a> FormattedStringExt<'a> for &'a str {
fn is_formatted(&self) -> bool {
self.contains('\x02') || // bold
self.contains('\x1F') || // underline
self.contains('\x16') || // reverse
self.contains('\x0F') || // normal
self.contains('\x03') // color
self.contains(FORMAT_CHARACTERS)
}
fn strip_formatting(&self) -> Cow<str> {
let mut parser = Parser {
fn strip_formatting(self) -> Cow<'a, str> {
if !self.is_formatted() {
return Cow::Borrowed(self);
}
let mut s = String::from(self);
strip_formatting(&mut s);
Cow::Owned(s)
}
}
fn strip_formatting(buf: &mut String) {
let mut parser = Parser::new();
buf.retain(|cur| parser.next(cur));
}
impl Parser {
fn new() -> Self {
Parser {
state: ParserState::Text,
};
let mut prev: char = '\x00';
let result: Cow<str> = self
.chars()
.filter(move |cur| {
let result = match parser.state {
ParserState::Text | ParserState::Foreground1 | ParserState::Foreground2 if *cur == '\x03' => {
parser.state = ParserState::ColorCode;
false
},
ParserState::Text => !['\x02', '\x1F', '\x16', '\x0F'].contains(cur),
ParserState::ColorCode if (*cur).is_digit(10) => {
parser.state = ParserState::Foreground1;
false
},
ParserState::Foreground1 if (*cur).is_digit(6) => {
// can only consume another digit if previous char was 1.
if (prev) == '1' {
parser.state = ParserState::Foreground2;
false
} else {
parser.state = ParserState::Text;
true
}
},
ParserState::Foreground1 if *cur == ',' => {
parser.state = ParserState::Comma;
false
},
ParserState::Foreground2 if *cur == ',' => {
parser.state = ParserState::Comma;
false
},
ParserState::Comma if ((*cur).is_digit(10)) => {
parser.state = ParserState::Background1;
false
},
ParserState::Background1 if (*cur).is_digit(6) => {
// can only consume another digit if previous char was 1.
parser.state = ParserState::Text;
if (prev) == '1' {
false
} else {
true
}
}
_ => {
parser.state = ParserState::Text;
true
}
};
prev = *cur;
return result
})
.collect();
result
}
}
fn next(&mut self, cur: char) -> bool {
use self::ParserState::*;
match self.state {
Text | Foreground1(_) | Foreground2 if cur == '\x03' => {
self.state = ColorCode;
false
}
Text => {
!FORMAT_CHARACTERS.contains(&cur)
}
ColorCode if cur.is_digit(10) => {
self.state = Foreground1(cur);
false
}
Foreground1('1') if cur.is_digit(6) => {
// can only consume another digit if previous char was 1.
self.state = Foreground2;
false
}
Foreground1(_) if cur.is_digit(6) => {
self.state = Text;
true
}
Foreground1(_) if cur == ',' => {
self.state = Comma;
false
}
Foreground2 if cur == ',' => {
self.state = Comma;
false
}
Comma if (cur.is_digit(10)) => {
self.state = Background1(cur);
false
}
Background1(prev) if cur.is_digit(6) => {
// can only consume another digit if previous char was 1.
self.state = Text;
prev != '1'
}
_ => {
self.state = Text;
true
}
}
}
}
impl FormattedStringExt for String {
impl FormattedStringExt<'static> for String {
fn is_formatted(&self) -> bool {
(&self[..]).is_formatted()
self.as_str().is_formatted()
}
fn strip_formatting(&self) -> Cow<str> {
(&self[..]).strip_formatting()
fn strip_formatting(mut self) -> Cow<'static, str> {
if !self.is_formatted() {
return Cow::Owned(self);
}
strip_formatting(&mut self);
Cow::Owned(self)
}
}
#[cfg(test)]
mod test {
use std::borrow::Cow;
use proto::colors::FormattedStringExt;
#[test]
fn test_strip_bold() {
assert_eq!("l\x02ol".strip_formatting(), "lol");
macro_rules! test_formatted_string_ext {
{ $( $name:ident ( $($line:tt)* ), )* } => {
$(
mod $name {
use super::*;
test_formatted_string_ext!(@ $($line)*);
}
)*
};
(@ $text:expr, should stripped into $expected:expr) => {
#[test]
fn test_formatted() {
assert!($text.is_formatted());
}
#[test]
fn test_strip() {
assert_eq!($text.strip_formatting(), $expected);
}
};
(@ $text:expr, is not formatted) => {
#[test]
fn test_formatted() {
assert!(!$text.is_formatted());
}
#[test]
fn test_strip() {
assert_eq!($text.strip_formatting(), $text);
}
}
}
test_formatted_string_ext! {
blank("", is not formatted),
blank2(" ", is not formatted),
blank3("\t\r\n", is not formatted),
bold("l\x02ol", should stripped into "lol"),
bold_from_string(String::from("l\x02ol"), should stripped into "lol"),
bold_hangul("우왕\x02", should stripped into "우왕굳"),
fg_color("l\x033ol", should stripped into "lol"),
fg_color2("l\x0312ol", should stripped into "lol"),
fg_bg_11("l\x031,2ol", should stripped into "lol"),
fg_bg_21("l\x0312,3ol", should stripped into "lol"),
fg_bg_12("l\x031,12ol", should stripped into "lol"),
fg_bg_22("l\x0312,13ol", should stripped into "lol"),
string_with_multiple_colors("hoo\x034r\x033a\x0312y", should stripped into "hooray"),
string_with_digit_after_color("\x0344\x0355\x0366", should stripped into "456"),
string_with_multiple_2digit_colors("hoo\x0310r\x0311a\x0312y", should stripped into "hooray"),
string_with_digit_after_2digit_color("\x031212\x031111\x031010", should stripped into "121110"),
thinking("🤔...", is not formatted),
unformatted("a plain text", is not formatted),
}
#[test]
fn test_strip_fg_color() {
assert_eq!("l\x033ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_fg_color2() {
assert_eq!("l\x0312ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_fg_bg_11() {
assert_eq!("l\x031,2ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_fg_bg_21() {
assert_eq!("l\x0312,3ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_fg_bg_12() {
assert_eq!("l\x031,12ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_fg_bg_22() {
assert_eq!("l\x0312,13ol".strip_formatting(), "lol");
}
#[test]
fn test_strip_string_with_multiple_colors() {
assert_eq!("hoo\x034r\x033a\x0312y".strip_formatting(), "hooray");
}
#[test]
fn test_strip_string_with_digit_after_color() {
assert_eq!("\x0344\x0355\x0366".strip_formatting(), "456");
}
#[test]
fn test_strip_string_with_multiple_2digit_colors() {
assert_eq!("hoo\x0310r\x0311a\x0312y".strip_formatting(), "hooray");
}
#[test]
fn test_strip_string_with_digit_after_2digit_color() {
assert_eq!("\x031212\x031111\x031010".strip_formatting(), "121110");
fn test_strip_no_allocation_for_unformatted_text() {
if let Cow::Borrowed(formatted) = "plain text".strip_formatting() {
assert_eq!(formatted, "plain text");
} else {
panic!("allocation detected");
}
}
}