Merge pull request #145 from Kroisse/fix/develop/strip_formatting

Make stripping color codes efficiently
This commit is contained in:
Aaron Weiss 2018-09-07 11:00:06 -04:00 committed by GitHub
commit c8aed29e80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,163 +1,188 @@
//! An extension trait that provides the ability to strip IRC colors from a string //! An extension trait that provides the ability to strip IRC colors from a string
use std::borrow::Cow; use std::borrow::Cow;
#[derive(PartialEq)]
enum ParserState { enum ParserState {
Text, Text,
ColorCode, ColorCode,
Foreground1, Foreground1(char),
Foreground2, Foreground2,
Comma, Comma,
Background1, Background1(char),
} }
struct Parser { struct Parser {
state: ParserState, state: ParserState,
} }
/// An extension trait giving strings a function to strip IRC colors /// An extension trait giving strings a function to strip IRC colors
pub trait FormattedStringExt { pub trait FormattedStringExt<'a> {
/// Returns true if the string contains color, bold, underline or italics /// Returns true if the string contains color, bold, underline or italics
fn is_formatted(&self) -> bool; fn is_formatted(&self) -> bool;
/// Returns the string with all color, bold, underline and italics stripped /// Returns the string with all color, bold, underline and italics stripped
fn strip_formatting(&self) -> Cow<str>; fn strip_formatting(self) -> Cow<'a, str>;
} }
/// Control characters that mark IRC text formatting.
///
/// `is_formatted` reports a string as formatted when it contains any of
/// these, and the parser drops them while it is in its plain-text state.
///
/// NOTE(review): the trait documentation mentions italics, but no italics
/// code (commonly `\x1D`) is listed here -- confirm whether that is intended.
const FORMAT_CHARACTERS: &[char] = &[
'\x02', // bold
'\x1F', // underline
'\x16', // reverse
'\x0F', // normal
'\x03', // color
];
impl FormattedStringExt for str { impl<'a> FormattedStringExt<'a> for &'a str {
fn is_formatted(&self) -> bool { fn is_formatted(&self) -> bool {
self.contains('\x02') || // bold self.contains(FORMAT_CHARACTERS)
self.contains('\x1F') || // underline
self.contains('\x16') || // reverse
self.contains('\x0F') || // normal
self.contains('\x03') // color
} }
fn strip_formatting(&self) -> Cow<str> { fn strip_formatting(self) -> Cow<'a, str> {
let mut parser = Parser { if !self.is_formatted() {
return Cow::Borrowed(self);
}
let mut s = String::from(self);
strip_formatting(&mut s);
Cow::Owned(s)
}
}
/// Strips IRC formatting from `buf` in place.
///
/// Every character is fed, in order, through a fresh `Parser`; only the
/// characters for which `Parser::next` returns `true` are kept.
fn strip_formatting(buf: &mut String) {
    let mut state_machine = Parser::new();
    let keep = |ch: char| state_machine.next(ch);
    buf.retain(keep);
}
impl Parser {
fn new() -> Self {
Parser {
state: ParserState::Text, state: ParserState::Text,
}; }
let mut prev: char = '\x00'; }
let result: Cow<str> = self
.chars() fn next(&mut self, cur: char) -> bool {
.filter(move |cur| { use self::ParserState::*;
let result = match parser.state { match self.state {
ParserState::Text | ParserState::Foreground1 | ParserState::Foreground2 if *cur == '\x03' => { Text | Foreground1(_) | Foreground2 if cur == '\x03' => {
parser.state = ParserState::ColorCode; self.state = ColorCode;
false false
}, }
ParserState::Text => !['\x02', '\x1F', '\x16', '\x0F'].contains(cur), Text => {
ParserState::ColorCode if (*cur).is_digit(10) => { !FORMAT_CHARACTERS.contains(&cur)
parser.state = ParserState::Foreground1; }
ColorCode if cur.is_digit(10) => {
self.state = Foreground1(cur);
false false
}, }
ParserState::Foreground1 if (*cur).is_digit(6) => { Foreground1('1') if cur.is_digit(6) => {
// can only consume another digit if previous char was 1. // can only consume another digit if previous char was 1.
if (prev) == '1' { self.state = Foreground2;
parser.state = ParserState::Foreground2;
false false
} else { }
parser.state = ParserState::Text; Foreground1(_) if cur.is_digit(6) => {
self.state = Text;
true true
} }
}, Foreground1(_) if cur == ',' => {
ParserState::Foreground1 if *cur == ',' => { self.state = Comma;
parser.state = ParserState::Comma;
false false
},
ParserState::Foreground2 if *cur == ',' => {
parser.state = ParserState::Comma;
false
},
ParserState::Comma if ((*cur).is_digit(10)) => {
parser.state = ParserState::Background1;
false
},
ParserState::Background1 if (*cur).is_digit(6) => {
// can only consume another digit if previous char was 1.
parser.state = ParserState::Text;
if (prev) == '1' {
false
} else {
true
} }
Foreground2 if cur == ',' => {
self.state = Comma;
false
}
Comma if (cur.is_digit(10)) => {
self.state = Background1(cur);
false
}
Background1(prev) if cur.is_digit(6) => {
// can only consume another digit if previous char was 1.
self.state = Text;
prev != '1'
} }
_ => { _ => {
parser.state = ParserState::Text; self.state = Text;
true true
} }
};
prev = *cur;
return result
})
.collect();
result
} }
}
} }
impl FormattedStringExt for String { impl FormattedStringExt<'static> for String {
fn is_formatted(&self) -> bool { fn is_formatted(&self) -> bool {
(&self[..]).is_formatted() self.as_str().is_formatted()
} }
fn strip_formatting(&self) -> Cow<str> { fn strip_formatting(mut self) -> Cow<'static, str> {
(&self[..]).strip_formatting() if !self.is_formatted() {
return Cow::Owned(self);
}
strip_formatting(&mut self);
Cow::Owned(self)
} }
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::borrow::Cow;
use proto::colors::FormattedStringExt; use proto::colors::FormattedStringExt;
macro_rules! test_formatted_string_ext {
{ $( $name:ident ( $($line:tt)* ), )* } => {
$(
mod $name {
use super::*;
test_formatted_string_ext!(@ $($line)*);
}
)*
};
(@ $text:expr, should stripped into $expected:expr) => {
#[test] #[test]
fn test_strip_bold() { fn test_formatted() {
assert_eq!("l\x02ol".strip_formatting(), "lol"); assert!($text.is_formatted());
}
#[test]
fn test_strip() {
assert_eq!($text.strip_formatting(), $expected);
}
};
(@ $text:expr, is not formatted) => {
#[test]
fn test_formatted() {
assert!(!$text.is_formatted());
}
#[test]
fn test_strip() {
assert_eq!($text.strip_formatting(), $text);
}
}
}
test_formatted_string_ext! {
blank("", is not formatted),
blank2(" ", is not formatted),
blank3("\t\r\n", is not formatted),
bold("l\x02ol", should stripped into "lol"),
bold_from_string(String::from("l\x02ol"), should stripped into "lol"),
bold_hangul("우왕\x02굳", should stripped into "우왕굳"),
fg_color("l\x033ol", should stripped into "lol"),
fg_color2("l\x0312ol", should stripped into "lol"),
fg_bg_11("l\x031,2ol", should stripped into "lol"),
fg_bg_21("l\x0312,3ol", should stripped into "lol"),
fg_bg_12("l\x031,12ol", should stripped into "lol"),
fg_bg_22("l\x0312,13ol", should stripped into "lol"),
string_with_multiple_colors("hoo\x034r\x033a\x0312y", should stripped into "hooray"),
string_with_digit_after_color("\x0344\x0355\x0366", should stripped into "456"),
string_with_multiple_2digit_colors("hoo\x0310r\x0311a\x0312y", should stripped into "hooray"),
string_with_digit_after_2digit_color("\x031212\x031111\x031010", should stripped into "121110"),
thinking("🤔...", is not formatted),
unformatted("a plain text", is not formatted),
} }
#[test] #[test]
fn test_strip_fg_color() { fn test_strip_no_allocation_for_unformatted_text() {
assert_eq!("l\x033ol".strip_formatting(), "lol"); if let Cow::Borrowed(formatted) = "plain text".strip_formatting() {
assert_eq!(formatted, "plain text");
} else {
panic!("allocation detected");
} }
/// A two-digit foreground color code is removed entirely.
#[test]
fn test_strip_fg_color2() {
    let stripped = "l\x0312ol".strip_formatting();
    assert_eq!(stripped, "lol");
}
/// One-digit foreground plus one-digit background is removed entirely.
#[test]
fn test_strip_fg_bg_11() {
    let stripped = "l\x031,2ol".strip_formatting();
    assert_eq!(stripped, "lol");
}
/// Two-digit foreground plus one-digit background is removed entirely.
#[test]
fn test_strip_fg_bg_21() {
    let stripped = "l\x0312,3ol".strip_formatting();
    assert_eq!(stripped, "lol");
}
/// One-digit foreground plus two-digit background is removed entirely.
#[test]
fn test_strip_fg_bg_12() {
    let stripped = "l\x031,12ol".strip_formatting();
    assert_eq!(stripped, "lol");
}
/// Two-digit foreground plus two-digit background is removed entirely.
#[test]
fn test_strip_fg_bg_22() {
    let stripped = "l\x0312,13ol".strip_formatting();
    assert_eq!(stripped, "lol");
}
/// Several color codes in one string are all removed.
#[test]
fn test_strip_string_with_multiple_colors() {
    let stripped = "hoo\x034r\x033a\x0312y".strip_formatting();
    assert_eq!(stripped, "hooray");
}
/// A digit that follows a one-digit color code is kept as text.
#[test]
fn test_strip_string_with_digit_after_color() {
    let stripped = "\x0344\x0355\x0366".strip_formatting();
    assert_eq!(stripped, "456");
}
/// Several two-digit color codes in one string are all removed.
#[test]
fn test_strip_string_with_multiple_2digit_colors() {
    let stripped = "hoo\x0310r\x0311a\x0312y".strip_formatting();
    assert_eq!(stripped, "hooray");
}
#[test]
fn test_strip_string_with_digit_after_2digit_color() {
assert_eq!("\x031212\x031111\x031010".strip_formatting(), "121110");
} }
} }