fix(tazjin/tgsa): Preserve newlines in messages

Change-Id: I9e2d7038357a5510898d153740ae0c0e1f7a8a3a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5483
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2022-04-20 13:00:37 +02:00 committed by clbot
parent 094b8cc7af
commit e0b9d9b1cd
3 changed files with 26 additions and 4 deletions

View file

@@ -1084,6 +1084,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"crimp",
"ego-tree",
"rouille",
"scraper",
"url",

View file

@@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
crimp = "0.2"
ego-tree = "0.6" # in tandem with 'scraper'
rouille = "3.5"
scraper = "0.12"
url = "2.2"

View file

@@ -84,10 +84,30 @@ fn parse_tgmessage(embed: &str) -> Result<TgMessage> {
.concat();
let msg_sel = Selector::parse("div.tgme_widget_message_text.js-message_text").unwrap();
let message = doc
.select(&msg_sel)
.next()
.map(|m| m.text().collect::<Vec<&str>>().concat());
// The ElementRef::text() iterator does not yield newlines present
// in the message, so it is partially reimplemented here.
let message = if let Some(msg_elem) = doc.select(&msg_sel).next() {
use ego_tree::iter::Edge;
use scraper::node::Node;
let mut out = String::new();
for edge in &mut msg_elem.traverse() {
if let Edge::Open(node) = edge {
match node.value() {
Node::Text(ref text) => out.push_str(&*text),
Node::Element(elem) if elem.name() == "br" => out.push_str("\n"),
_ => {}
}
}
}
Some(out)
} else {
// Not all Telegram messages have a textual message.
None
};
let photo_sel = Selector::parse("a.tgme_widget_message_photo_wrap").unwrap();
let mut photos = vec![];