简单的 Rust 词频统计程序能输出正确的标准输出,但在输入特定内容并通过管道传给 head 程序时会发生 panic

问题描述

我得到了如下 Rust 回溯信息:


thread 'main' panicked at 'failed printing to stdout: Broken pipe (os error 32)', library/std/src/io/stdio.rs:993:9
stack backtrace:
   0:     0x559ffa959dc0 - std::backtrace_rs::backtrace::libunwind::trace::h72c2fb8038f1bbee
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/libunwind.rs:96
   1:     0x559ffa959dc0 - std::backtrace_rs::backtrace::trace_unsynchronized::h1e3b084883f1e78c
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/mod.rs:66
   2:     0x559ffa959dc0 - std::sys_common::backtrace::_print_fmt::h3bf6a7ebf7f0394a
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:79
   3:     0x559ffa959dc0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h2e8cb764b7fe02e7
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:58
   4:     0x559ffa972f6c - core::fmt::write::h7a1184eaee6a8644
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/fmt/mod.rs:1080
   5:     0x559ffa957b12 - std::io::Write::write_fmt::haeeb374d93a67eac
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/mod.rs:1516
   6:     0x559ffa95beed - std::sys_common::backtrace::_print::h1d14a7f6ad632dc8
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:61
   7:     0x559ffa95beed - std::sys_common::backtrace::print::h301abac8bb2e3e81
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:48
   8:     0x559ffa95beed - std::panicking::default_hook::{{closure}}::hde0cb80358a6920a
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:208
   9:     0x559ffa95bb98 - std::panicking::default_hook::h9b1a691049a0ec8f
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:227
  10:     0x559ffa95c5d1 - std::panicking::rust_panic_with_hook::h2bdec87b60580584
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:577
  11:     0x559ffa95c179 - std::panicking::begin_panic_handler::{{closure}}::h101ca09d9df5db47
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:484
  12:     0x559ffa95a22c - std::sys_common::backtrace::__rust_end_short_backtrace::h3bb85654c20113ca
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:153
  13:     0x559ffa95c139 - rust_begin_unwind
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:483
  14:     0x559ffa95c0eb - std::panicking::begin_panic_fmt::hf0503558fbe5b251
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:437
  15:     0x559ffa957022 - std::io::stdio::print_to::h9435376f36962f3f
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:993
  16:     0x559ffa957022 - std::io::stdio::_print::h0d31d4b9faa6e1ec
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:1005
  17:     0x559ffa944807 - wordstats::main::h1c2ea6400047a5eb
  18:     0x559ffa942e73 - std::sys_common::backtrace::__rust_begin_short_backtrace::h9e31cf87ddc88116
  19:     0x559ffa942e49 - std::rt::lang_start::{{closure}}::h6c6491f05894818f
  20:     0x559ffa95c9f7 - core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::he179d32a5d10d957
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:259
  21:     0x559ffa95c9f7 - std::panicking::try::do_call::hcb3d5e7be089b2b4
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
  22:     0x559ffa95c9f7 - std::panicking::try::h7ac93b0cd56fb701
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
  23:     0x559ffa95c9f7 - std::panic::catch_unwind::h7b40e396c93a4fcd
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
  24:     0x559ffa95c9f7 - std::rt::lang_start_internal::h142b9cc66267fea1
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/rt.rs:51
  25:     0x559ffa944ae2 - main
  26:     0x7f6223a380b3 - __libc_start_main
  27:     0x559ffa94209e - _start
  28:                0x0 - <unknown>

当我编译这个程序时

use diacritics;
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
    /// The word text.
    word: String,
    /// Number of occurrences recorded for `word`.
    count: u32,
}

static SEParaTORS: &'static [char] = &[
    ' ',','.','!','?','\'','"','\n','(',')','#','{','}','[',']','-',';',':',];

fn main() {
    let mut words: HashMap<String,u32> = HashMap::new();
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        line_processor(line.unwrap(),&mut words)
    }
    output(&mut words);
}

fn line_processor(line: String,words: &mut HashMap<String,u32>) {
    let formatted_line;
    let mut word = String::new();
    formatted_line = diacritics::remove_diacritics(&line).to_lowercase();

    for c in formatted_line.chars() {
        if SEParaTORS.contains(&c) {
            add_word(word,words);
            word = String::new();
        } else {
            word.push_str(&c.to_string());
        }
    }
}

/// Increments the occurrence count of `word` in `words`.
///
/// A word seen for the first time is inserted with a count of 1.
/// Empty words are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API does a single lookup and takes ownership of `word`,
    // so no extra key allocation is needed.
    *words.entry(word).or_insert(0) += 1;
}

/// Prints one `count<TAB>word` line per entry of `words`, from the highest
/// count to the lowest.
///
/// Uses `println!`, which panics if writing to standard output fails.
fn output(words: &mut HashMap<String, u32>) {
    let mut entries: Vec<Entry> = words
        .iter()
        .map(|(word, count)| Entry {
            word: word.to_string(),
            count: *count,
        })
        .collect();

    // Stable descending sort: entries with equal counts keep the map's
    // iteration order.
    entries.sort_by(|a, b| b.count.cmp(&a.count));

    for entry in entries {
        println!("{}\t{}", entry.count, entry.word);
    }
}

这样:

cargo build --release

我像这样运行程序:

cat src/sample.txt | ./target/release/wordstats  | head -n 50

这个程序应该只显示如下内容(出现次数最多的单词),而不带任何回溯信息:

15  the
14  in
11  are
10  and
10  of
9   species
9   bats
8   horseshoe
8   is
6   or
6   as
5   which
5   their

对于某些 echo 输出的内容或其他一些文件(例如 cat src/main.rs | ...),结果确实如此;但对于这个文件的内容(某个随机维基百科页面的一部分)却不是这样。

我的程序是一个愚蠢的字数统计,它只是打印一个表格排序的键值列表。

当我在 head -n 50 程序中输出结果时会出现问题,但在打印完整输出时不会出现问题

知道为什么我会得到这样的回溯吗?是我的程序在某些地方处理不当,还是与其他因素有关(Rust 库 / Unix 的异常行为)?

我的 rustc 版本是:rustc 1.48.0 (7eac88abb 2020-11-16)

编辑:

添加缺少的 Cargo.toml

[package]
name = "wordstats"
version = "0.1.0"
authors = ["Eric Régnier <utopman@gmail.com>"]
edition = "2018"

[dependencies]
diacritics = "0.1.1"

解决方法

首先,您没有提供足够的信息来重现您的问题。您提供了使用第三方依赖项但忽略提供 Cargo.toml 的源代码。在您的情况下,很容易在不影响手头问题的情况下删除依赖项的使用,所以这就是我所做的。

其次,正是出于这个原因,非玩具级的命令行程序中很少使用 println!。也就是说,有两个问题结合在一起导致了这种不良行为:

  1. 写入标准输出时只要发生任何错误,println! 就会 panic。
  2. Rust 的运行时所做的少数事情之一是 ignore SIGPIPE,这意味着您的应用程序不会收到 PIPE 信号,而是对已关闭的文件描述符的相应写入返回错误。 (在该链接中,您可以看到我公开提倡改变这种行为。)

在典型的 C 程序中,SIGPIPE 不会被忽略。它通常也不会被明确处理。当一个进程收到一个它不处理的信号时,该进程终止。在这种情况下,这正是您想要的。一旦 head 停止读取它的标准输入(你的标准输出),你希望你的程序停止,但你也希望它优雅地停止而不恐慌或打印错误。因为这就是 Unix CLI 实用程序所做的。

您有两种方法可以解决此问题。一种方法是更改您的代码以显式处理 BrokenPipe 错误。您的代码的编写方式就好像错误不会发生一样,因为您直接对读取标准输入的结果调用了 unwrap。所以您的程序既不符合惯用写法,也没有为处理错误做好准备。因此,为了正确处理 BrokenPipe,我必须进行一些小的更改,以便错误能够被正确地向上传播:

use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
    /// The word text.
    word: String,
    /// Number of occurrences recorded for `word`.
    count: u32,
}

/// Characters that terminate a word when a line is split.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];

/// Program entry point: runs `try_main` and maps its error to an exit status.
///
/// A `BrokenPipe` error means the reader of our stdout (e.g. `head`) has
/// stopped reading; that is treated as a normal, silent termination. Any other
/// error is reported on stderr and the process exits with status 1 so that
/// callers can detect the failure.
fn main() {
    if let Err(err) = try_main() {
        if err.kind() == std::io::ErrorKind::BrokenPipe {
            return;
        }
        // Ignore any error that may occur while writing to stderr.
        let _ = writeln!(std::io::stderr(), "{}", err);
        std::process::exit(1);
    }
}

/// Counts the words read from standard input and writes the sorted table to
/// standard output.
///
/// Returns the first I/O error hit while reading a line or writing the table.
fn try_main() -> Result<(), std::io::Error> {
    let mut counts: HashMap<String, u32> = HashMap::new();

    let input = io::stdin();
    for line in input.lock().lines() {
        line_processor(line?, &mut counts);
    }

    output(&mut counts)
}

/// Splits `line` into words and records each one in `words`.
///
/// The line is split on every character in `SEPARATORS`; empty fragments
/// (produced by adjacent separators) are skipped.
///
/// Splitting the whole line, rather than flushing a buffer only when a
/// separator is seen, ensures the final word of a line is counted even when
/// no separator follows it.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    for word in line.split(SEPARATORS).filter(|w| !w.is_empty()) {
        add_word(word.to_string(), words);
    }
}

/// Increments the occurrence count of `word` in `words`.
///
/// A word seen for the first time is inserted with a count of 1.
/// Empty words are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API does a single lookup and takes ownership of `word`,
    // so no extra key allocation is needed.
    *words.entry(word).or_insert(0) += 1;
}

/// Writes one `count<TAB>word` line per entry of `words` to standard output,
/// from the highest count to the lowest.
///
/// Returns the first write error encountered, so the caller can decide how to
/// handle it (for example a broken pipe).
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut entries: Vec<Entry> = words
        .iter()
        .map(|(word, count)| Entry {
            word: word.to_string(),
            count: *count,
        })
        .collect();

    // Stable descending sort: entries with equal counts keep the map's
    // iteration order.
    entries.sort_by(|a, b| b.count.cmp(&a.count));

    // Lock stdout once instead of once per line.
    let handle = io::stdout();
    let mut out = handle.lock();
    for entry in entries {
        writeln!(out, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}

第二种处理方法是返回到 SIGPIPE 的默认行为。这将导致您的 Rust 应用程序表现得像一个 C 应用程序。这可以通过定义一个函数来实现,将 SIGPIPE 的信号处理程序重置为 SIG_DFL

/// Resets the `SIGPIPE` disposition to `SIG_DFL` on Unix targets; on every
/// other target this function does nothing.
fn reset_sigpipe() {
    // SAFETY: `signal` is passed the `SIGPIPE` signal number and the
    // `SIG_DFL` disposition constant; no Rust handler or data is registered.
    #[cfg(unix)]
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
    }
}

然后在程序一开始就调用它(下面的完整代码在 try_main 的第一行调用)。这样您就可以删除针对 BrokenPipe 错误的任何特殊处理,因为该错误不会再发生。相反,您的进程将收到一个 PIPE 信号,随后它将终止。完整代码如下:

use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

#[derive(Debug)]
struct Entry {
    word: String,];

fn main() {
    if let Err(err) = try_main() {
        let _ = writeln!(std::io::stderr(),std::io::Error> {
    reset_sigpipe();
    
    let mut words: HashMap<String,entry.word)?;
    }
    Ok(())
}


/// Resets the `SIGPIPE` disposition to `SIG_DFL` on Unix targets; on every
/// other target this function does nothing.
fn reset_sigpipe() {
    // SAFETY: `signal` is passed the `SIGPIPE` signal number and the
    // `SIG_DFL` disposition constant; no Rust handler or data is registered.
    #[cfg(unix)]
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
    }
}

相关问答

Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其...
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。...
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbc...