bcc-tools

1. 安装

1.1 直接安装（缺少部分工具，例如biopattern）

sudo apt-get install bpfcc-tools linux-headers-$(uname -r)

安装结束之后文件位于/sbin下，例如/sbin/biolatency-bpfcc

1.2 源码编译（较全）

1.2.1 安装依赖

# For Focal (20.04.1 LTS)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm12 llvm-12-dev libclang-12-dev python zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev arping netperf iperf

# For Hirsute (21.04) or Impish (21.10)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm12 llvm-12-dev libclang-12-dev python3 zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev arping netperf iperf

# For Jammy (22.04)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm14 llvm-14-dev libclang-14-dev python3 zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev libdebuginfod-dev arping netperf iperf

# For Lunar Lobster (23.04)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm15 llvm-15-dev libclang-15-dev python3 zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev libdebuginfod-dev arping netperf iperf libpolly-15-dev

# For Mantic Minotaur (23.10)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm16 llvm-16-dev libclang-16-dev python3 zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev libdebuginfod-dev arping netperf iperf libpolly-16-dev

# For Noble Numbat (24.04)
sudo apt install -y zip bison build-essential cmake flex git libedit-dev \
  libllvm18 llvm-18-dev libclang-18-dev python3 zlib1g-dev libelf-dev libfl-dev python3-setuptools \
  liblzma-dev libdebuginfod-dev arping netperf iperf libpolly-18-dev

# For other versions
sudo apt-get -y install zip bison build-essential cmake flex git libedit-dev \
  libllvm3.7 llvm-3.7-dev libclang-3.7-dev python zlib1g-dev libelf-dev python3-setuptools \
  liblzma-dev arping netperf iperf

# For Lua support
sudo apt-get -y install luajit luajit-5.1-dev

1.2.2 下载并安装

git clone https://github.com/iovisor/bcc.git
mkdir bcc/build; cd bcc/build
cmake ..
make
sudo make install
# sudo make install DESTDIR=...
cmake -DPYTHON_CMD=python3 .. # build python3 binding
pushd src/python/
make
sudo make install
popd

1.2.3 纠错

将刚刚编译好的 Python 绑定手动安装到系统目录（在 bcc/build 目录下执行，需要 root 权限）：

cp -r src/python/bcc-python3/bcc/* /usr/lib/python3/dist-packages/bcc/

主要参考：

AttributeError: /lib/x86_64-linux-gnu/libbcc.so.0: undefined symbol: bpf_module_create_b · Issue #4583 · iovisor/bcc

AttributeError: /lib/x86_64-linux-gnu/libbcc.so.0: undefined symbol: bpf_module_create_b_bccso-CSDN博客

undefined symbol: bpf_module_create_b · Issue #4114 · iovisor/bcc

安装结束后位于/usr/share/bcc/tools/，例如/usr/share/bcc/tools/biopattern

2. 使用

直接运行即可（需要 root 权限），例如：sudo /usr/share/bcc/tools/biopattern

目前写了一个Python脚本，还没跑通：

#!/usr/bin/env python3
import argparse
import codecs
import csv
import fcntl  # Needed for non-blocking reads
import os
import re
import select
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime

# --- Global event to signal threads to stop ---
stop_event = threading.Event()

def user_input_thread():
    """Wait for the user to press Enter, then set the module-level stop event.

    Intended to run in a background thread while the monitor loop polls
    ``stop_event``.  If stdin is closed or redirected so that no line can be
    read (``EOFError``), the event is left unset and a warning is printed:
    the script then keeps running until the tool exits or Ctrl+C is pressed,
    instead of the thread dying with an unhandled traceback.
    """
    try:
        input("Tool is running... Press [Enter] to stop and save results.\n")
    except EOFError:
        # No interactive stdin is available, so "press Enter" can never happen.
        print(
            "Warning: stdin is not readable; press Ctrl+C to stop and save results.",
            file=sys.stderr,
        )
        return
    stop_event.set()

# ==============================================================================
# --- PARSER FUNCTIONS ---
# Each function is responsible for processing the raw text output of a specific tool.
# ==============================================================================

def parse_biosnoop_output(raw_data: str) -> (list, list):
    """Turn biosnoop's whitespace-separated text output into CSV-ready rows.

    A first line beginning with ``TIME`` is treated as the tool's own header
    and discarded.  Every remaining non-blank line is split on runs of
    whitespace into at most eight fields (surplus text stays in the last
    field); lines yielding fewer than eight fields are ignored.

    Returns:
        A ``(headers, data_rows)`` pair, where ``headers`` is the fixed
        column list and ``data_rows`` is a list of eight-string lists.
    """
    print("Using 'biosnoop' parser...")
    headers = ["TIME", "COMM", "PID", "DISK", "T", "SECTOR", "BYTES", "LAT(ms)"]
    column_count = len(headers)

    # str.split always yields at least one element, so indexing [0] is safe.
    candidates = raw_data.strip().split('\n')
    if candidates[0].strip().startswith("TIME"):
        candidates = candidates[1:]

    split_lines = (
        re.split(r'\s+', text, maxsplit=column_count - 1)
        for text in map(str.strip, candidates)
        if text
    )
    data_rows = [fields for fields in split_lines if len(fields) == column_count]

    return headers, data_rows

def parse_biotop_output(raw_data: str) -> (list, list):
    """Parse the last screen of biotop's output into CSV-ready rows.

    biotop reprints its table every interval, so only the rows following the
    last header line (a line whose first two tokens are ``PID`` and ``COMM``)
    are kept.  The column names are taken from that header line rather than
    hard-coded, so layouts with extra columns (for example ``MAJ``/``MIN``)
    stay aligned instead of being squeezed into the last column.

    Each row is split from the right for every column after COMM and from
    the left for PID, so a COMM value containing spaces is kept intact.
    Blank lines, lines containing "Ending", and lines with too few fields
    are skipped.

    Returns:
        ``(headers, data_rows)``.  If no header line is found, the default
        seven-column header list and an empty row list are returned.
    """
    print("Using 'biotop' parser...")
    headers = ["PID", "COMM", "D", "DISK", "I/O", "Kbytes", "AVGms"]
    lines = raw_data.strip().split('\n')

    # Search backwards so that the most recent screen wins.
    header_index = None
    for index in range(len(lines) - 1, -1, -1):
        tokens = lines[index].split()
        if tokens[:2] == ["PID", "COMM"]:
            header_index = index
            headers = tokens
            break
    if header_index is None:
        return headers, []

    trailing = len(headers) - 2  # number of columns after PID and COMM
    data_rows = []
    for line in lines[header_index + 1:]:
        line = line.strip()
        if not line or "Ending" in line:
            continue
        pieces = line.rsplit(None, trailing)
        if len(pieces) != trailing + 1:
            continue
        leading = pieces[0].split(None, 1)  # -> [PID, COMM]
        if len(leading) != 2:
            continue
        data_rows.append(leading + pieces[1:])

    return headers, data_rows

def parse_biopattern_output(raw_data: str) -> (list, list):
    """Parse biopattern's table output into CSV-ready rows.

    The header is the last line that contains ``DISK`` as a whole token, and
    its tokens become the CSV column names.  Taking the names from the output
    keeps rows aligned when the tool prints a leading ``TIME`` column; with a
    fixed five-column header every such six-field row would be rejected.

    Rows after the header are split on whitespace and kept only when their
    field count matches the header.  Blank lines and lines containing
    "Tracing" or "Ending" are skipped.

    Returns:
        ``(headers, data_rows)``.  If no header line is found, the default
        five-column header list and an empty row list are returned.
    """
    print("Using 'biopattern' parser...")
    headers = ["DISK", "%RND", "%SEQ", "COUNT", "KBYTES"]
    lines = raw_data.strip().split('\n')

    # Search backwards so that the most recently printed header wins.
    header_index = None
    for index in range(len(lines) - 1, -1, -1):
        tokens = lines[index].split()
        if "DISK" in tokens:
            header_index = index
            headers = tokens
            break
    if header_index is None:
        return headers, []

    data_rows = []
    for line in lines[header_index + 1:]:
        line = line.strip()
        if not line or "Tracing" in line or "Ending" in line:
            continue
        parts = line.split()
        if len(parts) == len(headers):
            data_rows.append(parts)

    return headers, data_rows

def parse_histogram_output(raw_data: str, tool_name: str) -> (list, list):
    """Parse log2-histogram text, as printed by biolatency and bitesize.

    Args:
        raw_data: Full captured stdout of the tool.
        tool_name: ``"biolatency"`` (rows carry disk and unit columns),
            ``"bitesize"``, or any other name, which uses the generic
            ``Range / Count / Distribution`` layout.

    Returns:
        ``(headers, data_rows)``.  Each histogram bucket line of the form
        ``<range> : <count> |<bar>|`` produces one row; other lines are
        ignored.

    Notes:
        * The latency unit is "us" when the output contains a ``usecs``
          column label (or the legacy `` (us)`` marker), otherwise "ms".
        * A ``disk = NAME`` line sets the disk for the rows that follow;
          quotes around the name are removed.  Until one is seen the disk
          is reported as "all".
    """
    print(f"Using '{tool_name}' (histogram) parser...")

    if tool_name == "biolatency":
        headers = ["Disk", "Latency Range", "Unit", "Count", "Distribution"]
    elif tool_name == "bitesize":
        headers = ["Size Range (KBytes)", "Count", "Distribution"]
    else: # Fallback
        headers = ["Range", "Count", "Distribution"]

    # Compile once; both patterns are applied to every line.
    disk_pattern = re.compile(r'disk = (\S+)')
    bucket_pattern = re.compile(r'(\S+\s*->\s*\S+|\S+)\s*:\s*(\d+)\s*\|(.*)\|')

    data_rows = []
    current_disk = "all"
    # The histogram's column label names the unit ("usecs" / "msecs").
    if "usecs" in raw_data or " (us)" in raw_data:
        current_unit = "us"
    else:
        current_unit = "ms"

    for line in raw_data.strip().split('\n'):
        line = line.strip()
        if not line or "Tracing" in line or "Ending" in line:
            continue

        disk_match = disk_pattern.match(line)
        if disk_match:
            # Some versions print the name quoted, e.g. disk = 'xvda1'.
            current_disk = disk_match.group(1).strip("'\"")
            continue

        bucket_match = bucket_pattern.match(line)
        if not bucket_match:
            continue
        rng = bucket_match.group(1).strip()
        count = bucket_match.group(2)
        dist = bucket_match.group(3).strip()

        if tool_name == "biolatency":
            data_rows.append([current_disk, rng, current_unit, count, dist])
        else:
            # bitesize and the generic fallback share the 3-column layout.
            data_rows.append([rng, count, dist])

    return headers, data_rows

# --- Parser Map ---
# Lookup table from a supported tool name to the callable that converts that
# tool's raw text output into (headers, data_rows).  The two histogram tools
# share one parser and differ only in the tool name passed to it.
PARSERS = dict(
    biosnoop=parse_biosnoop_output,
    biotop=parse_biotop_output,
    biopattern=parse_biopattern_output,
    biolatency=lambda data: parse_histogram_output(data, "biolatency"),
    bitesize=lambda data: parse_histogram_output(data, "bitesize"),
)

# ==============================================================================
# --- MAIN EXECUTION LOGIC ---
# ==============================================================================

# Install locations searched for a tool, in order: the distribution package
# layout ("<tool>-bpfcc" under sbin) first, then a source build.
_TOOL_PATH_TEMPLATES = (
    "/usr/sbin/{tool}-bpfcc",
    "/sbin/{tool}-bpfcc",
    "/usr/share/bcc/tools/{tool}",
)

# Seconds to wait for tool output before re-checking the stop event.
_POLL_INTERVAL = 0.1

# Escalating (signal, seconds to wait for the tool's stdout to reach EOF).
# SIGINT comes first because bcc tools are meant to be ended with Ctrl-C and
# several of them only print their results when interrupted that way.
_STOP_SIGNALS = (
    (signal.SIGINT, 5.0),
    (signal.SIGTERM, 2.0),
)


def _candidate_paths(tool_name):
    """Return every path where the executable for ``tool_name`` may live."""
    return [template.format(tool=tool_name) for template in _TOOL_PATH_TEMPLATES]


def _read_chunk(fd, timeout):
    """Read from ``fd``: bytes if data arrived, b"" on timeout, None at EOF."""
    ready, _, _ = select.select([fd], [], [], timeout)
    if not ready:
        return b""
    try:
        data = os.read(fd, 4096)
    except BlockingIOError:
        # Reported readable but nothing to read; treat it like a timeout.
        return b""
    return data if data else None


def _signal_group(process, signum):
    """Signal the tool's process group; return False if it is already gone."""
    try:
        os.killpg(os.getpgid(process.pid), signum)
    except (ProcessLookupError, PermissionError):
        return False
    return True


def _drain(fd, consume, timeout):
    """Pass output to ``consume`` until EOF (True) or ``timeout`` elapses (False)."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        data = _read_chunk(fd, _POLL_INTERVAL)
        if data is None:
            return True
        consume(data)
    return False


def _stop_tool(process, fd, consume):
    """Ask the tool to stop, escalating signals, while collecting its last output."""
    for signum, grace in _STOP_SIGNALS:
        if not _signal_group(process, signum):
            break
        if _drain(fd, consume, grace):
            break
    else:
        print("Tool did not terminate gracefully, forcing shutdown...")
        _signal_group(process, signal.SIGKILL)
    # Collect whatever is still sitting in the pipe after the tool has gone.
    _drain(fd, consume, 1.0)


def _capture_output(process):
    """Echo and collect the tool's stdout until stopped; return it as text.

    Reading ends when ``stop_event`` is set, Ctrl+C is pressed, or the tool
    closes its stdout.  In the first two cases the tool is then signalled and
    its remaining output (for example a final histogram) is collected too.
    The child process is always reaped before returning.
    """
    fd = process.stdout.fileno()
    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    # Decode incrementally so a multi-byte character split across two reads
    # is not lost.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="ignore")
    pieces = []

    def consume(data, final=False):
        text = decoder.decode(data, final)
        if text:
            print(text, end='', flush=True)
            pieces.append(text)

    exited_on_its_own = False
    try:
        while not stop_event.is_set():
            data = _read_chunk(fd, _POLL_INTERVAL)
            if data is None:
                exited_on_its_own = True
                break
            consume(data)
    except KeyboardInterrupt:
        # Allow Ctrl+C to stop the script; results are still saved.
        pass

    if not exited_on_its_own:
        print("\nStopping the tool...")
        _stop_tool(process, fd, consume)
    consume(b"", final=True)

    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()
    process.stdout.close()

    if exited_on_its_own and process.returncode != 0:
        print(
            f"Warning: the tool exited on its own with status {process.returncode}.",
            file=sys.stderr,
        )
    return "".join(pieces)


def main():
    """Run one bcc tool, echo its output live, and save the parsed result as CSV.

    Steps: parse the command line, check that the tool is supported, that the
    script runs as root and that the tool's executable exists; start the tool
    in its own session; capture its output until the user presses Enter or
    Ctrl+C (or the tool exits); parse the captured text with the matching
    entry of ``PARSERS``; and write ``<tool>_output_<timestamp>.csv`` into
    the output directory.  Exits with status 1 on a validation failure.
    """
    parser = argparse.ArgumentParser(
        description="A script to monitor system activity using bcc-tools and save the results to a CSV file.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=f"""
Examples:
  sudo ./monitor_and_save.py biosnoop
  sudo ./monitor_and_save.py biopattern -o /tmp/logs
  sudo ./monitor_and_save.py biolatency

Supported tools: {', '.join(PARSERS.keys())}
"""
    )
    parser.add_argument("tool", help="The name of the bcc tool to run.")
    parser.add_argument("-o", "--output_dir", default=".", help="The directory to save the CSV file (default: current directory).")
    args = parser.parse_args()

    tool_name = args.tool
    output_dir = args.output_dir

    if tool_name not in PARSERS:
        print(f"Error: Unsupported tool '{tool_name}'. Please choose from: {list(PARSERS.keys())}", file=sys.stderr)
        sys.exit(1)

    if os.geteuid() != 0:
        print("Error: This script requires root privileges to run bcc-tools. Please use 'sudo'.", file=sys.stderr)
        sys.exit(1)

    candidates = _candidate_paths(tool_name)
    tool_path = next((path for path in candidates if os.path.exists(path)), None)
    if tool_path is None:
        print(
            f"Error: Tool '{tool_name}' not found (looked in: {', '.join(candidates)}). "
            "Please ensure bcc-tools are installed correctly.",
            file=sys.stderr,
        )
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    # --- Start Monitoring ---
    command = [tool_path]
    print(f"Starting: {' '.join(command)}...")

    # The child is started before any thread exists.  stderr is inherited so
    # the tool's error messages reach the terminal and cannot fill an unread
    # pipe.  PYTHONUNBUFFERED asks Python-based tools to flush each write so
    # output arrives live -- presumably all bcc tools are Python scripts;
    # the variable is harmless otherwise.
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        env=dict(os.environ, PYTHONUNBUFFERED="1"),
        start_new_session=True,
    )

    # Daemon thread: it must not keep the script alive waiting for Enter
    # once the tool has exited or Ctrl+C was used.
    input_thread = threading.Thread(target=user_input_thread, daemon=True)
    input_thread.start()

    print("-" * 60)
    full_output = _capture_output(process)
    print("-" * 60)

    # --- Process Results ---
    print("Processing captured data...")
    parser_func = PARSERS[tool_name]
    headers, data_rows = parser_func(full_output)

    if not data_rows:
        print("⚠️ Warning: No valid data was parsed. The tool may have run for too short a time.")
        return

    # --- Save to CSV ---
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = os.path.join(output_dir, f"{tool_name}_output_{timestamp}.csv")

    try:
        with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(headers)
            writer.writerows(data_rows)
        print(f"✅ Success! Results saved to: {output_filename}")
    except OSError as e:
        print(f"❌ Error: Could not write to file {output_filename}. Reason: {e}", file=sys.stderr)

if __name__ == "__main__":
    main()