Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update && sudo apt-get install -y \
golang-1.22 \
golang-1.23 \
git \
make \
gcc \
Expand Down
18 changes: 10 additions & 8 deletions bpf/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ extern int LINUX_KERNEL_VERSION __kconfig;
// Data structure used to store scheduling latency information.
// Declared with __attribute__((packed)), so no padding is inserted between fields.
struct sched_latency_t
{
__u32 pid; // process ID
__u32 tid; // thread ID
__u64 delay_ns; // scheduling delay (nanoseconds)
__u64 ts; // timestamp
__u32 preempted_pid; // PID of the preempted process
char preempted_comm[16]; // name of the preempted process
__u64 is_preempt; // whether a preemption occurred (0: no, 1: yes)
char comm[16]; // process name
__u32 pid; // process ID
__u32 tid; // thread ID
__u64 delay_ns; // scheduling delay (nanoseconds)
__u64 ts; // timestamp
__u32 preempted_pid; // PID of the preempted process
char preempted_comm[16]; // name of the preempted process
__u64 is_preempt; // whether a preemption occurred (0: no, 1: yes)
char comm[16]; // process name
__u32 preempted_pid_state; // state of the preempted process
} __attribute__((packed));

struct sched_latency_t *unused_sched_latency_t __attribute__((unused));
Expand Down Expand Up @@ -201,6 +202,7 @@ static __always_inline void handle_sched_switch(u32 prev_pid, u32 prev_tgid,
.tid = next_pid,
.delay_ns = delay,
.ts = now,
.preempted_pid_state = prev_state,
};

bpf_probe_read_kernel_str(&latency.comm, sizeof(latency.comm), next_comm);
Expand Down
2 changes: 1 addition & 1 deletion cmd/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ btf:
kernel: "/sys/kernel/btf/vmlinux"

output:
type: file
type: clickhouse
clickhouse:
host: "192.168.200.201"
port: "9000"
Expand Down
2 changes: 2 additions & 0 deletions deploy/sql/clickhouse/sched.ck
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ CREATE TABLE shepherd.sched_latency

`date` Date DEFAULT today(),

`preempted_pid_state` UInt32,

`datetime` DateTime64(9) DEFAULT now64(9)
)
ENGINE = MergeTree
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module github.com/cen-ngc5139/shepherd

go 1.22.4
toolchain go1.24.1
go 1.23.4

require (
github.com/ClickHouse/clickhouse-go/v2 v2.30.0
Expand Down
3 changes: 2 additions & 1 deletion internal/output/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ func (o *Output) InitSinkCli(cfg config.OutputConfig) (err error) {
INSERT INTO sched_latency (
pid, tid, delay_ns, ts,
preempted_pid, preempted_comm,
is_preempt, comm
is_preempt, comm,
preempted_pid_state
)
`)
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion internal/output/sched_delay.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ func insertSchedMetrics(ctx context.Context, conn clickhouse.Conn, batch driver.
sanitizeString(convertInt8ToString(event.PreemptedComm[:])),
event.IsPreempt,
sanitizeString(convertInt8ToString(event.Comm[:])),
event.PreemptedPidState,
)
if err != nil {
log.Errorf("failed to append to batch: %v", err)
Expand All @@ -132,7 +133,8 @@ func insertSchedMetrics(ctx context.Context, conn clickhouse.Conn, batch driver.
INSERT INTO sched_latency (
pid, tid, delay_ns, ts,
preempted_pid, preempted_comm,
is_preempt, comm
is_preempt, comm,
preempted_pid_state
)
`)
if err != nil {
Expand Down
64 changes: 64 additions & 0 deletions internal/output/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,67 @@ func filterNonASCII(data []byte) string {
// sanitizeString returns s with all leading and trailing whitespace removed.
func sanitizeString(s string) string {
	trimmed := strings.TrimSpace(s)
	return trimmed
}


// Thread state constants.
//
// Each non-zero state occupies a single bit, so the values are written as
// shifts; EXIT_TRACE is the only composite value.
const (
	TASK_RUNNING          = 0
	TASK_INTERRUPTIBLE    = 1 << 0
	TASK_UNINTERRUPTIBLE  = 1 << 1
	TASK_STOPPED          = 1 << 2
	TASK_TRACED           = 1 << 3
	EXIT_DEAD             = 1 << 4
	EXIT_ZOMBIE           = 1 << 5
	EXIT_TRACE            = EXIT_ZOMBIE | EXIT_DEAD
	TASK_PARKED           = 1 << 6
	TASK_DEAD             = 1 << 7
	TASK_WAKEKILL         = 1 << 8
	TASK_WAKING           = 1 << 9
	TASK_NOLOAD           = 1 << 10
	TASK_NEW              = 1 << 11
	TASK_RTLOCK_WAIT      = 1 << 12
	TASK_FREEZABLE        = 1 << 13
	TASK_FREEZABLE_UNSAFE = 1 << 14 // depends on IS_ENABLED(CONFIG_LOCKDEP)
	TASK_FROZEN           = 1 << 15
	TASK_STATE_MAX        = 1 << 16 // as of Linux kernel 6.9
)

// Task state mapping table: each key is a single state bit (or 0 for the
// running state) and each value is the short label used for that state.
var taskStates = map[uint32]string{
	0:       "R",  // RUNNING
	1 << 0:  "S",  // INTERRUPTIBLE
	1 << 1:  "D",  // UNINTERRUPTIBLE
	1 << 2:  "T",  // STOPPED
	1 << 3:  "t",  // TRACED
	1 << 4:  "X",  // EXIT_DEAD
	1 << 5:  "Z",  // EXIT_ZOMBIE
	1 << 6:  "P",  // PARKED
	1 << 7:  "dd", // DEAD
	1 << 8:  "wk", // WAKEKILL
	1 << 9:  "wg", // WAKING
	1 << 10: "I",  // NOLOAD
	1 << 11: "N",  // NEW
	1 << 12: "rt", // RTLOCK_WAIT
	1 << 13: "fe", // FREEZABLE
	1 << 14: "fu", // __TASK_FREEZABLE_UNSAFE = (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
	1 << 15: "fo", // FROZEN
}

// GetTaskStateName converts a kernel task-state bitmask into a readable string.
//
// A zero mask yields "R", and any mask with the TASK_NOLOAD bit set yields "I".
// Otherwise the labels of every set bit that has an entry in taskStates are
// joined with "+", lowest bit first, so a given mask always produces the same
// string. Set bits without an entry in taskStates are ignored; a mask made up
// only of such bits yields the empty string.
func GetTaskStateName(taskState uint32) string {
	if taskState == 0 {
		return "R"
	}
	if taskState&TASK_NOLOAD != 0 { // idle kernel thread waiting for work
		return "I"
	}

	var names []string
	// Walk the bits in ascending order rather than ranging over the map: Go
	// randomizes map iteration order, which made the label order of multi-bit
	// masks differ from call to call. The loop ends when the bit shifts out of
	// the 32-bit value.
	for bit := uint32(1); bit != 0; bit <<= 1 {
		if taskState&bit == 0 {
			continue
		}
		if name, ok := taskStates[bit]; ok {
			names = append(names, name)
		}
	}

	return strings.Join(names, "+")
}