本文源码版本基于 go1.21.13
代码有点多，这里只简单介绍 G、M、P 各自对应的结构体

# runtime2

GMP 简介：https://blog.twelveeee.top/2023/Go/go GMP intro/

本文基于 Go 1.21.13 的 runtime2.go 源码，梳理了 Go 调度器中的 G (goroutine)、M (machine)、P (processor) 三个核心结构体，以及全局调度器 schedt 的关键字段。

G (goroutine)：是 Go 的用户态线程，封装了栈、调度上下文、panic/defer 链表、状态流转标识及 GC 相关信息，是用户代码运行的最小执行单元。
M (machine)：抽象操作系统内核线程，负责实际执行 G。它与 P 绑定，利用自己的 g0 栈来运行调度逻辑，并维护与调度、信号、cgo 调用等相关的上下文。
P (processor)：是 M 执行 Go 代码的必需上下文，维护着本地运行队列、本地内存分配缓存、定时器及 GC 写屏障等状态，它保证了 M 在运行时能高效管理 goroutine 与内存。
schedt：作为全局调度器，统一管理空闲 / 运行中的 G、M、P 队列，协调 GC、STW、安全点、以及负载均衡，保证整个 GMP 系统的正常运转。

整体来看，Go 调度的核心思想是：G 是任务单元，M 是执行者，P 负责调度上下文和资源，schedt 做全局协调。这种 GMP 模型保证了 goroutine 调度的高并发性和轻量级，避免了对 OS 线程的过度依赖。

# G

Go runtime 里调度器 G (goroutine) 的实现，是承载用户协程的核心数据结构，记录了 goroutine 执行所需的各种上下文信息。

# Goroutine 基本字段

基本字段包括栈与函数执行环境管理，异常处理机制（panic/defer），调度上下文保存，状态流转与调度控制，GC 协作与内存安全，运行时辅助信息等

	type g struct {
	// 栈相关
	stack stack // 描述当前 goroutine 的栈内存区间 [stack.lo, stack.hi)，lo 是低地址边界，hi 是高地址边界（栈从 hi 向 lo 方向增长）。
	// 栈溢出检查的阈值：函数序言中若发现 SP 低于该值，则触发栈扩张（morestack）。
	stackguard0 uintptr //stackguard0 供普通 Go 代码的栈检查使用，也可被设置为 StackPreempt 来触发抢占
	stackguard1 uintptr //stackguard1 供系统栈（g0、gsignal）上的栈检查使用，普通 goroutine 上为 ~0

	// 指向当前 goroutine 最内层的 panic 和 defer 结构（链表）。
	_panic *_panic // innermost panic - offset known to liblink
	_defer *_defer // innermost defer

	// 调度和状态流转相关
	m *m // 指向当前绑定的 M
	sched gobuf // 保存 goroutine 被挂起时的寄存器等调度现场（栈指针、程序计数器等）。

	// Goroutine 状态流转
	// Goroutine 进入系统调用 (syscall) 时会保存栈指针和程序计数器（方便 GC 和调度器）。
	syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
	syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
	//param 一个通用指针字段，在运行时一些特殊场景用于临时参数传递，例如：
	// 1. channel 唤醒 goroutine 时传参数
	// 2. GC 辅助分配返回信号
	// 3. debugCallWrap 用于启动新协程传参
	param unsafe.Pointer
	stktopsp uintptr //sp 位于堆栈顶部，以检查回溯
	atomicstatus atomic.Uint32 //goroutine 状态 (_Grunnable, _Grunning, _Gwaiting, _Gsyscall 等)。
	stackLock uint32 // 栈扫描时的锁（GC / 性能分析用）
	goid uint64 //goroutine 唯一 ID（调试用，用户代码无法直接拿到）
	schedlink guintptr
	waitsince int64 // G 开始阻塞的大致时间（是时间点，而非时长）
	waitreason waitReason // 阻塞原因
	preempt bool // 标记 goroutine 被请求抢占。
	preemptStop bool // 抢占时进入 _Gpreempted 状态。
	preemptShrink bool // 在同步安全点收缩栈

	// GC 相关
	asyncSafePoint bool // 异步安全点。如果 g 在异步安全点停止则设置为 true，表示在栈上没有精确的指针信息。
	paniconfault bool // 地址异常引起的 panic（代替了崩溃）。
	gcAssistBytes int64 // 该 G 的 GC 辅助额度（以已分配字节数计）：为正表示可以直接分配这么多字节而无需协助，为负表示必须先协助 GC 做扫描工作来偿还
	gcscandone bool // 该 goroutine 栈是否已被 GC 扫描。
	throwsplit bool // 表明不允许栈分裂（即此时不允许扩栈）
	activeStackChans bool // 表示有未锁定的通道指向此 goroutine 的堆栈。如果为真，堆栈复制需要获取通道锁来保护堆栈的这些区域。
	parkingOnChan atomic.Bool // 表示 goroutine 即将在 chansend 或 chanrecv 上 park（挂起）。用于标记此时是栈收缩的不安全点。

	// 用于 Go scheduler latency profiling（调度延迟统计）和 runtime trace。
	raceignore int8 // ignore race detection events
	tracking bool // whether we're tracking this G for sched latency statistics
	trackingSeq uint8 // used to decide whether to track this G
	trackingStamp int64 // timestamp of when the G last started being tracked
	runnableTime int64 // the amount of time spent runnable, cleared when running, only used when tracking
	lockedm muintptr

	// 崩溃或信号处理时保存的上下文。
	sig uint32
	writebuf []byte
	sigcode0 uintptr
	sigcode1 uintptr
	sigpc uintptr
	parentGoid uint64 // 父 goroutine
	gopc uintptr // 创造当前 goroutine 的语句 pc 指针
	ancestors *[]ancestorInfo // 创建当前 goroutine 的祖先 goroutine 信息（仅在开启 debug.tracebackancestors 时使用）
	startpc uintptr // pc of goroutine function
	racectx uintptr
	waiting *sudog // 当前 g 正在等待的 sudog 结构链表（elem 指针有效的那些），按加锁顺序排列
	cgoCtxt []uintptr // cgo traceback context
	labels unsafe.Pointer // profiler labels
	timer *timer // 缓存 time.Sleep 所需的定时器结构，避免频繁分配。
	selectDone atomic.Uint32 // are we participating in a select and did someone win the race?

	// goroutineProfiled indicates the status of this goroutine's stack for the
	// current in-progress goroutine profile
	goroutineProfiled goroutineProfileStateHolder

	// Per-G tracer state.
	trace gTraceState
	}

	type _panic struct {
	argp unsafe.Pointer //defer 调用的参数指针
	arg any //panic 的参数
	link *_panic // 上一层 panic（链表）
	pc uintptr // 在 runtime 里恢复时使用的程序计数器
	sp unsafe.Pointer // 栈指针
	recovered bool // 是否被 recover 捕获
	aborted bool // 是否被 abort
	goexit bool // 特殊标记：runtime.Goexit
	}

	type _defer struct {
	started bool // 是否已经开始执行（防止二次执行）
	heap bool // 是否分配在堆上
	openDefer bool // 是否是 open-coded defer（编译器优化模式）
	sp uintptr //defer 注册时的栈指针，配合栈回溯 traceback
	pc uintptr // 注册 defer 位置的 PC
	fn func() //defer func 对应的函数指针
	_panic *_panic // 当前运行的 panic（如果这个 defer 是在 panic unwinding 阶段调用的）
	link *_defer // 下一层 defer，形成链表

	// 下面这些主要在 open-coded defer 模式下生效：
	fd unsafe.Pointer // 编译器生成的 funcdata，保存 defer 信息
	varp uintptr // 关联的栈帧变量指针
	framepc uintptr // 当前函数帧的返回 PC，用于 gentraceback ()
	}

	//goroutine 被挂起时的最小执行上下文
	type gobuf struct {
	sp uintptr // 栈指针寄存器，保存 goroutine 被挂起时的栈顶位置
	pc uintptr // 程序计数器，保存 goroutine 被挂起时正在执行的下一条指令地址
	g guintptr // 指向当前现场对应的 g 结构（goroutine 对象）
	ctxt unsafe.Pointer // 保存调度现场时的附加上下文指针。
	ret uintptr // 返回值（切换回来时恢复到返回值寄存器），注意不是返回地址
	lr uintptr // 链接寄存器（link register），在 ARM 等架构上保存返回地址
	bp uintptr // for framepointer-enabled architectures
	}

# Goroutine 状态流转

	// defined constants
	const (
	// G status

	//goroutine 刚被分配，还没有初始化。
	// 还没有绑定函数入口。
	// newproc1 中通过 malg 分配栈后会先切换到 _Gdead，完成初始化后再切换到 _Grunnable。
	_Gidle = iota // 0

	//goroutine 现在是可运行状态，在调度器的运行队列里，还没有被调度执行。
	// 在某个 P 的 local runq 或 global runq 里排队。
	// 栈目前不属于自己（所以 GC 扫描时可以随意移动 / 复制）。
	_Grunnable // 1

	// 当前 goroutine 正在运行，CPU 上正在执行它的用户代码。
	// 栈处于 “锁定” 状态，只能由这个 goroutine 自己使用。
	_Grunning // 2

	//goroutine 正在执行系统调用（阻塞在 syscall 上）。
	// 栈依旧属于这个 goroutine。不在 run queue 上。
	// 有对应的 M，但 P 会被释放给其他 goroutines 使用。防止 syscall 长时间阻塞一个 P。
	_Gsyscall // 3

	//goroutine 正在 runtime 内部等待（比如 channel、定时器、network poll 等），阻塞状态。
	// 通常被挂在某个等待队列里（chan 队列、timer 队列）
	// 栈不属于自己（可能会被 GC 移动）。
	_Gwaiting // 4

	// 废弃的状态，没在用。 gdb 脚本硬编码依赖它
	_Gmoribund_unused // 5

	//goroutine 已经死亡，不再使用。
	// 已退出或者被清理。可能在 freelist 里复用。栈可能释放或回收。
	_Gdead // 6

	// 未使用，保留位。
	_Genqueue_unused // 7

	// 该 goroutine 的栈正在被移动（因为 Go 支持动态扩容和收缩栈）
	// 不在 runq，不运行用户代码。栈处于迁移过程中。
	_Gcopystack // 8

	// 该 goroutine 因 suspendG 发起的抢占而自行暂停在安全点。
	// 类似 _Gwaiting，但还没有任何一方负责把它重新 ready。
	// 主要用于 GC 栈扫描等需要挂起 G 的场景（preemptStop）；普通的时间片抢占会直接回到 _Grunnable，不经过此状态。
	// 后续必须由某个 suspendG 通过 CAS 将状态改为 _Gwaiting，以接管 ready 它的责任。
	_Gpreempted // 9

	// GC 相关
	_Gscan = 0x1000
	_Gscanrunnable = _Gscan + _Grunnable // 0x1001 //goroutine 在 runq 中等待运行，但整个栈正被 GC 扫描。
	_Gscanrunning = _Gscan + _Grunning // 0x1002 // 正在运行，但 GC 想让它协助扫描自己的栈（STW 安全点抢占）
	_Gscansyscall = _Gscan + _Gsyscall // 0x1003 // 正在 system call，同时 GC 正在扫描它的栈
	_Gscanwaiting = _Gscan + _Gwaiting // 0x1004 // 处于阻塞状态，且栈在被 GC 扫描。
	_Gscanpreempted = _Gscan + _Gpreempted // 0x1009 // 处于抢占状态，同时栈在被 GC 扫描
	)

# M

m 是 Go runtime 里调度器 M (machine) 的实现，M 代表一个内核线程，实际上就是执行 goroutine 的 “物理承载者”。它和 P (Processor) 绑定，驱动 G (goroutine) 的执行。

# M 基本字段

基本字段包括调度相关，执行栈与上下文保存，与操作系统交互，资源与生命周期管理，调试与监控

	type m struct {
	g0 *g // 特殊的 goroutine，调度栈，用来执行调度相关代码而非用户代码
	morebuf gobuf // 切栈时保存的上下文
	divmod uint32 // div/mod denominator for arm - known to liblink
	_ uint32 // align next field to 8 bytes

	// 调试 & 信号处理相关
	procid uint64 // for debuggers, but offset not hard-coded
	gsignal *g // 信号处理用的 G
	goSigStack gsignalStack // 保存 signal stack
	sigmask sigset // 信号屏蔽
	tls [tlsSlots]uintptr // thread-local storage (for x86 extern register)
	mstartfn func()

	// 调度相关
	curg *g // 当前运行的 G
	caughtsig guintptr //fatal signal 时所在的 goroutine
	p puintptr // 当前 M 占有的 P (nil if not executing go code)
	nextp puintptr // 下一个可绑定的 P
	oldp puintptr // 调用 syscall 前绑定的 P
	id int64 // M 的唯一 ID
	mallocing int32 // 是否在执行 malloc
	throwing throwType // 是否在 panic/throw
	preemptoff string // 不允许被抢占的原因描述
	locks int32 // 持有的锁数量
	dying int32 // 标记此 M 是否正在退出 / 死亡
	profilehz int32 //profiling 采样频率
	spinning bool // M 是否在空转找 G
	blocked bool // M 是否阻塞
	newSigstack bool // minit on C thread called sigaltstack
	printlock int8

	//cgo & 系统调用相关
	incgo bool //m 是否正在执行 cgo 调用
	isextra bool //m 是一个额外的 m
	isExtraInC bool //m 是一个额外的 m，它没有执行 Go 代码
	freeWait atomic.Uint32 // 释放 g0 并删除 m（freeMRef、freeMStack、freeMWait 之一）是否安全
	fastrand uint64
	needextram bool
	traceback uint8
	ncgocall uint64 // 累计 cgo 调用数
	ncgo int32 // 当前进行的 cgo 调用数
	cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily
	cgoCallers *cgoCallers // cgo traceback if crashing in cgo call
	park note

	// 链接管理
	alllink *m // 全局 allm 链表
	freelink *m // 挂在 sched.freem 链表上时使用的链接指针（等待被释放的 M）
	schedlink muintptr // 调度器链表，用于管理调度队列
	lockedg guintptr
	createstack [32]uintptr // 创建此线程的栈
	lockedExt uint32 // tracking for external LockOSThread
	lockedInt uint32 // tracking for internal lockOSThread
	nextwaitm muintptr // 下一个等待锁的 m

	//wait* 用于将参数从 gopark 携带到 park_m 中，因为没有栈可以放置它们。这是它们的唯一目的。
	waitunlockf func(*g, unsafe.Pointer) bool
	waitlock unsafe.Pointer
	waitTraceBlockReason traceBlockReason
	waitTraceSkip int

	syscalltick uint32
	trace mTraceState

	// 以下字段之所以放在 m 中，是因为它们太大，无法放在底层 NOSPLIT 函数的栈上。
	libcall libcall
	libcallpc uintptr // for cpu profiler
	libcallsp uintptr
	libcallg guintptr
	syscall libcall // stores syscall parameters on windows

	vdsoSP uintptr // SP for traceback while in VDSO call (0 if not in call)
	vdsoPC uintptr // PC for traceback while in VDSO call

	// preemptGen counts the number of completed preemption
	// signals. This is used to detect when a preemption is
	// requested, but fails.
	preemptGen atomic.Uint32

	// Whether this is a pending preemption signal on this M.
	signalPending atomic.Uint32

	dlogPerM

	mOS // 平台相关的 OS 线程数据（在不同平台里定义）

	// Up to 10 locks held by this m, maintained by the lock ranking code.
	locksHeldLen int
	locksHeld [10]heldLockInfo
	}

# P

P 是 Go runtime 里调度器 P (processor) 的实现，P 是协程调度的关键角色，它维护着与调度、内存分配、垃圾回收、定时器等多个方面相关的本地状态，可以看作是 "M 执行 G 的上下文环境"。

# P 基本字段

	type p struct {
	id int32
	status uint32 // 状态
	link puintptr // 空闲 P 链表用的指针
	m muintptr // 当前 P 绑定的 M (如果空闲则为 nil)
	raceprocctx uintptr // 用于数据竞争检测

	// 缓存一批 goroutine id，分摊对 runtime·sched.goidgen 的访问。
	goidcache uint64
	goidcacheend uint64

	// 调度相关
	schedtick uint32 // 每次调度（scheduler call）自增
	syscalltick uint32 // 每次系统调用（system call）自增
	sysmontick sysmontick //sysmon 观察到的最后一个 tick，sysmon 会周期性检查调度状态（如长时间 GC、长时间 syscall 等），如果 P 没有活动会触发抢占调度。
	//p 可运行的 G 本地队列。无需锁即可访问。
	runqhead uint32
	runqtail uint32
	runq [256]guintptr
	// 如果 runnext 非空，则表示当前 G 已将其准备好，并且接下来应该运行该 G，而不是 runq 中的 G。
	// 如果当前 G 的时间片还有剩余时间，则 runnext 将继承当前时间片的剩余时间。
	runnext guintptr // 一个快速路径优化，如果某 G 是由当前正在运行的 G 唤醒的，可以直接放在 runnext 位置，它会优先运行（减少调度延迟）。

	// 内存分配 & 缓存
	mcache *mcache // 本地缓存，小对象分配都会先在本地 mcache 中完成，这是 Go 内存分配性能高的关键
	pcache pageCache // 页分配器的 per-P 页缓存，可在不加锁的情况下分配内存页
	// 缓存来自堆的 mspan ，加速 span 分配
	mspancache struct {
	len int
	buf [128]*mspan
	}
	deferpool []*_defer // 缓存 _defer 对象，复用避免频繁分配。
	deferpoolbuf [32]*_defer
	sudogcache []*sudog // 缓存 sudog
	sudogbuf [128]*sudog
	// 减少 pinner 分配次数
	pinnerCache *pinner
	// 可用的 G's (status == Gdead) 可以复用已经死亡的 G 对象，避免频繁 malloc
	gFree struct {
	gList
	n int32
	}

	// 定时器相关
	timer0When atomic.Int64 // 堆顶 timer 的到期时间
	timersLock mutex // 访问 timers 时需要加锁
	timers []*timer // 定时器堆，保存将在某个时刻触发的动作；访问时必须持有 timersLock。
	timerModifiedEarliest atomic.Int64
	numTimers atomic.Uint32 // P 的堆中的计时器数量。
	deletedTimers atomic.Uint32 // P 的堆中的 timerDeleted 计时器的数量。
	timerRaceCtx uintptr // 执行计时器功能时使用的 Race context。

	// 垃圾回收相关
	gcAssistTime int64 //assistAlloc 的 Nanoseconds 数
	gcFractionalMarkTime int64 //fractional mark worker (atomic) 的 Nanoseconds 数
	// 有关当前 goroutines 的 gc-time 统计数据
	scannedStackSize uint64 // 此 P 扫描的 goroutine 的堆栈大小
	scannedStacks uint64 // 此 P 扫描的 goroutine 数量
	maxStackScanDelta int64 // 累计当前活跃 Goroutine（即有资格进行堆栈扫描的 Goroutine）所占用的堆栈空间大小。
	limiterEvent limiterEvent // 跟踪 GC CPU 限制器的事件。
	//gcMarkWorkerMode 是下一个 mark worker 的运行模式。也就是说，它用于与 gcController.findRunnableGCWorker 选中并立即执行的工作线程进行通信。
	// 在调度其他工作线程时，必须将此字段设置为 gcMarkWorkerNotWorker。
	gcMarkWorkerMode gcMarkWorkerMode
	gcMarkWorkerStartTime int64 // 是最近一个 mark worker 启动时的 nanotime ()。
	gcw gcWork //gcw 是此 P 的 GC 工作缓冲区，用于存放当前被扫描出来的对象。在 STW 和 GC 调度间切换时要特别处理。
	wbBuf wbBuf //wbBuf 是这个 P 的 GC 写入屏障缓冲区。

	// 其他
	runSafePointFn uint32 // 如果为 1，则在下一个安全点运行 sched.safePointFn
	trace pTraceState //runtime trace 相关
	palloc persistentAlloc //per-P 以避免互斥，每个 P 有自己的单独 allocator，减少全局锁争用
	statsSeq atomic.Uint32 // 判断是否在写 stats
	preempt bool // 如果为 true，表明当前 goroutine 将被尽快抢占
	pageTraceBuf pageTraceBuf //pageTraceBuf 用于记录页分配 / 释放的 trace 日志，只有开启 GOEXPERIMENT=pagetrace 时才会用到
	}

# P 状态流转

	const (
	// P status

	// P 处于空闲状态，没有正在执行用户代码或调度逻辑。
	// 可以被新的 M 获取来运行 G；
	//run queue（本地运行队列）为空；
	// 不与任何 M 绑定。
	_Pidle = iota

	// P 正在与某个 M 绑定，并用于运行用户代码或调度器逻辑。
	// 该状态下，P 属于某个 M；
	// 执行用户 Goroutine 或调度任务；
	// 只有拥有该 P 的 M 可以改变它的状态；
	_Prunning

	// P 与正在执行系统调用的 M 有亲和性 (affinity)，但并不直接绑定。
	// 在系统调用阻塞时间过长时，P 可以被其他 M 偷走，以避免调度停滞；
	// 因为存在 CAS（Compare-And-Swap）操作竞争，可能出现 ABA 问题（即 P 在被 CAS 拿回之前，可能已被其他 M 使用过）。
	_Psyscall

	// P 被 GC STW 时挂起。
	// 属于发起 STW 的 M
	// 依旧保留自身的 run queue
	_Pgcstop

	// 因为 GOMAXPROCS 减小了，不再使用的 P
	// 被认为 “死亡”，资源基本被清理；
	// 如果后续 GOMAXPROCS 增大，可以复用；
	_Pdead
	)

# schedt

schedt 是全局唯一的调度器实例，保存了 Go 调度器运行时需要的核心全局状态，比如 Goroutine 的运行队列、空闲 M/P 的缓存、统计信息、定时器、安全点、垃圾回收（GC）的协调状态等等。

	type schedt struct {
	goidgen atomic.Uint64 //goroutine ID 的生成器，全局自增 ID 分配来源
	lastpoll atomic.Int64 // 最近一次网络轮询的时间戳
	pollUntil atomic.Int64 // 当前 M 在 network poll 中会阻塞到的时间

	lock mutex

	// When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
	// sure to call checkdead().

	// M 管理
	midle muintptr // 空闲的 M 链表
	nmidle int32 // 空闲的 M 数量
	nmidlelocked int32 // 被锁定（只能运行特定 goroutine）的空闲 M
	mnext int64 // 下一个 M 的 ID 分配器
	maxmcount int32 // 允许创建的最大 M 数量（防止无限创建线程）
	nmsys int32 // 系统 M 的数量（不计入死锁检查，例如运行 sysmon 的 M）
	nmfreed int64 // 已被释放的 M 累计数。

	ngsys atomic.Int32 // number of system goroutines

	// P 管理
	pidle puintptr // 空闲的 P 链表
	npidle atomic.Int32 // 空闲的 P 数量
	nmspinning atomic.Int32 // 当前处于自旋状态的 M 数（忙等以尝试获取 G）。
	needspinning atomic.Uint32 // 调度器是否需要更多自旋线程（细节见 proc.go 的 “Delicate dance” 注释）。

	runq gQueue // 全局运行 G 队列
	runqsize int32 // 全局运行 G 队列大小

	// 可控开关，能选择是否禁止用户 Goroutine 的调度（例如 runtime 某些特殊场景）
	disable struct {
	user bool
	runnable gQueue // 等待运行的 G 队列
	n int32 // 等待运行的 G 队列的长度
	}

	// 全局缓存回收的 G 对象
	gFree struct {
	lock mutex
	stack gList // 有栈的 G 队列
	noStack gList // 没有栈的 G 队列
	n int32
	}

	//sudog 对象的缓存和锁
	sudoglock mutex
	sudogcache *sudog

	// 可用的 defer pool 和锁
	deferlock mutex
	deferpool *_defer

	//freem 是等待在 m.exited 被设置时被释放的 m 的列表。通过 m.freelink 链接。
	freem *m

	gcwaiting atomic.Bool // GC 是否在等待运行
	stopwait int32 // STW 时的同步
	stopnote note // STW 时的同步
	sysmonwait atomic.Bool //sysmon 相关状态
	sysmonnote note

	// 用于 GC 的安全点机制
	safePointFn func(*p)
	safePointWait int32
	safePointNote note

	profilehz int32 // CPU profiling 的采样频率。

	procresizetime int64 // 最后更新 gomaxprocs 的 nanotime ()
	totaltime int64 // ∫gomaxprocs dt up to procresizetime

	// sysmonlock protects sysmon's actions on the runtime.
	//
	// Acquire and hold this mutex to block sysmon from interacting
	// with the rest of the runtime.
	sysmonlock mutex

	timeToRun timeHistogram // 统计 Goroutine 从 _Grunnable 到 _Grunning 的延迟分布
	idleTime atomic.Int64 // 统计 P 的累计空闲时间（在 GC 周期内会清零）。

	totalMutexWaitTime atomic.Int64 // Goroutine 在 sync.Mutex 等锁上等待的总时间
	}