0%

《Golang》栈检查与栈扩容

通过前面的学习已经知道,大部分函数在编译器编译后会插入一段栈检查逻辑,如下所示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"".Test STEXT size=111 args=0x18 locals=0x28
0x0000 00000 (./bin/main/main.go:17) TEXT "".Test(SB), ABIInternal, $40-24
0x0000 00000 (./bin/main/main.go:17) MOVQ (TLS), CX // load the current g pointer from thread-local storage (TLS) into CX
0x0009 00009 (./bin/main/main.go:17) CMPQ SP, 16(CX) // 16(CX) is byte offset 16 inside g, which per the g struct layout is stackguard0; compare SP with g.stackguard0
0x000d 00013 (./bin/main/main.go:17) PCDATA $0, $-2
0x000d 00013 (./bin/main/main.go:17) JLS 104 // if SP <= g.stackguard0, jump to offset 104 (the morestack call below). stackguard0 = stack.lo + StackGuard (a constant); g's actual stack range is [stack.lo, stack.hi)
0x000f 00015 (./bin/main/main.go:17) PCDATA $0, $-1
0x000f 00015 (./bin/main/main.go:17) SUBQ $40, SP // stack check passed: reserve the 40-byte frame
0x0013 00019 (./bin/main/main.go:17) MOVQ BP, 32(SP)
0x0018 00024 (./bin/main/main.go:17) LEAQ 32(SP), BP
0x001d 00029 (./bin/main/main.go:17) PCDATA $0, $-2
0x001d 00029 (./bin/main/main.go:17) PCDATA $1, $-2
0x001d 00029 (./bin/main/main.go:17) FUNCDATA $0, gclocals·9fb7f0986f647f17cb53dda1484e0f7a(SB)
0x001d 00029 (./bin/main/main.go:17) FUNCDATA $1, gclocals·69c1753bd5f81501d95132d08af04464(SB)
0x001d 00029 (./bin/main/main.go:17) FUNCDATA $2, gclocals·9fb7f0986f647f17cb53dda1484e0f7a(SB)
0x001d 00029 (./bin/main/main.go:18) PCDATA $0, $1
0x001d 00029 (./bin/main/main.go:18) PCDATA $1, $0
0x001d 00029 (./bin/main/main.go:18) LEAQ type.uint8(SB), AX
0x0024 00036 (./bin/main/main.go:18) PCDATA $0, $0
0x0024 00036 (./bin/main/main.go:18) MOVQ AX, (SP)
0x0028 00040 (./bin/main/main.go:18) MOVQ $200, 8(SP)
0x0031 00049 (./bin/main/main.go:18) MOVQ $200, 16(SP)
0x003a 00058 (./bin/main/main.go:18) CALL runtime.makeslice(SB)
0x003f 00063 (./bin/main/main.go:18) PCDATA $0, $1
0x003f 00063 (./bin/main/main.go:18) MOVQ 24(SP), AX
0x0044 00068 (./bin/main/main.go:19) MOVB $23, (AX)
0x0047 00071 (./bin/main/main.go:20) PCDATA $0, $0
0x0047 00071 (./bin/main/main.go:20) PCDATA $1, $1
0x0047 00071 (./bin/main/main.go:20) MOVQ AX, "".~r0+48(SP)
0x004c 00076 (./bin/main/main.go:20) MOVQ $200, "".~r0+56(SP)
0x0055 00085 (./bin/main/main.go:20) MOVQ $200, "".~r0+64(SP)
0x005e 00094 (./bin/main/main.go:20) MOVQ 32(SP), BP
0x0063 00099 (./bin/main/main.go:20) ADDQ $40, SP
0x0067 00103 (./bin/main/main.go:20) RET
0x0068 00104 (./bin/main/main.go:20) NOP
0x0068 00104 (./bin/main/main.go:17) PCDATA $1, $-1
0x0068 00104 (./bin/main/main.go:17) PCDATA $0, $-2
0x0068 00104 (./bin/main/main.go:17) CALL runtime.morestack_noctxt(SB) // slow path reached from the JLS above: ask the runtime for more stack
0x006d 00109 (./bin/main/main.go:17) PCDATA $0, $-1
0x006d 00109 (./bin/main/main.go:17) JMP 0 // jump back to the function entry and repeat the stack check; after growth the stack is expected to be large enough, so this does not loop forever

首先要明确一件事:每个线程都有自己的栈空间(如果各个线程共用栈空间,由于线程之间独立运行,就会造成函数返回错乱),同样,每个g也有各自的栈空间

g中的每个函数的栈帧都在g的栈空间中存在,很明显栈帧的边界绝对不能超过g的栈边界,不然就会导致g之间干扰从而访问错乱

栈是向下(向低地址方向)增长的,栈顶(SP)的值越小,已经使用的栈空间就越大

明确之后我们接着看

函数被调用会优先进行栈检查,SP是栈帧的栈顶, 当 SP <= g.stackguard0 的时候,表示栈帧的边界超过g的安全边界,表示快要超出了g的栈边界,那么g需要进行栈扩容

接着就会调用runtime.morestack_noctxt函数,然后跳到runtime·morestack执行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// morestack but not preserving ctxt.
// Zeroes DX and then jumps (no CALL, so no extra return address is pushed)
// to runtime·morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
MOVL $0, DX // clear DX; morestack stores DX into g.sched.ctxt
JMP runtime·morestack(SB)

// morestack records the caller's context in m.morebuf and g.sched, switches
// to the g0 stack and calls newstack. It aborts when invoked on g0 or gsignal.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
// Cannot grow scheduler stack (m->g0).
get_tls(CX) // load the TLS base into CX
MOVQ g(CX), BX // BX = current g
MOVQ g_m(BX), BX // BX = g.m
MOVQ m_g0(BX), SI // SI = m.g0
CMPQ g(CX), SI // compare the current g with m.g0
JNE 3(PC) // if they differ, branch past the two CALLs below; if equal we are on g0, whose stack cannot be grown, so report and abort
CALL runtime·badmorestackg0(SB)
CALL runtime·abort(SB)

// Cannot grow signal stack (m->gsignal).
MOVQ m_gsignal(BX), SI // SI = m.gsignal
CMPQ g(CX), SI // compare the current g with m.gsignal
JNE 3(PC) // if they differ, branch past the two CALLs below; the signal stack is never grown
CALL runtime·badmorestackgsignal(SB)
CALL runtime·abort(SB)

// Called from f.
// Set m->morebuf to f's caller.
NOP SP // tell vet SP changed - stop checking offsets
MOVQ 8(SP), AX // f's caller's PC
MOVQ AX, (m_morebuf+gobuf_pc)(BX)
LEAQ 16(SP), AX // f's caller's SP
MOVQ AX, (m_morebuf+gobuf_sp)(BX)
get_tls(CX)
MOVQ g(CX), SI
MOVQ SI, (m_morebuf+gobuf_g)(BX)

// Set g->sched to context in f.
MOVQ 0(SP), AX // f's PC
MOVQ AX, (g_sched+gobuf_pc)(SI) // 0(SP) is the return address of the CALL to morestack_noctxt (offset 0x006d in the Test listing); it is saved as g.sched.pc, and newstack later resumes there via gogo
MOVQ SI, (g_sched+gobuf_g)(SI)
LEAQ 8(SP), AX // f's SP
MOVQ AX, (g_sched+gobuf_sp)(SI)
MOVQ BP, (g_sched+gobuf_bp)(SI)
MOVQ DX, (g_sched+gobuf_ctxt)(SI)

// Call newstack on m->g0's stack.
MOVQ m_g0(BX), BX
MOVQ BX, g(CX)
MOVQ (g_sched+gobuf_sp)(BX), SP // switch SP to g0's saved stack pointer; everything below runs on the g0 stack
CALL runtime·newstack(SB) // call runtime·newstack
CALL runtime·abort(SB) // crash if newstack returns
RET

接下来看newstack函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// newstack is called by morestack on the g0 stack after the interrupted
// goroutine's context has been saved in m.morebuf and g.sched. It validates
// the request, handles a pending preemption request if stackguard0 holds
// stackPreempt, and otherwise doubles the goroutine's stack via copystack
// and resumes the goroutine with gogo. It does not return to its caller.
func newstack() {
thisg := getg()
// TODO: double check all gp. shouldn't be getg().
if thisg.m.morebuf.g.ptr().stackguard0 == stackFork {
throw("stack growth after fork")
}
if thisg.m.morebuf.g.ptr() != thisg.m.curg {
print("runtime: newstack called from g=", hex(thisg.m.morebuf.g), "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n")
morebuf := thisg.m.morebuf
traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g.ptr())
throw("runtime: wrong goroutine in newstack")
}

gp := thisg.m.curg

if thisg.m.curg.throwsplit {
// Update syscallsp, syscallpc in case traceback uses them.
morebuf := thisg.m.morebuf
gp.syscallsp = morebuf.sp
gp.syscallpc = morebuf.pc
pcname, pcoff := "(unknown)", uintptr(0)
f := findfunc(gp.sched.pc)
if f.valid() {
pcname = funcname(f)
pcoff = gp.sched.pc - f.entry
}
print("runtime: newstack at ", pcname, "+", hex(pcoff),
" sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")

thisg.m.traceback = 2 // Include runtime frames
traceback(morebuf.pc, morebuf.sp, morebuf.lr, gp)
throw("runtime: stack split at bad time")
}

morebuf := thisg.m.morebuf
thisg.m.morebuf.pc = 0
thisg.m.morebuf.lr = 0
thisg.m.morebuf.sp = 0
thisg.m.morebuf.g = 0

// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
preempt := atomic.Loaduintptr(&gp.stackguard0) == stackPreempt // true when a preemption of gp has been requested (stackguard0 holds the stackPreempt sentinel)

// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
if preempt { // a preemption was requested
if !canPreemptM(thisg.m) { // the M must not be preempted right now (see the conditions listed above), so keep running gp
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp.stackguard0 = gp.stack.lo + _StackGuard
gogo(&gp.sched) // never return
}
}

if gp.stack.lo == 0 { // stack.lo == 0 means gp has no stack to grow
throw("missing stack in newstack")
}
sp := gp.sched.sp
if sys.ArchFamily == sys.AMD64 || sys.ArchFamily == sys.I386 || sys.ArchFamily == sys.WASM {
// The call to morestack cost a word.
sp -= sys.PtrSize
}
if stackDebug >= 1 || sp < gp.stack.lo {
print("runtime: newstack sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
}
if sp < gp.stack.lo { // sp is already below the low bound of gp's stack
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->status=", hex(readgstatus(gp)), "\n ")
print("runtime: split stack overflow: ", hex(sp), " < ", hex(gp.stack.lo), "\n")
throw("runtime: split stack overflow")
}

if preempt { // a preemption was requested and the M may be preempted
if gp == thisg.m.g0 { // g0 must never be preempted
throw("runtime: preempt g0")
}
if thisg.m.p == 0 && thisg.m.locks == 0 {
throw("runtime: g is running but p is not")
}

if gp.preemptShrink { // a stack shrink was postponed until this preemption point
// We're at a synchronous safe point now, so
// do the pending stack shrink.
gp.preemptShrink = false
shrinkstack(gp) // shrink the stack
}

if gp.preemptStop { // the preemption should park gp rather than merely reschedule it
preemptPark(gp) // never returns
}

// Act like goroutine called runtime.Gosched.
gopreempt_m(gp) // never return; presumably moves gp from running to runnable and re-enters the scheduler - confirm in gopreempt_m
}

// Allocate a bigger segment and move the stack.
oldsize := gp.stack.hi - gp.stack.lo // current size of gp's stack
newsize := oldsize * 2 // new size: double the old one
if newsize > maxstacksize { // the doubled size would exceed maxstacksize: fatal. NOTE(review): an earlier note here said the limit is "1m"; upstream appears to set 1 GB on 64-bit and 250 MB on 32-bit - confirm in runtime/proc.go
print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n")
print("runtime: sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
throw("stack overflow")
}

// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus(gp, _Grunning, _Gcopystack) // mark gp as _Gcopystack while its stack is being moved

// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
copystack(gp, newsize) // allocate the larger stack and move the old contents onto it
if stackDebug >= 1 {
print("stack grow done\n")
}
casgstatus(gp, _Gcopystack, _Grunning) // put gp back into _Grunning
gogo(&gp.sched) // resume gp at gp.sched.pc, i.e. the instruction right after the CALL to morestack_noctxt
}

其中调用了一个copystack函数,也看看

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// copystack moves gp's stack onto a freshly allocated stack of newsize bytes.
// It copies the in-use portion of the old stack, adjusts pointers that refer
// into the old stack (sudogs, ctxt, defers, panics and the frames themselves),
// installs the new stack on gp, and frees the old one.
// It throws if gp is in a system call or has no stack.
func copystack(gp *g, newsize uintptr) {
if gp.syscallsp != 0 {
throw("stack growth not allowed in system call")
}
old := gp.stack
if old.lo == 0 {
throw("nil stackbase")
}
used := old.hi - gp.sched.sp // bytes in use: from the saved SP up to the high end of the stack

// allocate new stack
new := stackalloc(uint32(newsize)) // allocate a stack of the new size
if stackPoisonCopy != 0 { // debug option: pre-fill the whole new stack with 0xfd
fillstack(new, 0xfd)
}
if stackDebug >= 1 {
print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]", " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
}

// Compute adjustment.
var adjinfo adjustinfo
adjinfo.old = old
adjinfo.delta = new.hi - old.hi // offset between the high ends of the new and old stacks

// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans {
adjustsudogs(gp, &adjinfo)
} else {
// sudogs may be pointing in to the stack and gp has
// released channel locks, so other goroutines could
// be writing to gp's stack. Find the highest such
// pointer so we can handle everything there and below
// carefully. (This shouldn't be far from the bottom
// of the stack, so there's little cost in handling
// everything below it carefully.)
adjinfo.sghi = findsghi(gp, old)

// Synchronize with channel ops and copy the part of
// the stack they may interact with.
ncopy -= syncadjustsudogs(gp, used, &adjinfo)
}

// Copy the stack (or the rest of it) to the new location
memmove(unsafe.Pointer(new.hi-ncopy), unsafe.Pointer(old.hi-ncopy), ncopy) // copy the remaining in-use bytes, aligned to the high ends of both stacks

// Adjust remaining structures that have pointers into stacks.
// We have to do most of these before we traceback the new
// stack because gentraceback uses them.
adjustctxt(gp, &adjinfo) // adjust ctxt
adjustdefers(gp, &adjinfo) // adjust defers
adjustpanics(gp, &adjinfo) // adjust panics
if adjinfo.sghi != 0 {
adjinfo.sghi += adjinfo.delta
}

// Swap out old stack for new one
gp.stack = new // install the new stack
gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request
gp.sched.sp = new.hi - used
gp.stktopsp += adjinfo.delta

// Adjust pointers in the new stack.
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, adjustframe, noescape(unsafe.Pointer(&adjinfo)), 0)

// free old stack
if stackPoisonCopy != 0 {
fillstack(old, 0xfc)
}
stackfree(old) // release the old stack
}

大部分函数在编译后都会在函数的开始处插入一段栈检查的逻辑,检查发现g栈空间不够用了,就会进行栈扩容;同一个入口(newstack)也会顺带处理对g的抢占请求

但不是所有的函数都会插入栈检查逻辑,有的函数可能会被编译器优化内联掉。也有的函数会被识别为 NOSPLIT,从而不进行栈检查。当然,不进行栈检查都是为了提高性能

下篇预告

Golang内存管理




微信关注我,及时接收最新技术文章