mirror of https://github.com/golang/go.git
runtime: fix profiling on windows/ARM
Fix profiling handler to get the correct g for the m being profiled. Store a pointer to the TLS slot holding g in the thread's m. This enables the profiling handler to get the current g for the thread, even if the thread is executing external code or system code. Updates #26148 Signed-off-by: Jordan Rhee <jordanrh@microsoft.com> Change-Id: Ie061284c12341c76c7d96cc0c2d5bac969230829 Reviewed-on: https://go-review.googlesource.com/c/153718 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
12c0f1b3e6
commit
6764d7aeac
|
|
@ -872,24 +872,14 @@ func profilem(mp *m, thread uintptr) {
|
||||||
default:
|
default:
|
||||||
panic("unsupported architecture")
|
panic("unsupported architecture")
|
||||||
case "arm":
|
case "arm":
|
||||||
// TODO(jordanrh1): this is incorrect when Go is executing
|
tls := &mp.tls[0]
|
||||||
// on the system or signal stacks because curg returns
|
gp = **((***g)(unsafe.Pointer(tls)))
|
||||||
// the current user g. The true g is stored in thread
|
|
||||||
// local storage, which we cannot access from another CPU.
|
|
||||||
// We cannot pull R10 from the thread context because
|
|
||||||
// it might be executing C code, in which case R10
|
|
||||||
// would not be g.
|
|
||||||
gp = mp.curg
|
|
||||||
case "386", "amd64":
|
case "386", "amd64":
|
||||||
tls := &mp.tls[0]
|
tls := &mp.tls[0]
|
||||||
gp = *((**g)(unsafe.Pointer(tls)))
|
gp = *((**g)(unsafe.Pointer(tls)))
|
||||||
}
|
}
|
||||||
|
|
||||||
if gp == nil {
|
|
||||||
sigprofNonGoPC(r.ip())
|
|
||||||
} else {
|
|
||||||
sigprof(r.ip(), r.sp(), 0, gp, mp)
|
sigprof(r.ip(), r.sp(), 0, gp, mp)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func profileloop1(param uintptr) uint32 {
|
func profileloop1(param uintptr) uint32 {
|
||||||
|
|
|
||||||
|
|
@ -362,6 +362,9 @@ TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0
|
||||||
MOVW R0, g_m(g)
|
MOVW R0, g_m(g)
|
||||||
BL runtime·save_g(SB)
|
BL runtime·save_g(SB)
|
||||||
|
|
||||||
|
// do per-thread TLS initialization
|
||||||
|
BL runtime·init_thread_tls(SB)
|
||||||
|
|
||||||
// Layout new m scheduler stack on os stack.
|
// Layout new m scheduler stack on os stack.
|
||||||
MOVW R13, R0
|
MOVW R13, R0
|
||||||
MOVW R0, g_stack+stack_hi(g)
|
MOVW R0, g_stack+stack_hi(g)
|
||||||
|
|
@ -595,3 +598,95 @@ useQPC:
|
||||||
B runtime·nanotimeQPC(SB) // tail call
|
B runtime·nanotimeQPC(SB) // tail call
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// save_g saves the g register (R10) into thread local memory
|
||||||
|
// so that we can call externally compiled
|
||||||
|
// ARM code that will overwrite those registers.
|
||||||
|
// NOTE: runtime.gogo assumes that R1 is preserved by this function.
|
||||||
|
// runtime.mcall assumes this function only clobbers R0 and R11.
|
||||||
|
// Returns with g in R0.
|
||||||
|
// Save the value in the _TEB->TlsSlots array.
|
||||||
|
// Effectively implements TlsSetValue().
|
||||||
|
// tls_g stores the TLS slot allocated TlsAlloc().
|
||||||
|
TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
MRC 15, 0, R0, C13, C0, 2
|
||||||
|
ADD $0xe10, R0
|
||||||
|
MOVW $runtime·tls_g(SB), R11
|
||||||
|
MOVW (R11), R11
|
||||||
|
MOVW g, R11<<2(R0)
|
||||||
|
MOVW g, R0 // preserve R0 across call to setg<>
|
||||||
|
RET
|
||||||
|
|
||||||
|
// load_g loads the g register from thread-local memory,
|
||||||
|
// for use after calling externally compiled
|
||||||
|
// ARM code that overwrote those registers.
|
||||||
|
// Get the value from the _TEB->TlsSlots array.
|
||||||
|
// Effectively implements TlsGetValue().
|
||||||
|
TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
MRC 15, 0, R0, C13, C0, 2
|
||||||
|
ADD $0xe10, R0
|
||||||
|
MOVW $runtime·tls_g(SB), g
|
||||||
|
MOVW (g), g
|
||||||
|
MOVW g<<2(R0), g
|
||||||
|
RET
|
||||||
|
|
||||||
|
// This is called from rt0_go, which runs on the system stack
|
||||||
|
// using the initial stack allocated by the OS.
|
||||||
|
// It calls back into standard C using the BL below.
|
||||||
|
// To do that, the stack pointer must be 8-byte-aligned.
|
||||||
|
TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
MOVM.DB.W [R4, R14], (R13) // push {r4, lr}
|
||||||
|
|
||||||
|
// Ensure stack is 8-byte aligned before calling C code
|
||||||
|
MOVW R13, R4
|
||||||
|
BIC $0x7, R13
|
||||||
|
|
||||||
|
// Allocate a TLS slot to hold g across calls to external code
|
||||||
|
MOVW $runtime·_TlsAlloc(SB), R0
|
||||||
|
MOVW (R0), R0
|
||||||
|
BL (R0)
|
||||||
|
|
||||||
|
// Assert that slot is less than 64 so we can use _TEB->TlsSlots
|
||||||
|
CMP $64, R0
|
||||||
|
MOVW $runtime·abort(SB), R1
|
||||||
|
BL.GE (R1)
|
||||||
|
|
||||||
|
// Save Slot into tls_g
|
||||||
|
MOVW $runtime·tls_g(SB), R1
|
||||||
|
MOVW R0, (R1)
|
||||||
|
|
||||||
|
BL runtime·init_thread_tls(SB)
|
||||||
|
|
||||||
|
MOVW R4, R13
|
||||||
|
MOVM.IA.W (R13), [R4, R15] // pop {r4, pc}
|
||||||
|
|
||||||
|
// void init_thread_tls()
|
||||||
|
//
|
||||||
|
// Does per-thread TLS initialization. Saves a pointer to the TLS slot
|
||||||
|
// holding G, in the current m.
|
||||||
|
//
|
||||||
|
// g->m->tls[0] = &_TEB->TlsSlots[tls_g]
|
||||||
|
//
|
||||||
|
// The purpose of this is to enable the profiling handler to get the
|
||||||
|
// current g associated with the thread. We cannot use m->curg because curg
|
||||||
|
// only holds the current user g. If the thread is executing system code or
|
||||||
|
// external code, m->curg will be NULL. The thread's TLS slot always holds
|
||||||
|
// the current g, so save a reference to this location so the profiling
|
||||||
|
// handler can get the real g from the thread's m.
|
||||||
|
//
|
||||||
|
// Clobbers R0-R3
|
||||||
|
TEXT runtime·init_thread_tls(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
// compute &_TEB->TlsSlots[tls_g]
|
||||||
|
MRC 15, 0, R0, C13, C0, 2
|
||||||
|
ADD $0xe10, R0
|
||||||
|
MOVW $runtime·tls_g(SB), R1
|
||||||
|
MOVW (R1), R1
|
||||||
|
MOVW R1<<2, R1
|
||||||
|
ADD R1, R0
|
||||||
|
|
||||||
|
// save in g->m->tls[0]
|
||||||
|
MOVW g_m(g), R1
|
||||||
|
MOVW R0, m_tls(R1)
|
||||||
|
RET
|
||||||
|
|
||||||
|
// Holds the TLS Slot, which was allocated by TlsAlloc()
|
||||||
|
GLOBL runtime·tls_g+0(SB), NOPTR, $4
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
#include "go_asm.h"
|
#include "go_asm.h"
|
||||||
#include "go_tls.h"
|
#include "go_tls.h"
|
||||||
#include "funcdata.h"
|
#include "funcdata.h"
|
||||||
|
|
@ -23,9 +25,6 @@
|
||||||
#ifdef GOOS_darwin
|
#ifdef GOOS_darwin
|
||||||
#define TLSG_IS_VARIABLE
|
#define TLSG_IS_VARIABLE
|
||||||
#endif
|
#endif
|
||||||
#ifdef GOOS_windows
|
|
||||||
#define TLSG_IS_VARIABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// save_g saves the g register into pthread-provided
|
// save_g saves the g register into pthread-provided
|
||||||
// thread-local memory, so that we can call externally compiled
|
// thread-local memory, so that we can call externally compiled
|
||||||
|
|
@ -38,17 +37,6 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
|
||||||
// nothing to do as nacl/arm does not use TLS at all.
|
// nothing to do as nacl/arm does not use TLS at all.
|
||||||
MOVW g, R0 // preserve R0 across call to setg<>
|
MOVW g, R0 // preserve R0 across call to setg<>
|
||||||
RET
|
RET
|
||||||
#else
|
|
||||||
#ifdef GOOS_windows
|
|
||||||
// Save the value in the _TEB->TlsSlots array.
|
|
||||||
// Effectively implements TlsSetValue().
|
|
||||||
MRC 15, 0, R0, C13, C0, 2
|
|
||||||
ADD $0xe10, R0
|
|
||||||
MOVW $runtime·tls_g(SB), R11
|
|
||||||
MOVW (R11), R11
|
|
||||||
MOVW g, R11<<2(R0)
|
|
||||||
MOVW g, R0 // preserve R0 accross call to setg<>
|
|
||||||
RET
|
|
||||||
#else
|
#else
|
||||||
// If the host does not support MRC the linker will replace it with
|
// If the host does not support MRC the linker will replace it with
|
||||||
// a call to runtime.read_tls_fallback which jumps to __kuser_get_tls.
|
// a call to runtime.read_tls_fallback which jumps to __kuser_get_tls.
|
||||||
|
|
@ -61,7 +49,6 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
|
||||||
MOVW g, R0 // preserve R0 across call to setg<>
|
MOVW g, R0 // preserve R0 across call to setg<>
|
||||||
RET
|
RET
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
// load_g loads the g register from pthread-provided
|
// load_g loads the g register from pthread-provided
|
||||||
// thread-local memory, for use after calling externally compiled
|
// thread-local memory, for use after calling externally compiled
|
||||||
|
|
@ -70,16 +57,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0
|
||||||
#ifdef GOOS_nacl
|
#ifdef GOOS_nacl
|
||||||
// nothing to do as nacl/arm does not use TLS at all.
|
// nothing to do as nacl/arm does not use TLS at all.
|
||||||
RET
|
RET
|
||||||
#else
|
|
||||||
#ifdef GOOS_windows
|
|
||||||
// Get the value from the _TEB->TlsSlots array.
|
|
||||||
// Effectively implements TlsGetValue().
|
|
||||||
MRC 15, 0, R0, C13, C0, 2
|
|
||||||
ADD $0xe10, R0
|
|
||||||
MOVW $runtime·tls_g(SB), g
|
|
||||||
MOVW (g), g
|
|
||||||
MOVW g<<2(R0), g
|
|
||||||
RET
|
|
||||||
#else
|
#else
|
||||||
// See save_g
|
// See save_g
|
||||||
MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer
|
MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer
|
||||||
|
|
@ -89,7 +66,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0
|
||||||
MOVW 0(R0), g
|
MOVW 0(R0), g
|
||||||
RET
|
RET
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
// This is called from rt0_go, which runs on the system stack
|
// This is called from rt0_go, which runs on the system stack
|
||||||
// using the initial stack allocated by the OS.
|
// using the initial stack allocated by the OS.
|
||||||
|
|
@ -102,20 +78,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0
|
||||||
// Declare a dummy word ($4, not $0) to make sure the
|
// Declare a dummy word ($4, not $0) to make sure the
|
||||||
// frame is 8 bytes and stays 8-byte-aligned.
|
// frame is 8 bytes and stays 8-byte-aligned.
|
||||||
TEXT runtime·_initcgo(SB),NOSPLIT,$4
|
TEXT runtime·_initcgo(SB),NOSPLIT,$4
|
||||||
#ifdef GOOS_windows
|
|
||||||
MOVW R13, R4
|
|
||||||
BIC $0x7, R13
|
|
||||||
MOVW $runtime·_TlsAlloc(SB), R0
|
|
||||||
MOVW (R0), R0
|
|
||||||
BL (R0)
|
|
||||||
// Assert that slot is less than 64 so we can use _TEB->TlsSlots
|
|
||||||
CMP $64, R0
|
|
||||||
MOVW $runtime·abort(SB), R1
|
|
||||||
BL.GE (R1)
|
|
||||||
MOVW $runtime·tls_g(SB), R1
|
|
||||||
MOVW R0, (R1)
|
|
||||||
MOVW R4, R13
|
|
||||||
#else
|
|
||||||
#ifndef GOOS_nacl
|
#ifndef GOOS_nacl
|
||||||
// if there is an _cgo_init, call it.
|
// if there is an _cgo_init, call it.
|
||||||
MOVW _cgo_init(SB), R4
|
MOVW _cgo_init(SB), R4
|
||||||
|
|
@ -131,8 +93,7 @@ TEXT runtime·_initcgo(SB),NOSPLIT,$4
|
||||||
MOVW $setg_gcc<>(SB), R1 // arg 1: setg
|
MOVW $setg_gcc<>(SB), R1 // arg 1: setg
|
||||||
MOVW g, R0 // arg 0: G
|
MOVW g, R0 // arg 0: G
|
||||||
BL (R4) // will clobber R0-R3
|
BL (R4) // will clobber R0-R3
|
||||||
#endif // GOOS_nacl
|
#endif
|
||||||
#endif // GOOS_windows
|
|
||||||
nocgo:
|
nocgo:
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue