调试笔记之VTUNE崩溃
把VTUNE升级到2017 Update 3后,在分析系统采样的数据时崩溃,昨天发生过一次,以为是随机的,截了图之后便将其关闭了。今天再用,又崩溃了,而且感觉是同一个位置。
于是便上调试器看了一下。
通过~* k浏览各个线程,发现是21号线程有异常,其栈回溯如下:
# Child-SP RetAddr Call Site
00 000000ca`5f976170 00007ffd`75989694 ntdll!RtlLookupFunctionEntry+0x11a
01 000000ca`5f9761c0 00007ffd`759e9cba ntdll!RtlDispatchException+0xf4
02 000000ca`5f9768c0 00007ffd`38941daf ntdll!KiUserExceptionDispatch+0x3a
03 000000ca`5f977070 000000ca`5f977330 <Unloaded_amplxe_qfagent_assert_1.18.dll>+0x1daf
04 000000ca`5f977078 00000000`00000029 0x000000ca`5f977330
05 000000ca`5f977080 00000000`00000000 0x29
KiUserExceptionDispatch以下的栈帧落在已卸载的模块里,调试器无法可靠地回溯,于是尝试手工在栈上寻找内核复制上来的信息。
值得特别说明的是,x64下内核向用户态复制的信息与32位是不同的。在x64下,栈顶是CONTEXT结构体,然后是EXCEPTION_RECORD,最后是MACHINE_FRAME。
先显示各个结构体的大小:
0:021> ?? sizeof(_CONTEXT)
unsigned int64 0x4d0
0:021> ?? sizeof(_EXCEPTION_RECORD)
unsigned int64 0x98
0:021> dd 000000ca`5f9768c0 L4d0/4
000000ca`5f9768c0 00000000 00000000 00000000 00000000
000000ca`5f9768d0 00000000 00000000 00000000 00000000
000000ca`5f9768e0 00000000 00000000 00000000 00000000
000000ca`5f9768f0 0010005f 00001fa0 002b0033 0053002b
000000ca`5f976900 002b002b 00010246 00000000 00000000
000000ca`5f976910 00000000 00000000 00000000 00000000
000000ca`5f976920 00000000 00000000 00000000 00000000
000000ca`5f976930 00000000 00000000 00000000 00000000
000000ca`5f976940 00000000 00000000 00000001 00000000
000000ca`5f976950 00000000 00000000 5f977070 000000ca
000000ca`5f976960 5f9770a0 000000ca 10ddc920 000002af
000000ca`5f976970 5f978698 000000ca 32016dd0 00007ffd
000000ca`5f976980 5f976d50 000000ca 00000000 00000000
000000ca`5f976990 00000246 00000000 286bb510 000002af
000000ca`5f9769a0 00000196 00000000 5f977420 000000ca
000000ca`5f9769b0 00000029 00000000 38941daf 00007ffd
000000ca`5f9769c0 0000027f 00000000 00000000 00000000
000000ca`5f9769d0 00000000 00000000 00001fa0 0000ffff
000000ca`5f9769e0 00000000 00000000 00000000 00000000
000000ca`5f9769f0 00000000 00000000 00000000 00000000
000000ca`5f976a00 00000000 00000000 00000000 00000000
000000ca`5f976a10 00000000 00000000 00000000 00000000
000000ca`5f976a20 00000000 00000000 00000000 00000000
000000ca`5f976a30 00000000 00000000 00000000 00000000
000000ca`5f976a40 00000000 00000000 00000000 00000000
000000ca`5f976a50 00000000 00000000 00000000 00000000
000000ca`5f976a60 00000000 00000000 00000000 00000000
000000ca`5f976a70 00000000 00000000 00000000 00000000
000000ca`5f976a80 00000000 00000000 00000000 00000000
000000ca`5f976a90 00000000 00000000 00000000 00000000
000000ca`5f976aa0 00000000 00000000 00000000 00000000
000000ca`5f976ab0 00000000 00000000 00000000 00000000
000000ca`5f976ac0 00000000 00000000 00000000 00000000
000000ca`5f976ad0 00000000 00000000 00000000 00000000
000000ca`5f976ae0 00000000 3ff00000 00000000 00000000
000000ca`5f976af0 00000000 00000000 00000000 00000000
000000ca`5f976b00 00000000 00000000 00000000 00000000
000000ca`5f976b10 00000000 00000000 00000000 00000000
000000ca`5f976b20 00000000 00000000 00000000 00000000
000000ca`5f976b30 00000000 00000000 00000000 00000000
000000ca`5f976b40 00000000 00000000 00000000 00000000
000000ca`5f976b50 00000000 00000000 00000000 00000000
000000ca`5f976b60 00000000 00000000 00000072 00000000
000000ca`5f976b70 0000000b 00000002 2e2aa158 00007ffd
000000ca`5f976b80 00000072 00000000 00000072 00000000
000000ca`5f976b90 00000000 00000000 7594f726 00007ffd
000000ca`5f976ba0 10e86300 000002af 00000000 00000000
000000ca`5f976bb0 5f976e70 000000ca 00000072 00000000
000000ca`5f976bc0 00000000 00000000 2e26ebfe 00007ffd
000000ca`5f976bd0 00000000 00000000 2e310000 00000000
000000ca`5f976be0 00000000 00000000 2e2b00a1 00007ffd
000000ca`5f976bf0 5f978698 000000ca 2e263490 00007ffd
000000ca`5f976c00 000000c0 00000000 0e300cc0 000002af
000000ca`5f976c10 00000000 00000000 2e2b0217 00007ffd
000000ca`5f976c20 00000000 00000000 00000000 00000000
000000ca`5f976c30 00000030 00000000 5f976d90 000000ca
000000ca`5f976c40 5f976cf0 000000ca 2e2a6a57 00007ffd
000000ca`5f976c50 00000029 00000000 29a31ca2 00007ffd
000000ca`5f976c60 5f976d90 000000ca 0000000f 00000000
000000ca`5f976c70 41534944 5f454c42 5f977420 000000ca
000000ca`5f976c80 00000030 00000000 759b6457 00007ffd
000000ca`5f976c90 00000070 00000000 0e300cc0 000002af
000000ca`5f976ca0 00000033 00000000 00000000 00000000
000000ca`5f976cb0 00000000 00000000 29a4719c 00007ffd
000000ca`5f976cc0 00000000 00000000 2e2addea 00007ffd
000000ca`5f976cd0 00000016 00000000 7ab02fcd 000000ca
000000ca`5f976ce0 fffffffe ffffffff 286bb510 000002af
000000ca`5f976cf0 00000000 00000000 29a9b849 00007ffd
000000ca`5f976d00 00000000 00000000 5f976d90 000000ca
000000ca`5f976d10 00000000 00000000 00003cac 00000204
000000ca`5f976d20 5f976d50 000000ca 29a321fe 00007ffd
000000ca`5f976d30 5f976d90 000000ca 00000021 00000000
000000ca`5f976d40 00000000 00000000 00000000 00000000
000000ca`5f976d50 16411320 000002af 00000000 00000000
000000ca`5f976d60 2e320000 00007ffd 2e2a8b38 00007ffd
000000ca`5f976d70 00000000 00000000 00000000 00000000
000000ca`5f976d80 00000000 00000000 00000000 00000000
从段寄存器的值来看,很像是CONTEXT结构体,使用dt命令验证:
0:021> dt _CONTEXT 000000ca`5f9768c0
MSVCR120!_CONTEXT
+0x000 P1Home : 0
+0x008 P2Home : 0
+0x010 P3Home : 0
+0x018 P4Home : 0
+0x020 P5Home : 0
+0x028 P6Home : 0
+0x030 ContextFlags : 0x10005f
+0x034 MxCsr : 0x1fa0
+0x038 SegCs : 0x33
+0x03a SegDs : 0x2b
+0x03c SegEs : 0x2b
+0x03e SegFs : 0x53
+0x040 SegGs : 0x2b
+0x042 SegSs : 0x2b
+0x044 EFlags : 0x10246
+0x048 Dr0 : 0
+0x050 Dr1 : 0
+0x058 Dr2 : 0
+0x060 Dr3 : 0
+0x068 Dr6 : 0
+0x070 Dr7 : 0
+0x078 Rax : 0
+0x080 Rcx : 0
+0x088 Rdx : 1
+0x090 Rbx : 0
+0x098 Rsp : 0x000000ca`5f977070
+0x0a0 Rbp : 0x000000ca`5f9770a0
+0x0a8 Rsi : 0x000002af`10ddc920
+0x0b0 Rdi : 0x000000ca`5f978698
+0x0b8 R8 : 0x00007ffd`32016dd0
+0x0c0 R9 : 0x000000ca`5f976d50
+0x0c8 R10 : 0
+0x0d0 R11 : 0x246
+0x0d8 R12 : 0x000002af`286bb510
+0x0e0 R13 : 0x196
+0x0e8 R14 : 0x000000ca`5f977420
+0x0f0 R15 : 0x29
+0x0f8 Rip : 0x00007ffd`38941daf
+0x100 FltSave : _XSAVE_FORMAT
+0x100 Header : [2] _M128A
+0x120 Legacy : [8] _M128A
+0x1a0 Xmm0 : _M128A
+0x1b0 Xmm1 : _M128A
+0x1c0 Xmm2 : _M128A
+0x1d0 Xmm3 : _M128A
+0x1e0 Xmm4 : _M128A
+0x1f0 Xmm5 : _M128A
+0x200 Xmm6 : _M128A
+0x210 Xmm7 : _M128A
+0x220 Xmm8 : _M128A
+0x230 Xmm9 : _M128A
+0x240 Xmm10 : _M128A
+0x250 Xmm11 : _M128A
+0x260 Xmm12 : _M128A
+0x270 Xmm13 : _M128A
+0x280 Xmm14 : _M128A
+0x290 Xmm15 : _M128A
+0x300 VectorRegister : [26] _M128A
+0x4a0 VectorControl : 0x00007ffd`2e320000
+0x4a8 DebugControl : 0x00007ffd`2e2a8b38
+0x4b0 LastBranchToRip : 0
+0x4b8 LastBranchFromRip : 0
+0x4c0 LastExceptionToRip : 0
+0x4c8 LastExceptionFromRip : 0
看起来没什么问题。
继续dd观察CONTEXT后的内容,感觉隔了一段才是EXCEPTION_RECORD:
0:021> dd
000000ca`5f976d90 fffffb30 000007b0 fffffb30 000004d0
000000ca`5f976da0 000000f0 000001f0 0000000f 00000000
000000ca`5f976db0 c0000005 00000000 00000000 00000000
000000ca`5f976dc0 38941daf 00007ffd 00000002 ffffd66b
000000ca`5f976dd0 00000008 00000000 38941daf 00007ffd
000000ca`5f976de0 00000000 00000000 00000000 00000000
000000ca`5f976df0 00000000 00000000 00000000 00000000
000000ca`5f976e00 00000000 00000000 00000000 00000000
间隔的距离是两行,即32个字节。
0:021> dt ntdll!_EXCEPTION_RECORD 000000ca`5f976db0
+0x000 ExceptionCode : 0n-1073741819
+0x004 ExceptionFlags : 0
+0x008 ExceptionRecord : (null)
+0x010 ExceptionAddress : 0x00007ffd`38941daf Void
+0x018 NumberParameters : 2
+0x020 ExceptionInformation : [15] 8
观察异常的补充信息:
0:021> dq 000000ca`5f976db0+20
000000ca`5f976dd0 00000000`00000008 00007ffd`38941daf
000000ca`5f976de0 00000000`00000000 00000000`00000000
000000ca`5f976df0 00000000`00000000 00000000`00000000
000000ca`5f976e00 00000000`00000000 00000000`00000000
000000ca`5f976e10 00000000`00000000 00000000`00000000
000000ca`5f976e20 00000000`00000000 00000000`00000000
000000ca`5f976e30 00000000`00000000 00000000`00000000
000000ca`5f976e40 00000000`00000000 00000000`00000000
ExceptionInformation[0]为8,表示访问类型是执行,看来是取指令时触发的。ExceptionInformation[1]是所访问的地址,它和触发异常的IP地址是一样的,都是00007ffd`38941daf。
使用刚才得到的CONTEXT地址时光倒流:
0:021> .cxr 000000ca`5f9768c0
rax=0000000000000000 rbx=0000000000000000 rcx=0000000000000000
rdx=0000000000000001 rsi=000002af10ddc920 rdi=000000ca5f978698
rip=00007ffd38941daf rsp=000000ca5f977070 rbp=000000ca5f9770a0
r8=00007ffd32016dd0 r9=000000ca5f976d50 r10=0000000000000000
r11=0000000000000246 r12=000002af286bb510 r13=0000000000000196
r14=000000ca5f977420 r15=0000000000000029
iopl=0 nv up ei pl zr na po nc
cs=0033 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00010246
<Unloaded_amplxe_qfagent_assert_1.18.dll>+0x1daf:
00007ffd`38941daf ?? ???
调试器也无法访问。
0:021> dqs @rsp
000000ca`5f977070 000000ca`5f977330
000000ca`5f977078 00000000`00000029
000000ca`5f977080 00000000`00000000
000000ca`5f977088 000002af`169d9448
000000ca`5f977090 00007ffd`3313fe78 amplxe_msdia120!SafeStackAllocator<1024>::`vftable'
000000ca`5f977098 00000000`00000000
000000ca`5f9770a0 000002af`164109f0
000000ca`5f9770a8 00000000`00000000
000000ca`5f9770b0 000002af`16411f60
000000ca`5f9770b8 00000000`00000000
000000ca`5f9770c0 000002af`27f60000
000000ca`5f9770c8 00007ffd`330c0000 amplxe_msdia120!dia::CDiaLoadCallback::QueryInterface+0x8
000000ca`5f9770d0 00000000`00000000
000000ca`5f9770d8 00000000`00000007
000000ca`5f9770e0 00000000`00000000
000000ca`5f9770e8 00000000`00000000
返回地址位置是栈上的地址,没有任何有效指令:
0:021> u 0000ca`5f977330
000000ca`5f977330 0000 add byte ptr [rax],al
000000ca`5f977332 41 ???
000000ca`5f977333 16 ???
观察VTUNE的LOG:
qfagent_1::processAssertionExceptionWindows : 2017-05-22-14-15-55-535 : Assertion report initialized
qfagent_1::processAssertionExceptionWindows : 2017-05-22-14-15-55-603 : Process section is set up in crash report
qfagent_1::runAssertionHandling : 2017-05-22-14-15-55-609 : Started assertion handling in crashed thread...
qfagent_1::startExecuteMinidumpTaskthread : 2017-05-22-14-15-55-613 : EMT Service started!
qfagent_1::ExecuteMinidumpTaskThread::operator() : 2017-05-22-14-15-55-613 : Create THE PIPE
qfagent_1::runAssertionHandling : 2017-05-22-14-15-55-618 : Set assertion info in report
ted
qfagent_1::runAssertionHandling : 2017-05-22-14-15-55-627 : Assertion handling will now continue in worker thread...
qfagent_1::WindowsHandlerState::run : 2017-05-22-14-15-55-645 : Worker thread took over...
qfagent_1::HandlerState::exportCrashInfo : 2017-05-22-14-15-55-652 : Set exception info, thread ID, pid
qfagent_1::ProblemReportStorage::addContext : 2017-05-22-14-15-55-662 : Added context for thread: 0x0000000000003300
qfagent_1::HandlerState::exportCrashInfo : 2017-05-22-14-15-55-668 : Set thread context
qfagent_1::HandlerState::exportCrashInfo : 2017-05-22-14-15-55-676 : Set crash dump info
qfagent_1::HandlerState::exportCrashInfo : 2017-05-22-14-15-55-695 : Added premortal log
qfagent_1::HandlerState::exportCrashInfo : 2017-05-22-14-15-55-702 : Added app property pool
qfagent_1::ProblemReportStorage::detectProductInfo : 2017-05-22-14-15-55-711 : Detecting product info
在名为amplxe-2017.05.22-13h59m53s.8088的日志中,发现大量Alghorithm error,还有两次断言失败:
1019458 [7992] ERROR smip <> - Alghorithm error, addr==e430 Binary name:c:\windows\system32\drivers\wdfilter.sys, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\smip3\src\core\smip_bbstorage.cpp:36b
1019458 [7992] ERROR smip <> - Alghorithm error, addr==e430 Binary name:c:\windows\system32\drivers\wdfilter.sys, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\smip3\src\core\smip_bbstorage.cpp:36b
1019458 [7992] ERROR smip <> - Alghorithm error, addr==e430 Binary name:c:\windows\system32\drivers\wdfilter.sys, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\smip3\src\core\smip_bbstorage.cpp:36b
1019458 [7992] ERROR smip <> - Alghorithm error, addr==e430 Binary name:c:\windows\system32\drivers\wdfilter.sys, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\smip3\src\core\smip_bbstorage.cpp:36b
1022218 [7992] WARN dicerresolver <> - We only support MD5 or SHA1 checksums now d:\rs1\minkernel\wdf\framework\shared\core\fxrequestapi.cpp, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:485
1022219 [7992] WARN dicerresolver <> - We only support MD5 or SHA1 checksums now d:\rs1\minkernel\wdf\framework\shared\core\fxrequestapi.cpp, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:485
1022220 [7992] ERROR qfagent.assert <> - ASSERT FAILED: '0', at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:406
1040392 [5700] WARN dicerresolver <> - We only support MD5 or SHA1 checksums now d:\rs1.public.fre\sdk\inc\wrl\implements.h, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:485
1040392 [5700] WARN dicerresolver <> - We only support MD5 or SHA1 checksums now d:\rs1.public.fre\sdk\inc\wrl\implements.h, at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:485
1040392 [5700] ERROR qfagent.assert <> - ASSERT FAILED: '0', at file: C:\bb\INNLphep2w6r\b\b\tmp9g1ltn\vcs\dicerresolver2\src\impl\module_bank.cpp:406
这与用dqs直接观察栈上的执行痕迹(见下面这一行)刚好吻合。看来很可能是有个挺愚蠢的bug触发了断言,而在处理断言的时候,断言模块被意外卸载,于是又出了上面的取指令失败……看一下module_bank.cpp的第406行应该就知道了。
000000ca`5f978778 00007ffd`30a575b9 amplxe_cpil_2_18!CPIL_2_18::debug::_private::____________________ASSERT____________________+0x29
出问题的版本是:
Windows 8 6.2.9200
Product name: Intel VTune Amplifier, build 510739 (Release)