amd64 assembly for W64

LRN lrn1986 at gmail.com
Sat Jan 8 16:29:15 CET 2011


On 03.01.2011 14:21, Werner Koch wrote:
> On Fri, 31 Dec 2010 05:39, lrn1986 at gmail.com said:
>
>> When configured with --disable-asm, libgcrypt successfully compiles
>> and passes all the tests.
> Then please do this.  The asm support is pretty old and modern compilers
> do a better job than most handcraftet asm.
>
> Please use libgcrypt on W64 only for testing.  We don't yet support it.
> However, support is planned and Windows7 hardware arrived last week.
>
Well, it appears that I was a bit hasty in saying that everything
works with --disable-asm.
It does work - but only when compiled with -g -O0 (which I did for
debugging purposes). Without CFLAGS="-g -O0" the resulting library still
crashes during the tests.
I've looked into it and discovered that it doesn't crash with -O1, but does
with -O2 (which is the default when no -Ox option is specified). Then
I looked up in the gcc documentation what -O1 and -O2 mean and started to
test libgcrypt with different flags instead of -Ox.
The best I could do is this:

CFLAGS="-g -O1 \
           -fno-auto-inc-dec \
           -fno-cprop-registers \
           -fno-dce \
           -fno-defer-pop \
           -fno-delayed-branch \
           -fno-dse \
           -fguess-branch-probability \
           -fno-if-conversion2 \
           -fif-conversion \
           -fno-ipa-pure-const \
           -fno-ipa-profile \
           -fno-ipa-reference \
           -fno-merge-constants\
           -fno-split-wide-types \
           -fno-tree-bit-ccp \
           -fno-tree-builtin-call-dce \
           -ftree-ccp \
           -fno-tree-ch \
           -fno-tree-copyrename \
           -fno-tree-dce \
           -ftree-dominator-opts \
           -fno-tree-dse \
           -fno-tree-forwprop \
           -fno-tree-fre \
           -fno-tree-phiprop \
           -fno-tree-sra \
           -fno-tree-pta \
           -fno-tree-ter \
           -fno-unit-at-a-time \
           -fomit-frame-pointer \
-fgcse" $cdir/configure --disable-asm --prefix=/mingw 
--build=x86_64-w64-mingw32 --host=x86_64-w64-mingw32 
--target=x86_64-w64-mingw32 --with-gpg-error-prefix=/mingw

"-fgcse" is from -O2, everything else is from -O1. Note that replacing 
-O1 with all the flags it enables does NOT produce a faulty library,
which is why I added -O1 and then disabled optimizations until I found
the smallest possible subset that still resulted in a crash.

That, however, wasn't really all that helpful, because gdb is still
unable to show me what exactly is wrong (either that, or I just don't
know how to use gdb properly, or my self-built x86_64 version of gdb is
faulty):

Starting program: f:\src\_libgcrypt-1.4.6/./tests/.libs/ac-schemes.exe
[New Thread 4352.0x1bac]
[New Thread 4352.0x508]
[New Thread 4352.0xad0]

Breakpoint 2, ssa_check (handle=0x4f5a90, spec=..., key_public=0x4f5720, 
key_secret=0x4f5e20) at f:/src/libgcrypt-1.4.6/tests/ac-schemes.c:186
186           err = gcry_ac_data_sign_scheme (handle, 
GCRY_AC_SSA_PKCS_V1_5, 0, opts, key_secret,
(gdb) s

Program received signal SIGSEGV, Segmentation fault.
0x000000006c152e1a in ?? ()
(gdb) info all-registers
rax            0x0      0
rbx            0x2      2
rcx            0x0      0
rdx            0x2      2
rsi            0x0      0
rdi            0x4f7528 5207336
rbp            0x0      0x0
rsp            0x22f6f0 0x22f6f0
r8             0x2      2
r9             0xd3335caf       3543358639
r10            0xf216f337       4061590327
r11            0x90dca40        151898688
r12            0x458    1112
r13            0x2      2
r14            0x0      0
r15            0x22f7d8 2291672
rip            0x6c152e1a       0x6c152e1a
eflags         0x10206  [ PF IF RF ]
cs             0x33     51
ss             0x206002b        33947691
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x2b0000 2818048
st0            0        (raw 0x00000000000000000000)
st1            0        (raw 0x00000000000000000000)
st2            0        (raw 0x00000000000000000000)
st3            0        (raw 0x00000000000000000000)
st4            0        (raw 0x00000000000000000000)
st5            0        (raw 0x00000000000000000000)
st6            0        (raw 0x00000000000000000000)
st7            0        (raw 0x00000000000000000000)
fctrl          0x37f    895
fstat          0x0      0
ftag           0x0      0
fiseg          0x0      0
fioff          0x0      0
foseg          0x0      0
fooff          0x0      0
fop            0x0      0
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm8           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm9           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm10          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm11          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm12          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm13          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm14          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
xmm15          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 
0x0}, uint128 = 0x00000000000000000000000000000000}
mxcsr          0x1f80   [ IM DM ZM OM UM PM ]
(gdb) bt
#0  0x000000006c152e1a in ?? ()
#1  0x0000000000000000 in ?? ()

Since gdb couldn't show me a stack trace, I turned to Process Hacker
and got this (when not running under gdb):
0, ntoskrnl.exe!KeStackAttachProcess+0x1187
1, ntoskrnl.exe!KeAcquireSpinLockAtDpcLevel+0x9dd
2, ntoskrnl.exe!KeWaitForMultipleObjects+0x26b
3, ntoskrnl.exe!MmCreateSection+0xe23
4, ntoskrnl.exe!ObCreateObject+0x712
5, ntoskrnl.exe!KeSynchronizeExecution+0x3a43
6, ntdll.dll!ZwWaitForMultipleObjects+0xa
7, KERNELBASE.dll!GetCurrentThread+0x36
8, kernel32.dll!WaitForMultipleObjectsEx+0xb3
9, kernel32.dll!WinExec+0x3b5
10, kernel32.dll!WinExec+0x537
11, kernel32.dll!WinExec+0x58f
12, kernel32.dll!UnhandledExceptionFilter+0x1fc
13, ntdll.dll!MD5Final+0x1dbc
14, ntdll.dll!_C_specific_handler+0x9c
15, ntdll.dll!RtlCompareUnicodeString+0xad
16, ntdll.dll!RtlTimeToSecondsSince1970+0x62c
17, ntdll.dll!KiUserExceptionDispatcher+0x2e
18, libgcrypt-11.dll!gcry_md_unregister+0x10794
19, libgcrypt-11.dll!gcry_md_unregister+0x106f7
20, libgcrypt-11.dll!gcry_md_unregister+0x10760
21, libgcrypt-11.dll!gcry_md_unregister+0xefe8
22, libgcrypt-11.dll!gcry_md_unregister+0xf340
23, libgcrypt-11.dll!gcry_md_unregister+0xf3b2
24, libgcrypt-11.dll!gcry_md_unregister+0xfdcb
25, libgcrypt-11.dll!gcry_ac_data_sign_scheme+0x2a
26, ac-schemes.exe+0x195f
27, ac-schemes.exe+0x1b0b
28, ac-schemes.exe+0x1d69
29, ac-schemes.exe+0x13ee
30, ac-schemes.exe+0x1548
31, kernel32.dll!BaseThreadInitThunk+0xd
32, ntdll.dll!RtlUserThreadStart+0x21

I am not sure whether to trust this stack trace or not (Process Hacker and
gdb are known to show different traces even under normal circumstances).


With CFLAGS="-O2 -fno-gcse" it produces a library that passes all test cases.

My guess is that there's a bug in the version of gcc I'm using (4.6.0
trunk), and a certain combination of optimizations simply makes it surface.

I'll try a different version of gcc (4.5.x, I think), but I'll have to
build it first, which will take another day or so.



More information about the Gcrypt-devel mailing list