-
Notifications
You must be signed in to change notification settings - Fork 9
/
sum_sse4_amd64.s
58 lines (49 loc) · 1.66 KB
/
sum_sse4_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
// _sum_float64_sse4 adds up a run of float64 values and stores the total
// through a result pointer.
//
// Stack arguments (frame $0-24: no locals, 24 bytes of arguments):
//   buf+0(FP)  -> DI  address of the first float64
//   len+8(FP)  -> SI  element count
//   res+16(FP) -> DX  address that receives the float64 sum
//
// NOTE(review): the Go-side declaration is not in this file; the parameter
// meanings above are read off from how the registers are used -- confirm.
//
// Only the low 32 bits of len are examined (ESI), and the initial check
// treats them as signed: a count whose low 32 bits are zero or negative
// stores 0.0, and any bits above bit 31 are ignored.
//
// Counts of 4 or more run a vector loop that consumes 4 elements per
// iteration into two independent 2-lane accumulators (X0 and X1); leftover
// elements (count % 4) and counts of 1..3 go through a scalar loop. The
// additions are therefore not performed in strict left-to-right order.
// Loads use movupd (unaligned), so buf needs no 16-byte alignment.
//
// Most instructions are emitted as raw WORD/LONG/BYTE opcode bytes; the
// trailing comment on each such line is its Intel-syntax disassembly.
//
// Registers written: AX, CX, SI, R8, R9, X0-X3 and the flags.
TEXT ·_sum_float64_sse4(SB), $0-24
// Load the three stack arguments into the registers the encoded body uses.
MOVQ buf+0(FP), DI
MOVQ len+8(FP), SI
MOVQ res+16(FP), DX
// Signed 32-bit count <= 0: nothing to add, store 0.0.
WORD $0xf685 // test esi, esi
JLE LBB0_1
// rax = count, zero-extended from 32 bits.
WORD $0xf089 // mov eax, esi
// xmm0 = 0.0; this is the running sum for the scalar-only path.
LONG $0xc0570f66 // xorpd xmm0, xmm0
// Counts of 1..3 skip the vector loop entirely.
WORD $0xfe83; BYTE $0x03 // cmp esi, 3
JBE LBB0_3
// rsi = count % 4 (scalar remainder); r8 = count - remainder, i.e. the
// number of elements handled by the vector loop (a multiple of 4).
WORD $0xe683; BYTE $0x03 // and esi, 3
WORD $0x8949; BYTE $0xc0 // mov r8, rax
WORD $0x2949; BYTE $0xf0 // sub r8, rsi
// Taken only if r8 == 0; count > 3 on this path, so r8 is at least 4 and
// this branch is not expected to fire.
JE LBB0_3
// rcx points 16 bytes into buf so each iteration can address the two
// halves of a 4-element group as [rcx - 16] and [rcx].
LONG $0x104f8d48 // lea rcx, [rdi + 16]
// Zero both vector accumulators; r9 counts the elements left for the loop.
LONG $0xc9570f66 // xorpd xmm1, xmm1
WORD $0x894d; BYTE $0xc1 // mov r9, r8
LONG $0xc0570f66 // xorpd xmm0, xmm0
// Vector loop: 4 float64 per iteration.
//   xmm1 += elements 0,1 of the group; xmm0 += elements 2,3 of the group.
LBB0_8:
LONG $0x51100f66; BYTE $0xf0 // movupd xmm2, oword [rcx - 16]
LONG $0x19100f66 // movupd xmm3, oword [rcx]
LONG $0xca580f66 // addpd xmm1, xmm2
LONG $0xc3580f66 // addpd xmm0, xmm3
LONG $0x20c18348 // add rcx, 32
LONG $0xfcc18349 // add r9, -4
// Loop while r9 != 0 (flags come from the add above).
JNE LBB0_8
// Fold the two accumulators together, then add the two lanes so the low
// lane of xmm0 holds the sum of everything processed so far.
LONG $0xc1580f66 // addpd xmm0, xmm1
LONG $0xc07c0f66 // haddpd xmm0, xmm0
// esi still holds count % 4: a non-zero remainder continues in the scalar
// loop starting at element index r8; otherwise go straight to the store.
WORD $0xf685 // test esi, esi
JNE LBB0_4
JMP LBB0_10
// Scalar-only entry (no vector work was done): start at element index 0.
LBB0_3:
WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
// Scalar setup: rcx = &buf[r8]; rax = count - r8 = elements still to add.
LBB0_4:
LONG $0xc70c8d4a // lea rcx, [rdi + 8*r8]
WORD $0x294c; BYTE $0xc0 // sub rax, r8
// Scalar loop: add one float64 per iteration into the low lane of xmm0.
LBB0_5:
LONG $0x01580ff2 // addsd xmm0, qword [rcx]
LONG $0x08c18348 // add rcx, 8
WORD $0xff48; BYTE $0xc8 // dec rax
// Loop while rax != 0 (flags come from the dec above).
JNE LBB0_5
JMP LBB0_10
// Count <= 0: the result is 0.0.
LBB0_1:
LONG $0xc0570f66 // xorpd xmm0, xmm0
// Common exit: store the low lane of xmm0 through the result pointer.
LBB0_10:
LONG $0x02110ff2 // movsd qword [rdx], xmm0
RET