From 1e697147022325620c2e31c4417460f09c9df59d Mon Sep 17 00:00:00 2001
From: Natanael Copa
Date: Wed, 25 Dec 2019 12:08:50 +0000
Subject: main/openssl: fix CVE-2019-1551

---
 main/openssl/APKBUILD            |   8 +-
 main/openssl/CVE-2019-1551.patch | 757 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 763 insertions(+), 2 deletions(-)
 create mode 100644 main/openssl/CVE-2019-1551.patch

diff --git a/main/openssl/APKBUILD b/main/openssl/APKBUILD
index 8381b938674..2b90a1caa6d 100644
--- a/main/openssl/APKBUILD
+++ b/main/openssl/APKBUILD
@@ -2,7 +2,7 @@
 pkgname=openssl
 pkgver=1.1.1d
 _abiver=${pkgver%.*}
-pkgrel=2
+pkgrel=3
 pkgdesc="Toolkit for Transport Layer Security (TLS)"
 url="https://www.openssl.org"
 arch="all"
@@ -15,6 +15,7 @@ subpackages="$pkgname-dbg $pkgname-libs-static $pkgname-dev $pkgname-doc
 	libcrypto$_abiver:_libcrypto libssl$_abiver:_libssl"
 source="https://www.openssl.org/source/openssl-$pkgver.tar.gz
 	man-section.patch
+	CVE-2019-1551.patch
 	"
 case "$CARCH" in
 s390x) options="$options !check";; # FIXME: test hangs
@@ -22,6 +23,8 @@ esac
 
 # secfixes:
+# 1.1.1d-r3:
+#   - CVE-2019-1551
 # 1.1.1d-r1:
 #   - CVE-2019-1547
 #   - CVE-2019-1549
@@ -110,4 +113,5 @@ _libssl() {
 }
 
 sha512sums="2bc9f528c27fe644308eb7603c992bac8740e9f0c3601a130af30c9ffebbf7e0f5c28b76a00bbb478bad40fbe89b4223a58d604001e1713da71ff4b7fe6a08a7  openssl-1.1.1d.tar.gz
-3e5c425d219768721d38bb33db7445eb3ea12d9447a16c5b23b9fddfcbd9d40b98b39506aeac9cbaced4be22ad5a6cb8e4d16fbe4850ac50a6b0c716592b2a2b  man-section.patch"
+3e5c425d219768721d38bb33db7445eb3ea12d9447a16c5b23b9fddfcbd9d40b98b39506aeac9cbaced4be22ad5a6cb8e4d16fbe4850ac50a6b0c716592b2a2b  man-section.patch
+11ca61515a89766241fe0fae27f3b39767128915f288ea88840bf93e8b50ac416024cb2153efcdf2658d3e82a8e4250a0c069333dbd7347475f9dafcc45370b5  CVE-2019-1551.patch"

diff --git a/main/openssl/CVE-2019-1551.patch b/main/openssl/CVE-2019-1551.patch
new file mode 100644
index 00000000000..8daf04ebf9f
--- /dev/null
+++ b/main/openssl/CVE-2019-1551.patch
@@ -0,0 +1,757 @@
+From 419102400a2811582a7a3d4a4e317d72e5ce0a8f Mon Sep 17 00:00:00 2001
+From: Andy Polyakov
+Date: Wed, 4 Dec 2019 12:48:21 +0100
+Subject: [PATCH] Fix an overflow bug in rsaz_512_sqr
+
+There is an overflow bug in the x86_64 Montgomery squaring procedure used in
+exponentiation with 512-bit moduli. No EC algorithms are affected. Analysis
+suggests that attacks against 2-prime RSA1024, 3-prime RSA1536, and DSA1024 as a
+result of this defect would be very difficult to perform and are not believed
+likely. Attacks against DH512 are considered just feasible. However, for an
+attack the target would have to re-use the DH512 private key, which is not
+recommended anyway. Also applications directly using the low-level API
+BN_mod_exp may be affected if they use BN_FLG_CONSTTIME.
+
+CVE-2019-1551
+
+Reviewed-by: Paul Dale
+Reviewed-by: Bernd Edlinger
+(Merged from https://github.com/openssl/openssl/pull/10575)
+---
+ crypto/bn/asm/rsaz-x86_64.pl | 381 ++++++++++++++++++-----------------
+ 1 file changed, 197 insertions(+), 184 deletions(-)
+
+diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
+index b1797b649f..7534d5cd03 100755
+--- a/crypto/bn/asm/rsaz-x86_64.pl
++++ b/crypto/bn/asm/rsaz-x86_64.pl
+@@ -116,7 +116,7 @@ rsaz_512_sqr: # 25-29% faster than rsaz_512_mul
+ subq \$128+24, %rsp
+ .cfi_adjust_cfa_offset 128+24
+ .Lsqr_body:
+- movq $mod, %rbp # common argument
++ movq $mod, %xmm1 # common off-load
+ movq ($inp), %rdx
+ movq 8($inp), %rax
+ movq $n0, 128(%rsp)
+@@ -134,7 +134,8 @@ $code.=<<___;
+ .Loop_sqr:
+ movl $times,128+8(%rsp)
+ #first iteration
+- movq %rdx, %rbx
++ movq %rdx, %rbx # 0($inp)
++ mov %rax, %rbp # 8($inp)
+ mulq %rdx
+ movq %rax, %r8
+ movq 16($inp), %rax
+@@ -173,31 +174,29 @@ $code.=<<___;
+ mulq %rbx
+ addq %rax, %r14
+ movq %rbx, %rax
+- movq %rdx, %r15
+- adcq \$0, %r15
++ adcq \$0, %rdx
+
+- addq %r8, %r8 #shlq \$1, %r8
+- movq %r9, %rcx
+- adcq %r9, %r9 #shld \$1, %r8, %r9
++ xorq %rcx,%rcx # rcx:r8 = r8 << 1
++ addq %r8, %r8
++ movq %rdx, %r15
++ adcq \$0, %rcx
+
+ mulq %rax
+- movq %rax, (%rsp)
+- addq %rdx, %r8
+- adcq \$0, %r9
++ addq %r8, %rdx
++ adcq \$0, %rcx
+
+- movq %r8, 8(%rsp)
+- shrq \$63, %rcx
++ movq %rax, (%rsp)
++ movq %rdx, 8(%rsp)
+
+ #second iteration
+- movq 8($inp), %r8
+ movq 16($inp), %rax
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r10
+ movq 24($inp), %rax
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r11
+ movq 32($inp), %rax
+ adcq \$0, %rdx
+@@ -205,7 +204,7 @@ $code.=<<___;
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r12
+ movq 40($inp), %rax
+ adcq \$0, %rdx
+@@ -213,7 +212,7 @@ $code.=<<___;
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r13
+ movq 48($inp), %rax
+ adcq \$0, %rdx
+@@ -221,7 +220,7 @@ $code.=<<___;
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r14
+ movq 56($inp), %rax
+ adcq \$0, %rdx
+@@ -229,39 +228,39 @@ $code.=<<___;
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+- mulq %r8
++ mulq %rbp
+ addq %rax, %r15
+- movq %r8, %rax
++ movq %rbp, %rax
+ adcq \$0, %rdx
+ addq %rbx, %r15
+- movq %rdx, %r8
+- movq %r10, %rdx
+- adcq \$0, %r8
++ adcq \$0, %rdx
+
+- add %rdx, %rdx
+- lea (%rcx,%r10,2), %r10 #shld \$1, %rcx, %r10
+- movq %r11, %rbx
+- adcq %r11, %r11 #shld \$1, %r10, %r11
++ xorq %rbx, %rbx # rbx:r10:r9 = r10:r9 << 1
++ addq %r9, %r9
++ movq %rdx, %r8
++ adcq %r10, %r10
++ adcq \$0, %rbx
+
+ mulq %rax
++ addq %rcx, %rax
++ movq 16($inp), %rbp
++ adcq \$0, %rdx
+ addq %rax, %r9
++ movq 24($inp), %rax
+ adcq %rdx, %r10
+- adcq \$0, %r11
++ adcq \$0, %rbx
+
+ movq %r9, 16(%rsp)
+ movq %r10, 24(%rsp)
+- shrq \$63, %rbx
+
+ #third iteration
+- movq 16($inp), %r9
+- movq 24($inp), %rax
+- mulq %r9
++ mulq %rbp
+ addq %rax, %r12
+ movq 32($inp), %rax
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
+- mulq %r9
++ mulq %rbp
+ addq %rax, %r13
+ movq 40($inp), %rax
+ adcq \$0, %rdx
+@@ -269,7 +268,7 @@ $code.=<<___;
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
+- mulq %r9
++ mulq %rbp
+ addq %rax, %r14
+ movq 48($inp), %rax
+ adcq \$0, %rdx
+@@ -277,9 +276,7 @@ $code.=<<___;
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
+- mulq %r9
+- movq %r12, %r10
+- lea (%rbx,%r12,2), %r12 #shld \$1, %rbx, %r12
++ mulq %rbp
+ addq %rax, %r15
+ movq 56($inp), %rax
+ adcq \$0, %rdx
+@@ -287,36 +284,40 @@ $code.=<<___;
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
+- mulq %r9
+- shrq \$63, %r10
++ mulq %rbp
+ addq %rax, %r8
+- movq %r9, %rax
++ movq %rbp, %rax
+ adcq \$0, %rdx
+ addq %rcx, %r8
+- movq %rdx, %r9
+- adcq \$0, %r9
++ adcq \$0, %rdx
+
+- movq %r13, %rcx
+- leaq (%r10,%r13,2), %r13 #shld \$1, %r12, %r13
++ xorq %rcx, %rcx # rcx:r12:r11 = r12:r11 << 1
++ addq %r11, %r11
++ movq %rdx, %r9
++ adcq %r12, %r12
++ adcq \$0, %rcx
+
+ mulq %rax
++ addq %rbx, %rax
++ movq 24($inp), %r10
++ adcq \$0, %rdx
+ addq %rax, %r11
++ movq 32($inp), %rax
+ adcq %rdx, %r12
+- adcq \$0, %r13
++ adcq \$0, %rcx
+
+ movq %r11, 32(%rsp)
+ movq %r12, 40(%rsp)
+- shrq \$63, %rcx
+
+ #fourth iteration
+- movq 24($inp), %r10
+- movq 32($inp), %rax
++ mov %rax, %r11 # 32($inp)
+ mulq %r10
+ addq %rax, %r14
+ movq 40($inp), %rax
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
++ mov %rax, %r12 # 40($inp)
+ mulq %r10
+ addq %rax, %r15
+ movq 48($inp), %rax
+@@ -325,9 +326,8 @@ $code.=<<___;
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
++ mov %rax, %rbp # 48($inp)
+ mulq %r10
+- movq %r14, %r12
+- leaq (%rcx,%r14,2), %r14 #shld \$1, %rcx, %r14
+ addq %rax, %r8
+ movq 56($inp), %rax
+ adcq \$0, %rdx
+@@ -336,32 +336,33 @@ $code.=<<___;
+ adcq \$0, %rbx
+
+ mulq %r10
+- shrq \$63, %r12
+ addq %rax, %r9
+ movq %r10, %rax
+ adcq \$0, %rdx
+ addq %rbx, %r9
+- movq %rdx, %r10
+- adcq \$0, %r10
++ adcq \$0, %rdx
+
+- movq %r15, %rbx
+- leaq (%r12,%r15,2),%r15 #shld \$1, %r14, %r15
++ xorq %rbx, %rbx # rbx:r13:r14 = r13:r14 << 1
++ addq %r13, %r13
++ movq %rdx, %r10
++ adcq %r14, %r14
++ adcq \$0, %rbx
+
+ mulq %rax
++ addq %rcx, %rax
++ adcq \$0, %rdx
+ addq %rax, %r13
++ movq %r12, %rax # 40($inp)
+ adcq %rdx, %r14
+- adcq \$0, %r15
++ adcq \$0, %rbx
+
+ movq %r13, 48(%rsp)
+ movq %r14, 56(%rsp)
+- shrq \$63, %rbx
+
+ #fifth iteration
+- movq 32($inp), %r11
+- movq 40($inp), %rax
+ mulq %r11
+ addq %rax, %r8
+- movq 48($inp), %rax
++ movq %rbp, %rax # 48($inp)
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
+@@ -369,97 +370,99 @@ $code.=<<___;
+ addq %rax, %r9
+ movq 56($inp), %rax
+ adcq \$0, %rdx
+- movq %r8, %r12
+- leaq (%rbx,%r8,2), %r8 #shld \$1, %rbx, %r8
+ addq %rcx, %r9
+ movq %rdx, %rcx
+ adcq \$0, %rcx
+
++ mov %rax, %r14 # 56($inp)
+ mulq %r11
+- shrq \$63, %r12
+ addq %rax, %r10
+ movq %r11, %rax
+ adcq \$0, %rdx
+ addq %rcx, %r10
+- movq %rdx, %r11
+- adcq \$0, %r11
++ adcq \$0, %rdx
+
+- movq %r9, %rcx
+- leaq (%r12,%r9,2), %r9 #shld \$1, %r8, %r9
++ xorq %rcx, %rcx # rcx:r8:r15 = r8:r15 << 1
++ addq %r15, %r15
++ movq %rdx, %r11
++ adcq %r8, %r8
++ adcq \$0, %rcx
+
+ mulq %rax
++ addq %rbx, %rax
++ adcq \$0, %rdx
+ addq %rax, %r15
++ movq %rbp, %rax # 48($inp)
+ adcq %rdx, %r8
+- adcq \$0, %r9
++ adcq \$0, %rcx
+
+ movq %r15, 64(%rsp)
+ movq %r8, 72(%rsp)
+- shrq \$63, %rcx
+
+ #sixth iteration
+- movq 40($inp), %r12
+- movq 48($inp), %rax
+ mulq %r12
+ addq %rax, %r10
+- movq 56($inp), %rax
++ movq %r14, %rax # 56($inp)
+ movq %rdx, %rbx
+ adcq \$0, %rbx
+
+ mulq %r12
+ addq %rax, %r11
+ movq %r12, %rax
+- movq %r10, %r15
+- leaq (%rcx,%r10,2), %r10 #shld \$1, %rcx, %r10
+ adcq \$0, %rdx
+- shrq \$63, %r15
+ addq %rbx, %r11
+- movq %rdx, %r12
+- adcq \$0, %r12
++ adcq \$0, %rdx
+
+- movq %r11, %rbx
+- leaq (%r15,%r11,2), %r11 #shld \$1, %r10, %r11
++ xorq %rbx, %rbx # rbx:r10:r9 = r10:r9 << 1
++ addq %r9, %r9
++ movq %rdx, %r12
++ adcq %r10, %r10
++ adcq \$0, %rbx
+
+ mulq %rax
++ addq %rcx, %rax
++ adcq \$0, %rdx
+ addq %rax, %r9
++ movq %r14, %rax # 56($inp)
+ adcq %rdx, %r10
+- adcq \$0, %r11
++ adcq \$0, %rbx
+
+ movq %r9, 80(%rsp)
+ movq %r10, 88(%rsp)
+
+ #seventh iteration
+- movq 48($inp), %r13
+- movq 56($inp), %rax
+- mulq %r13
++ mulq %rbp
+ addq %rax, %r12
+- movq %r13, %rax
+- movq %rdx, %r13
+- adcq \$0, %r13
++ movq %rbp, %rax
++ adcq \$0, %rdx
+
+- xorq %r14, %r14
+- shlq \$1, %rbx
+- adcq %r12, %r12 #shld \$1, %rbx, %r12
+- adcq %r13, %r13 #shld \$1, %r12, %r13
+- adcq %r14, %r14 #shld \$1, %r13, %r14
++ xorq %rcx, %rcx # rcx:r12:r11 = r12:r11 << 1
++ addq %r11, %r11
++ movq %rdx, %r13
++ adcq %r12, %r12
++ adcq \$0, %rcx
+
+ mulq %rax
++ addq %rbx, %rax
++ adcq \$0, %rdx
+ addq %rax, %r11
++ movq %r14, %rax # 56($inp)
+ adcq %rdx, %r12
+- adcq \$0, %r13
++ adcq \$0, %rcx
+
+ movq %r11, 96(%rsp)
+ movq %r12, 104(%rsp)
+
+ #eighth iteration
+- movq 56($inp), %rax
++ xorq %rbx, %rbx # rbx:r13 = r13 << 1
++ addq %r13, %r13
++ adcq \$0, %rbx
++
+ mulq %rax
+- addq %rax, %r13
++ addq %rcx, %rax
+ adcq \$0, %rdx
+-
+- addq %rdx, %r14
+-
+- movq %r13, 112(%rsp)
+- movq %r14, 120(%rsp)
++ addq %r13, %rax
++ adcq %rbx, %rdx
+
+ movq (%rsp), %r8
+ movq 8(%rsp), %r9
+@@ -469,6 +472,10 @@ $code.=<<___;
+ movq 40(%rsp), %r13
+ movq 48(%rsp), %r14
+ movq 56(%rsp), %r15
++ movq %xmm1, %rbp
++
++ movq %rax, 112(%rsp)
++ movq %rdx, 120(%rsp)
+
+ call __rsaz_512_reduce
+
+@@ -500,9 +507,9 @@ $code.=<<___;
+ .Loop_sqrx:
+ movl $times,128+8(%rsp)
+ movq $out, %xmm0 # off-load
+- movq %rbp, %xmm1 # off-load
+ #first iteration
+ mulx %rax, %r8, %r9
++ mov %rax, %rbx
+
+ mulx 16($inp), %rcx, %r10
+ xor %rbp, %rbp # cf=0, of=0
+@@ -510,40 +517,39 @@ $code.=<<___;
+ mulx 24($inp), %rax, %r11
+ adcx %rcx, %r9
+
+- mulx 32($inp), %rcx, %r12
++ .byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00 # mulx 32($inp), %rcx, %r12
+ adcx %rax, %r10
+
+- mulx 40($inp), %rax, %r13
++ .byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00 # mulx 40($inp), %rax, %r13
+ adcx %rcx, %r11
+
+- .byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($inp), %rcx, %r14
++ mulx 48($inp), %rcx, %r14
+ adcx %rax, %r12
+ adcx %rcx, %r13
+
+- .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 # mulx 56($inp), %rax, %r15
++ mulx 56($inp), %rax, %r15
+ adcx %rax, %r14
+ adcx %rbp, %r15 # %rbp is 0
+
+- mov %r9, %rcx
+- shld \$1, %r8, %r9
+- shl \$1, %r8
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
+- adcx %rdx, %r8
+- mov 8($inp), %rdx
+- adcx %rbp, %r9
++ mulx %rdx, %rax, $out
++ mov %rbx, %rdx # 8($inp)
++ xor %rcx, %rcx
++ adox %r8, %r8
++ adcx $out, %r8
++ adox %rbp, %rcx
++ adcx %rbp, %rcx
+
+ mov %rax, (%rsp)
+ mov %r8, 8(%rsp)
+
+ #second iteration
+- mulx 16($inp), %rax, %rbx
++ .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00 # mulx 16($inp), %rax, %rbx
+ adox %rax, %r10
+ adcx %rbx, %r11
+
+- .byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r8
++ mulx 24($inp), $out, %r8
+ adox $out, %r11
++ .byte 0x66
+ adcx %r8, %r12
+
+ mulx 32($inp), %rax, %rbx
+@@ -561,24 +567,25 @@ $code.=<<___;
+ .byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r8
+ adox $out, %r15
+ adcx %rbp, %r8
++ mulx %rdx, %rax, $out
+ adox %rbp, %r8
++ .byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 # mov 16($inp), %rdx
+
+- mov %r11, %rbx
+- shld \$1, %r10, %r11
+- shld \$1, %rcx, %r10
+-
+- xor %ebp,%ebp
+- mulx %rdx, %rax, %rcx
+- mov 16($inp), %rdx
++ xor %rbx, %rbx
++ adcx %rcx, %rax
++ adox %r9, %r9
++ adcx %rbp, $out
++ adox %r10, %r10
+ adcx %rax, %r9
++ adox %rbp, %rbx
++ adcx $out, %r10
++ adcx %rbp, %rbx
+
+ mov %r9, 16(%rsp)
+ .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp)
+
+ #third iteration
+- .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9
++ mulx 24($inp), $out, %r9
+ adox $out, %r12
+ adcx %r9, %r13
+
+@@ -586,7 +593,7 @@ $code.=<<___;
+ adox %rax, %r13
+ adcx %rcx, %r14
+
+- mulx 40($inp), $out, %r9
++ .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r9
+ adox $out, %r14
+ adcx %r9, %r15
+
+@@ -594,27 +601,28 @@ $code.=<<___;
+ adox %rax, %r15
+ adcx %rcx, %r8
+
+- .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r9
++ mulx 56($inp), $out, %r9
+ adox $out, %r8
+ adcx %rbp, %r9
++ mulx %rdx, %rax, $out
+ adox %rbp, %r9
++ mov 24($inp), %rdx
+
+- mov %r13, %rcx
+- shld \$1, %r12, %r13
+- shld \$1, %rbx, %r12
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
++ xor %rcx, %rcx
++ adcx %rbx, %rax
++ adox %r11, %r11
++ adcx %rbp, $out
++ adox %r12, %r12
+ adcx %rax, %r11
+- adcx %rdx, %r12
+- mov 24($inp), %rdx
+- adcx %rbp, %r13
++ adox %rbp, %rcx
++ adcx $out, %r12
++ adcx %rbp, %rcx
+
+ mov %r11, 32(%rsp)
+- .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp)
++ mov %r12, 40(%rsp)
+
+ #fourth iteration
+- .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx
++ mulx 32($inp), %rax, %rbx
+ adox %rax, %r14
+ adcx %rbx, %r15
+
+@@ -629,25 +637,25 @@ $code.=<<___;
+ mulx 56($inp), $out, %r10
+ adox $out, %r9
+ adcx %rbp, %r10
++ mulx %rdx, %rax, $out
+ adox %rbp, %r10
++ mov 32($inp), %rdx
+
+- .byte 0x66
+- mov %r15, %rbx
+- shld \$1, %r14, %r15
+- shld \$1, %rcx, %r14
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
++ xor %rbx, %rbx
++ adcx %rcx, %rax
++ adox %r13, %r13
++ adcx %rbp, $out
++ adox %r14, %r14
+ adcx %rax, %r13
+- adcx %rdx, %r14
+- mov 32($inp), %rdx
+- adcx %rbp, %r15
++ adox %rbp, %rbx
++ adcx $out, %r14
++ adcx %rbp, %rbx
+
+ mov %r13, 48(%rsp)
+ mov %r14, 56(%rsp)
+
+ #fifth iteration
+- .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11
++ mulx 40($inp), $out, %r11
+ adox $out, %r8
+ adcx %r11, %r9
+
+@@ -658,18 +666,19 @@ $code.=<<___;
+ mulx 56($inp), $out, %r11
+ adox $out, %r10
+ adcx %rbp, %r11
++ mulx %rdx, %rax, $out
++ mov 40($inp), %rdx
+ adox %rbp, %r11
+
+- mov %r9, %rcx
+- shld \$1, %r8, %r9
+- shld \$1, %rbx, %r8
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
++ xor %rcx, %rcx
++ adcx %rbx, %rax
++ adox %r15, %r15
++ adcx %rbp, $out
++ adox %r8, %r8
+ adcx %rax, %r15
+- adcx %rdx, %r8
+- mov 40($inp), %rdx
+- adcx %rbp, %r9
++ adox %rbp, %rcx
++ adcx $out, %r8
++ adcx %rbp, %rcx
+
+ mov %r15, 64(%rsp)
+ mov %r8, 72(%rsp)
+@@ -682,18 +691,19 @@ $code.=<<___;
+ .byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r12
+ adox $out, %r11
+ adcx %rbp, %r12
++ mulx %rdx, %rax, $out
+ adox %rbp, %r12
++ mov 48($inp), %rdx
+
+- mov %r11, %rbx
+- shld \$1, %r10, %r11
+- shld \$1, %rcx, %r10
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
++ xor %rbx, %rbx
++ adcx %rcx, %rax
++ adox %r9, %r9
++ adcx %rbp, $out
++ adox %r10, %r10
+ adcx %rax, %r9
+- adcx %rdx, %r10
+- mov 48($inp), %rdx
+- adcx %rbp, %r11
++ adcx $out, %r10
++ adox %rbp, %rbx
++ adcx %rbp, %rbx
+
+ mov %r9, 80(%rsp)
+ mov %r10, 88(%rsp)
+@@ -703,31 +713,31 @@ $code.=<<___;
+ adox %rax, %r12
+ adox %rbp, %r13
+
+- xor %r14, %r14
+- shld \$1, %r13, %r14
+- shld \$1, %r12, %r13
+- shld \$1, %rbx, %r12
+-
+- xor %ebp, %ebp
+- mulx %rdx, %rax, %rdx
+- adcx %rax, %r11
+- adcx %rdx, %r12
++ mulx %rdx, %rax, $out
++ xor %rcx, %rcx
+ mov 56($inp), %rdx
+- adcx %rbp, %r13
++ adcx %rbx, %rax
++ adox %r11, %r11
++ adcx %rbp, $out
++ adox %r12, %r12
++ adcx %rax, %r11
++ adox %rbp, %rcx
++ adcx $out, %r12
++ adcx %rbp, %rcx
+
+ .byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 # mov %r11, 96(%rsp)
+ .byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 # mov %r12, 104(%rsp)
+
+ #eighth iteration
+ mulx %rdx, %rax, %rdx
+- adox %rax, %r13
+- adox %rbp, %rdx
++ xor %rbx, %rbx
++ adcx %rcx, %rax
++ adox %r13, %r13
++ adcx %rbp, %rdx
++ adox %rbp, %rbx
++ adcx %r13, %rax
++ adcx %rdx, %rbx
+
+- .byte 0x66
+- add %rdx, %r14
+-
+- movq %r13, 112(%rsp)
+- movq %r14, 120(%rsp)
+ movq %xmm0, $out
+ movq %xmm1, %rbp
+
+@@ -741,6 +751,9 @@ $code.=<<___;
+ movq 48(%rsp), %r14
+ movq 56(%rsp), %r15
+
++ movq %rax, 112(%rsp)
++ movq %rbx, 120(%rsp)
++
+ call __rsaz_512_reducex
+
+ addq 64(%rsp), %r8
+-- 
+2.17.1
+
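Note (editorial illustration, not part of the commit): the recurring change in the assembly above replaces shld/shl/lea based doubling of partial products with plain add-with-carry chains that keep the bit shifted out of the top word in a spare register; the new comments such as "rcx:r8 = r8 << 1" describe exactly that, and it is this carry handling that the old sequence could get wrong. A minimal C sketch of the corrected pattern, assuming an x86_64 compiler that provides _addcarry_u64 in <immintrin.h>; the helper name is made up for illustration:

/* Hypothetical helper mirroring the patched pattern: double a two-word
 * value and retain the bit that overflows the top word in a third word,
 * via an explicit carry chain rather than a shift sequence. */
#include <stdio.h>
#include <immintrin.h>

static void double_with_carry(unsigned long long w[2], unsigned long long *top)
{
    unsigned char c;
    c = _addcarry_u64(0, w[0], w[0], &w[0]);  /* addq %r9, %r9    */
    c = _addcarry_u64(c, w[1], w[1], &w[1]);  /* adcq %r10, %r10  */
    *top = c;                                 /* adcq $0, %rbx    */
}

int main(void)
{
    unsigned long long w[2] = { 1ULL, 0xffffffffffffffffULL };
    unsigned long long top;
    double_with_carry(w, &top);
    /* The doubled value is top:w[1]:w[0]; the carry out of w[1] survives. */
    printf("%llu:%016llx:%016llx\n", top, w[1], w[0]);
    return 0;
}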
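Note (editorial illustration, not part of the commit): the advisory text in the patch header names the low-level API BN_mod_exp together with BN_FLG_CONSTTIME as the way applications can reach the affected routine; with the flag set, OpenSSL takes the constant-time exponentiation path, which on x86_64 hands 512-bit moduli to the RSAZ code patched above. A self-contained sketch of such a caller, with arbitrary values (build against libcrypto, e.g. with -lcrypto):

/* Illustration only: a direct BN_mod_exp caller on a 512-bit modulus,
 * treating the exponent as secret the way a private-key operation would. */
#include <stdio.h>
#include <openssl/bn.h>

int main(void)
{
    BN_CTX *ctx = BN_CTX_new();
    BIGNUM *base = BN_new(), *exp = BN_new(), *mod = BN_new(), *res = BN_new();
    if (ctx == NULL || base == NULL || exp == NULL || mod == NULL || res == NULL)
        return 1;

    /* Arbitrary 512-bit odd modulus and 512-bit operands. */
    BN_rand(mod, 512, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD);
    BN_rand(base, 512, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY);
    BN_rand(exp, 512, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY);

    /* Request the constant-time code path named in the advisory. */
    BN_set_flags(exp, BN_FLG_CONSTTIME);

    if (BN_mod_exp(res, base, exp, mod, ctx)) {
        char *hex = BN_bn2hex(res);
        printf("base^exp mod m = %s\n", hex);
        OPENSSL_free(hex);
    }

    BN_free(base); BN_free(exp); BN_free(mod); BN_free(res);
    BN_CTX_free(ctx);
    return 0;
}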