aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author alpine-mips-patches <info@mobile-stream.com> 2018-12-13 05:51:24 +0000
committer Leonardo Arena <rnalrd@alpinelinux.org> 2018-12-18 14:03:26 +0000
commit 0141cee6972a961d297eec7429e56c9751f2e579 (patch)
tree f9b1d6d467490fa94b06139a343394174a4cb334
parent 52a93aff8bb1bef6f531b810050b6bb24e4a13c1 (diff)
download aports-0141cee6972a961d297eec7429e56c9751f2e579.tar.gz
aports-0141cee6972a961d297eec7429e56c9751f2e579.tar.bz2
aports-0141cee6972a961d297eec7429e56c9751f2e579.tar.xz
main/snappy: optimize on mips*, s390x
1) Enable generation of LWL/LWR/etc. instructions on pre-R6 mips*. This gives a 5x (yes, five times) compression speed-up on P5600 (and an additional ~10% for decompression).
2) Recognize s390x as an architecture capable of unaligned access. See "Integral Boundaries" in SA22-7832-xx.
3) Force the use of gcc/clang __builtin_bswapXX for the big-endian code path in order to avoid the suboptimal generic functions from <byteswap.h>.
The test suite still passes on x86_64, on mipseln8hf (hardware) and under qemu-s390x.
-rw-r--r-- main/snappy/APKBUILD 9
-rw-r--r-- main/snappy/optimize-mips-s390x.patch 80
2 files changed, 86 insertions, 3 deletions
diff --git a/main/snappy/APKBUILD b/main/snappy/APKBUILD
index 9b1333ae38..3386863626 100644
--- a/main/snappy/APKBUILD
+++ b/main/snappy/APKBUILD
@@ -2,14 +2,16 @@
# Maintainer: Natanael Copa <ncopa@alpinelinux.org>
pkgname=snappy
pkgver=1.1.7
-pkgrel=0
+pkgrel=1
pkgdesc="Fast compression and decompression library"
url="https://google.github.io/snappy/"
arch="all"
license="BSD-3-Clause"
makedepends="cmake"
subpackages="$pkgname-dbg $pkgname-dev $pkgname-doc"
-source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz"
+source="$pkgname-$pkgver.tar.gz::https://github.com/google/snappy/archive/$pkgver.tar.gz
+ optimize-mips-s390x.patch
+ "
build() {
cd "$builddir"
@@ -36,4 +38,5 @@ package() {
done
}
-sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf snappy-1.1.7.tar.gz"
+sha512sums="32046f532606ba545a4e4825c0c66a19be449f2ca2ff760a6fa170a3603731479a7deadb683546e5f8b5033414c50f4a9a29f6d23b7a41f047e566e69eca7caf snappy-1.1.7.tar.gz
+a14b0159631beac628cf99cf9ad970631dfdbf607ca2c3911a64124d1133694689dc76a70f25d4f780ce7093584249905aec2926ef7a3d9350952f7648938392 optimize-mips-s390x.patch"
diff --git a/main/snappy/optimize-mips-s390x.patch b/main/snappy/optimize-mips-s390x.patch
new file mode 100644
index 0000000000..07c95096ae
--- /dev/null
+++ b/main/snappy/optimize-mips-s390x.patch
@@ -0,0 +1,80 @@
+diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
+index f834bdb..22407ef 100644
+--- a/snappy-stubs-internal.h
++++ b/snappy-stubs-internal.h
+@@ -123,7 +123,7 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+ // x86, PowerPC, and ARM64 can simply do these loads and stores native.
+
+ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+- defined(__aarch64__)
++ defined(__aarch64__) || defined(__s390x__)
+
+ #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+ #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+@@ -150,6 +150,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+ // (it ignores __attribute__((packed)) on individual variables). However,
+ // we can tell it that a _struct_ is unaligned, which has the same effect,
+ // so we do that.
++//
++// On pre-R6 MIPS just let the compiler use LWL/LWR, SWL/SWR etc.
+
+ #elif defined(__arm__) && \
+ !defined(__ARM_ARCH_4__) && \
+@@ -163,7 +165,8 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+ !defined(__ARM_ARCH_6K__) && \
+ !defined(__ARM_ARCH_6Z__) && \
+ !defined(__ARM_ARCH_6ZK__) && \
+- !defined(__ARM_ARCH_6T2__)
++ !defined(__ARM_ARCH_6T2__) || \
++ (defined(__mips__) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6))
+
+ #if __GNUC__
+ #define ATTRIBUTE_PACKED __attribute__((__packed__))
+@@ -184,6 +187,11 @@ struct Unaligned32Struct {
+ uint8 dummy; // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+
++struct Unaligned64Struct {
++ uint64 value;
++ uint8 dummy; // To make the size non-power-of-two.
++} ATTRIBUTE_PACKED;
++
+ } // namespace internal
+ } // namespace base
+
+@@ -203,6 +211,7 @@ struct Unaligned32Struct {
+ // See if that would be more efficient on platforms supporting it,
+ // at least for copies.
+
++#ifndef __mips__
+ inline uint64 UNALIGNED_LOAD64(const void *p) {
+ uint64 t;
+ memcpy(&t, p, sizeof t);
+@@ -212,6 +221,13 @@ inline uint64 UNALIGNED_LOAD64(const void *p) {
+ inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ memcpy(p, &v, sizeof v);
+ }
++#else
++#define UNALIGNED_LOAD64(_p) \
++ ((reinterpret_cast<const ::snappy::base::internal::Unaligned64Struct *>(_p))->value)
++#define UNALIGNED_STORE64(_p, _val) \
++ ((reinterpret_cast< ::snappy::base::internal::Unaligned64Struct *>(_p))->value = \
++ (_val))
++#endif
+
+ #else
+
+@@ -274,6 +290,13 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ #define bswap_32(x) OSSwapInt32(x)
+ #define bswap_64(x) OSSwapInt64(x)
+
++#elif defined(__GNUC__)
++/* musl provides suboptimal (generic) bswap_xx implementations in <byteswap.h> */
++
++#define bswap_16 __builtin_bswap16
++#define bswap_32 __builtin_bswap32
++#define bswap_64 __builtin_bswap64
++
+ #elif defined(HAVE_BYTESWAP_H)
+ #include <byteswap.h>
+