MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
diff mbox series

Message ID -
State Accepted
Delegated to: Ralf Baechle
Headers show
Series
  • MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
Related show

Commit Message

James Hogan Dec. 7, 2017, 7:20 a.m. UTC
GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
multiply with 128bit result) for MIPS64r6 builds, even in code which
doesn't explicitly use 128bit types, such as the following:

unsigned long func(unsigned long a, unsigned long b)
{
	return a > (~0UL) / b;
}

Which GCC rearanges to:

return (unsigned __int128)a * (unsigned __int128)b > 0xffffffff;

Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
normal circumstances we wouldn't expect any calls to __multi3 to be
generated from kernel code.

Reported-by: Thomas Petazzoni <>
Signed-off-by: James Hogan <>
Cc: Ralf Baechle <>
Cc: Maciej W. Rozycki <>
Cc: Matthew Fortune <>
Cc: Florian Fainelli <>
Cc: Waldemar Brodkorb <>
Cc: 
---
This should fix the issue Thomas. Thanks for reporting.
---
 arch/mips/lib/Makefile |  3 ++-
 arch/mips/lib/libgcc.h | 17 +++++++++++++++++
 arch/mips/lib/multi3.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/lib/multi3.c

Comments

Maciej W. Rozycki Dec. 8, 2017, 11:52 p.m. UTC | #1
On Wed, 6 Dec 2017, James Hogan wrote:

> GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
> multiply with 128bit result) for MIPS64r6 builds, even in code which
> doesn't explicitly use 128bit types, such as the following:
> 
> unsigned long func(unsigned long a, unsigned long b)
> {
> 	return a > (~0UL) / b;
> }
> 
> Which GCC rearanges to:
> 
> return (unsigned __int128)a * (unsigned __int128)b > 0xffffffff;

 You mean:

return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff;

presumably, or is there another bug here?

> Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
> normal circumstances we wouldn't expect any calls to __multi3 to be
> generated from kernel code.

 That does look bad; I'd expect a `umulditi3' (widening 64-bit by 64-bit 
unsigned multiplication) kind of operation instead, which should expand 
internally.  And we only really need to execute DMUHU and then check the 
result for non-zero here, because the value of the low 64 bits of the 
product does not matter for the evaluation of the expression.

 I don't know offhand if such a transformation can be handled by GCC as it 
stands by tweaking the MIPS backend without a corresponding update to the 
middle end though.

  Maciej
James Hogan Dec. 9, 2017, 7:15 a.m. UTC | #2
On Fri, Dec 08, 2017 at 11:52:05PM +0000, Maciej W. Rozycki wrote:
> On Wed, 6 Dec 2017, James Hogan wrote:
> 
> > GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
> > multiply with 128bit result) for MIPS64r6 builds, even in code which
> > doesn't explicitly use 128bit types, such as the following:
> > 
> > unsigned long func(unsigned long a, unsigned long b)
> > {
> > 	return a > (~0UL) / b;
> > }
> > 
> > Which GCC rearanges to:
> > 
> > return (unsigned __int128)a * (unsigned __int128)b > 0xffffffff;
> 
>  You mean:
> 
> return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff;
> 
> presumably, or is there another bug here?

Yes, thats what was meant. It was copy + pasted from Ralf's analysis.

Thanks
James

> 
> > Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
> > normal circumstances we wouldn't expect any calls to __multi3 to be
> > generated from kernel code.
> 
>  That does look bad; I'd expect a `umulditi3' (widening 64-bit by 64-bit 
> unsigned multiplication) kind of operation instead, which should expand 
> internally.  And we only really need to execute DMUHU and then check the 
> result for non-zero here, because the value of the low 64 bits of the 
> product does not matter for the evaluation of the expression.
> 
>  I don't know offhand if such a transformation can be handled by GCC as it 
> stands by tweaking the MIPS backend without a corresponding update to the 
> middle end though.
> 
>   Maciej
>
Waldemar Brodkorb Dec. 27, 2017, 8:31 a.m. UTC | #3
Hi James,
James Hogan wrote,

> GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit
> multiply with 128bit result) for MIPS64r6 builds, even in code which
> doesn't explicitly use 128bit types, such as the following:
> 
> unsigned long func(unsigned long a, unsigned long b)
> {
> 	return a > (~0UL) / b;
> }
> 
> Which GCC rearanges to:
> 
> return (unsigned __int128)a * (unsigned __int128)b > 0xffffffff;
> 
> Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under
> normal circumstances we wouldn't expect any calls to __multi3 to be
> generated from kernel code.

I tested the patch and it works fine for me. I can build a mips64r6
kernel and run the uClibc-ng testsuite inside qemu system emulation.

It works for me with 4.9.x and 4.14.x kernels.

Thanks
 Waldemar

Patch
diff mbox series

diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 78c2affeabf8..e84e12655fa8 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -16,4 +16,5 @@  obj-$(CONFIG_CPU_R3000)		+= r3k_dump_tlb.o
 obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
+	 ucmpdi2.o
diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h
index 28002ed90c2c..199a7f96282f 100644
--- a/arch/mips/lib/libgcc.h
+++ b/arch/mips/lib/libgcc.h
@@ -10,10 +10,18 @@  typedef int word_type __attribute__ ((mode (__word__)));
 struct DWstruct {
 	int high, low;
 };
+
+struct TWstruct {
+	long long high, low;
+};
 #elif defined(__LITTLE_ENDIAN)
 struct DWstruct {
 	int low, high;
 };
+
+struct TWstruct {
+	long long low, high;
+};
 #else
 #error I feel sick.
 #endif
@@ -23,4 +31,13 @@  typedef union {
 	long long ll;
 } DWunion;
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
+typedef int ti_type __attribute__((mode(TI)));
+
+typedef union {
+	struct TWstruct s;
+	ti_type ti;
+} TWunion;
+#endif
+
 #endif /* __ASM_LIBGCC_H */
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c
new file mode 100644
index ..fba123e366c8
--- /dev/null
+++ b/arch/mips/lib/multi3.c
@@ -0,0 +1,52 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+/*
+ * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
+ * specific case only we'll implement it here.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
+
+/* multiply 64-bit values, low 64-bits returned */
+static inline long long notrace dmulu(long long a, long long b)
+{
+	long long res;
+	asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+	return res;
+}
+
+/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */
+static inline long long notrace dmuhu(long long a, long long b)
+{
+	long long res;
+	asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+	return res;
+}
+
+/* multiply 128-bit values, low 128-bits returned */
+ti_type notrace __multi3(ti_type a, ti_type b)
+{
+	TWunion res, aa, bb;
+
+	aa.ti = a;
+	bb.ti = b;
+
+	/*
+	 * a * b =           (a.lo * b.lo)
+	 *         + 2^64  * (a.hi * b.lo + a.lo * b.hi)
+	 *        [+ 2^128 * (a.hi * b.hi)]
+	 */
+	res.s.low = dmulu(aa.s.low, bb.s.low);
+	res.s.high = dmuhu(aa.s.low, bb.s.low);
+	res.s.high += dmulu(aa.s.high, bb.s.low);
+	res.s.high += dmulu(aa.s.low, bb.s.high);
+
+	return res.ti;
+}
+EXPORT_SYMBOL(__multi3);
+
+#endif /* 64BIT && CPU_MIPSR6 && GCC7 */