1 files changed, 217 insertions, 0 deletions
diff --git a/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff b/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff
new file mode 100644
index 0000000..d7d670a
--- /dev/null
+++ b/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff
@@ -0,0 +1,217 @@
+Description: PowerPC: Optimize SPE double parameter calling setup
+Author: Justin Hibbits <jrh29@alumni.cwru.edu>
+Origin: https://reviews.llvm.org/D54583
+Last-Update: 2018-12-04
+
+Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.cpp
+===================================================================
+--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCISelLowering.cpp
++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.cpp
+@@ -385,8 +385,16 @@ PPCTargetLowering::PPCTargetLowering(con
+   } else {
+     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+-    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
++    if (Subtarget.hasSPE()) {
++      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
++    } else {
++      setOperationAction(ISD::BITCAST, MVT::i64, Expand);
++    }
++  }
++
++  if (Subtarget.hasSPE()) {
++    setOperationAction(ISD::EXTRACT_ELEMENT, MVT::i64, Custom);
+   }
+ 
+   // We cannot sextinreg(i1).  Expand to shifts.
+@@ -1366,6 +1374,9 @@ const char *PPCTargetLowering::getTarget
+   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
+   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
+   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
++  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
++  case PPCISD::EXTRACT_SPE_LO:  return "PPCISD::EXTRACT_SPE_LO";
++  case PPCISD::EXTRACT_SPE_HI:  return "PPCISD::EXTRACT_SPE_HI";
+   }
+   return nullptr;
+ }
+@@ -7886,6 +7897,15 @@ SDValue PPCTargetLowering::LowerBITCAST(
+   SDLoc dl(Op);
+   SDValue Op0 = Op->getOperand(0);
+ 
++  if (Subtarget.hasSPE()) {
++    if (Op.getValueType() == MVT::f64 &&
++        Op0.getOpcode() == ISD::BUILD_PAIR &&
++        (Op0.getOperand(1).getValueType() == MVT::i32) &&
++        (Op0.getOperand(0).getValueType() == MVT::i32))
++      return DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Op0.getOperand(0),
++          Op0.getOperand(1));
++  }
++
+   if (!EnableQuadPrecision ||
+       (Op.getValueType() != MVT::f128 ) ||
+       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+@@ -7897,6 +7917,26 @@ SDValue PPCTargetLowering::LowerBITCAST(
+                      Op0.getOperand(1));
+ }
+ 
++// Lower (EXTRACT_ELEMENT (i64 (BITCAST f64:$x)), 0/1) to EXTRACT_SPE_LO/HI of
++// $x (evmerge*). Returns an empty SDValue when the node does not match.
++SDValue PPCTargetLowering::LowerEXTRACT_ELEMENT(SDValue Op,
++                                                SelectionDAG &DAG) const {
++  if (!Subtarget.hasSPE() || Op.getValueType() != MVT::i32)
++    return SDValue();
++
++  // EXTRACT_SPE_LO/HI are typed as taking an FP source (SDTCisFP<1>), so only
++  // fold a BITCAST whose input really is f64.
++  SDValue Op0 = Op->getOperand(0);
++  if (Op0.getOpcode() != ISD::BITCAST ||
++      Op0.getOperand(0).getValueType() != MVT::f64)
++    return SDValue();
++
++  SDLoc dl(Op);
++  unsigned Opc = Op->getConstantOperandVal(1) == 0 ? PPCISD::EXTRACT_SPE_LO
++                                                   : PPCISD::EXTRACT_SPE_HI;
++  return DAG.getNode(Opc, dl, MVT::i32, Op0.getOperand(0));
++}
++
+ // If this is a case we can't handle, return null and let the default
+ // expansion code take care of it.  If we CAN select this case, and if it
+ // selects to a single instruction, return Op.  Otherwise, if we can codegen
+@@ -9680,6 +9720,8 @@ SDValue PPCTargetLowering::LowerOperatio
+     return LowerBSWAP(Op, DAG);
+   case ISD::ATOMIC_CMP_SWAP:
+     return LowerATOMIC_CMP_SWAP(Op, DAG);
++  case ISD::EXTRACT_ELEMENT:
++    return LowerEXTRACT_ELEMENT(Op, DAG);
+   }
+ }
+ 
+Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.h
+===================================================================
+--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCISelLowering.h
++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.h
+@@ -196,6 +196,15 @@ namespace llvm {
+       /// Direct move of 2 consective GPR to a VSX register.
+       BUILD_FP128,
+ 
++      /// Merge 2 GPRs (low word, high word) into a single SPE register
++      BUILD_SPE64,
++
++      /// Extract high SPE register component
++      EXTRACT_SPE_HI,
++
++      /// Extract low SPE register component
++      EXTRACT_SPE_LO,
++
+       /// Extract a subvector from signed integer vector and convert to FP.
+       /// It is primarily used to convert a (widened) illegal integer vector
+       /// type to a legal floating point vector type.
+@@ -1110,6 +1119,7 @@ namespace llvm {
+     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+     SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
++    SDValue LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const;
+ 
+     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
+Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrInfo.td
+===================================================================
+--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCInstrInfo.td
++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrInfo.td
+@@ -231,6 +231,22 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUIL
+                               SDTCisSameAs<1,2>]>,
+                            []>;
+ 
++def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
++                           SDTypeProfile<1, 2, // 1 result, 2 operands
++                             [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, // FP result
++                              SDTCisSameAs<1,2>]>, // both operands share a type
++                           []>;
++
++def PPCextract_spe_hi : SDNode<"PPCISD::EXTRACT_SPE_HI",
++                               SDTypeProfile<1, 1, // 1 result, 1 operand
++                                 [SDTCisInt<0>, SDTCisFP<1>]>, // int <- FP
++                                 []>;
++
++def PPCextract_spe_lo : SDNode<"PPCISD::EXTRACT_SPE_LO",
++                               SDTypeProfile<1, 1, // 1 result, 1 operand
++                               [SDTCisInt<0>, SDTCisFP<1>]>, // int <- FP
++                               []>;
++
+ // These are target-independent nodes, but have target-specific formats.
+ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+                            [SDNPHasChain, SDNPOutGlue]>;
+Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrSPE.td
+===================================================================
+--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCInstrSPE.td
++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrSPE.td
+@@ -512,7 +512,7 @@ def EVLWWSPLATX    : EVXForm_1<792, (out
+ 
+ def EVMERGEHI      : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+                                "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
+-def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
++def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
+                                "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
+ def EVMERGEHILO    : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+                                "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
+@@ -887,4 +887,15 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rh
+           (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+           (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
++
++
++def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)), // node operands: $rB first, then $rA
++          (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>; // swapped for EVMERGELO, which takes gprc inputs
++
++def : Pat<(i32 (PPCextract_spe_hi f64:$rA)), // high half of an f64 as i32
++          (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>; // merge $rA with itself, then take sub_32
++
++def : Pat<(i32 (PPCextract_spe_lo f64:$rA)), // low half of an f64 as i32
++          (i32 (EXTRACT_SUBREG $rA, sub_32))>; // sub_32 of the source register, no merge needed
++
+ }
+Index: llvm-toolchain-snapshot_8~svn350421/test/CodeGen/PowerPC/spe.ll
+===================================================================
+--- llvm-toolchain-snapshot_8~svn350421.orig/test/CodeGen/PowerPC/spe.ll
++++ llvm-toolchain-snapshot_8~svn350421/test/CodeGen/PowerPC/spe.ll
+@@ -472,10 +472,8 @@ entry:
+ ; CHECK-LABEL: test_dselect
+ ; CHECK: andi.
+ ; CHECK: bc
+-; CHECK: evldd
+-; CHECK: b
+-; CHECK: evldd
+-; CHECK: evstdd
++; CHECK: evor
++; CHECK: evmergehi
+ ; CHECK: blr
+ }
+ 
+@@ -519,7 +517,7 @@ entry:
+   %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0)
+   ret i32 %1
+ ; CHECK-LABEL: test_dasmconst
+-; CHECK: evldd
++; CHECK: evmergelo
+ ; CHECK: #APP
+ ; CHECK: efdctsi
+ ; CHECK: #NO_APP
+@@ -541,7 +539,7 @@ entry:
+   %a4.addr = alloca i32*, align 4
+   %a5.addr = alloca i32*, align 4
+   %ptr = alloca i32*, align 4
+-  %v1 = alloca [8 x i32], align 4
++  %v1 = alloca [9 x i32], align 4
+   %v2 = alloca [7 x i32], align 4
+   %v3 = alloca [5 x i32], align 4
+   store i32 %a1, i32* %a1.addr, align 4
+@@ -554,7 +552,7 @@ entry:
+   call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
+   %1 = fadd double %0, 3.14159
+   %2 = load i32*, i32** %ptr, align 4
+-  %3 = bitcast [8 x i32]* %v1 to i8*
++  %3 = bitcast [9 x i32]* %v1 to i8*
+   call void @llvm.memset.p0i8.i32(i8* align 4 %3, i8 0, i32 24, i1 true)
+   %4 = load i32*, i32** %a5.addr, align 4
+   store i32 0, i32* %4, align 4