diff options
Diffstat (limited to 'debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff')
-rw-r--r-- | debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff b/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff new file mode 100644 index 0000000..d7d670a --- /dev/null +++ b/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff @@ -0,0 +1,217 @@ +Description: PowerPC: Optimize SPE double parameter calling setup +Author: Justin Hibbits <jrh29@alumni.cwru.edu> +Origin: https://reviews.llvm.org/D54583 +Last-Update: 2018-12-04 + +Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.cpp +=================================================================== +--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCISelLowering.cpp ++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -385,8 +385,16 @@ PPCTargetLowering::PPCTargetLowering(con + } else { + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); +- setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); ++ if (Subtarget.hasSPE()) { ++ setOperationAction(ISD::BITCAST, MVT::i64, Custom); ++ } else { ++ setOperationAction(ISD::BITCAST, MVT::i64, Expand); ++ } ++ } ++ ++ if (Subtarget.hasSPE()) { ++ setOperationAction(ISD::EXTRACT_ELEMENT, MVT::i64, Custom); + } + + // We cannot sextinreg(i1). Expand to shifts. +@@ -1366,6 +1374,9 @@ const char *PPCTargetLowering::getTarget + case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; + case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; + case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; ++ case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; ++ case PPCISD::EXTRACT_SPE_LO: return "PPCISD::EXTRACT_SPE_LO"; ++ case PPCISD::EXTRACT_SPE_HI: return "PPCISD::EXTRACT_SPE_HI"; + } + return nullptr; + } +@@ -7886,6 +7897,15 @@ SDValue PPCTargetLowering::LowerBITCAST( + SDLoc dl(Op); + SDValue Op0 = Op->getOperand(0); + ++ if (Subtarget.hasSPE()) { ++ if (Op.getValueType() == MVT::f64 && ++ Op0.getOpcode() == ISD::BUILD_PAIR && ++ (Op0.getOperand(1).getValueType() == MVT::i32) && ++ (Op0.getOperand(0).getValueType() == MVT::i32)) ++ return DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Op0.getOperand(0), ++ Op0.getOperand(1)); ++ } ++ + if (!EnableQuadPrecision || + (Op.getValueType() != MVT::f128 ) || + (Op0.getOpcode() != ISD::BUILD_PAIR) || +@@ -7897,6 +7917,26 @@ SDValue PPCTargetLowering::LowerBITCAST( + Op0.getOperand(1)); + } + ++// Lower EXTRACT_ELEMENT (i64 BITCAST f64), 0/1 to evmerge* ++SDValue PPCTargetLowering::LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const { ++ ++ SDLoc dl(Op); ++ SDValue Op0 = Op->getOperand(0); ++ ++ if (!Subtarget.hasSPE()) ++ return SDValue(); ++ ++ if (!(Op.getValueType() == MVT::i32 && ++ Op0.getOpcode() == ISD::BITCAST)) ++ return SDValue(); ++ ++ assert(Op0.getNumOperands() > 0 && "WTF?"); ++ if (Op->getConstantOperandVal(1) == 0) ++ return DAG.getNode(PPCISD::EXTRACT_SPE_LO, dl, MVT::i32, Op0.getOperand(0)); ++ ++ return DAG.getNode(PPCISD::EXTRACT_SPE_HI, dl, MVT::i32, Op0.getOperand(0)); ++} ++ + // If this is a case we can't handle, return null and let the default + // expansion code take care of it. If we CAN select this case, and if it + // selects to a single instruction, return Op. Otherwise, if we can codegen +@@ -9680,6 +9720,8 @@ SDValue PPCTargetLowering::LowerOperatio + return LowerBSWAP(Op, DAG); + case ISD::ATOMIC_CMP_SWAP: + return LowerATOMIC_CMP_SWAP(Op, DAG); ++ case ISD::EXTRACT_ELEMENT: ++ return LowerEXTRACT_ELEMENT(Op, DAG); + } + } + +Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.h +=================================================================== +--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCISelLowering.h ++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCISelLowering.h +@@ -196,6 +196,15 @@ namespace llvm { + /// Direct move of 2 consective GPR to a VSX register. + BUILD_FP128, + ++ /// Merge 2 GPRs to a single SPE register ++ BUILD_SPE64, ++ ++ /// Extract high SPE register component ++ EXTRACT_SPE_HI, ++ ++ /// Extract low SPE register component ++ EXTRACT_SPE_LO, ++ + /// Extract a subvector from signed integer vector and convert to FP. + /// It is primarily used to convert a (widened) illegal integer vector + /// type to a legal floating point vector type. +@@ -1110,6 +1119,7 @@ namespace llvm { + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const; + + SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; +Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrInfo.td +=================================================================== +--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCInstrInfo.td ++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrInfo.td +@@ -231,6 +231,22 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUIL + SDTCisSameAs<1,2>]>, + []>; + ++def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64", ++ SDTypeProfile<1, 2, ++ [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, ++ SDTCisSameAs<1,2>]>, ++ []>; ++ ++def PPCextract_spe_hi : SDNode<"PPCISD::EXTRACT_SPE_HI", ++ SDTypeProfile<1, 1, ++ [SDTCisInt<0>, SDTCisFP<1>]>, ++ []>; ++ ++def PPCextract_spe_lo : SDNode<"PPCISD::EXTRACT_SPE_LO", ++ SDTypeProfile<1, 1, ++ [SDTCisInt<0>, SDTCisFP<1>]>, ++ []>; ++ + // These are target-independent nodes, but have target-specific formats. + def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +Index: llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrSPE.td +=================================================================== +--- llvm-toolchain-snapshot_8~svn350421.orig/lib/Target/PowerPC/PPCInstrSPE.td ++++ llvm-toolchain-snapshot_8~svn350421/lib/Target/PowerPC/PPCInstrSPE.td +@@ -512,7 +512,7 @@ def EVLWWSPLATX : EVXForm_1<792, (out + + def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>; +-def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), ++def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>; + def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>; +@@ -887,4 +887,15 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rh + (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>; ++ ++ ++def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)), ++ (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>; ++ ++def : Pat<(i32 (PPCextract_spe_hi f64:$rA)), ++ (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>; ++ ++def : Pat<(i32 (PPCextract_spe_lo f64:$rA)), ++ (i32 (EXTRACT_SUBREG $rA, sub_32))>; ++ + } +Index: llvm-toolchain-snapshot_8~svn350421/test/CodeGen/PowerPC/spe.ll +=================================================================== +--- llvm-toolchain-snapshot_8~svn350421.orig/test/CodeGen/PowerPC/spe.ll ++++ llvm-toolchain-snapshot_8~svn350421/test/CodeGen/PowerPC/spe.ll +@@ -472,10 +472,8 @@ entry: + ; CHECK-LABEL: test_dselect + ; CHECK: andi. + ; CHECK: bc +-; CHECK: evldd +-; CHECK: b +-; CHECK: evldd +-; CHECK: evstdd ++; CHECK: evor ++; CHECK: evmergehi + ; CHECK: blr + } + +@@ -519,7 +517,7 @@ entry: + %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) + ret i32 %1 + ; CHECK-LABEL: test_dasmconst +-; CHECK: evldd ++; CHECK: evmergelo + ; CHECK: #APP + ; CHECK: efdctsi + ; CHECK: #NO_APP +@@ -541,7 +539,7 @@ entry: + %a4.addr = alloca i32*, align 4 + %a5.addr = alloca i32*, align 4 + %ptr = alloca i32*, align 4 +- %v1 = alloca [8 x i32], align 4 ++ %v1 = alloca [9 x i32], align 4 + %v2 = alloca [7 x i32], align 4 + %v3 = alloca [5 x i32], align 4 + store i32 %a1, i32* %a1.addr, align 4 +@@ -554,7 +552,7 @@ entry: + call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind + %1 = fadd double %0, 3.14159 + %2 = load i32*, i32** %ptr, align 4 +- %3 = bitcast [8 x i32]* %v1 to i8* ++ %3 = bitcast [9 x i32]* %v1 to i8* + call void @llvm.memset.p0i8.i32(i8* align 4 %3, i8 0, i32 24, i1 true) + %4 = load i32*, i32** %a5.addr, align 4 + store i32 0, i32* %4, align 4 |