//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the WebAssembly-specific TargetTransformInfo /// implementation. /// //===----------------------------------------------------------------------===// #include "WebAssemblyTargetTransformInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/CodeGen/CostTable.h" using namespace llvm; #define DEBUG_TYPE "wasmtti" TargetTransformInfo::PopcntSupportKind WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TargetTransformInfo::PSK_FastHardware; } unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { unsigned Result = BaseT::getNumberOfRegisters(ClassID); // For SIMD, use at least 16 registers, as a rough guess. bool Vector = (ClassID == 1); if (Vector) Result = std::max(Result, 16u); return Result; } TypeSize WebAssemblyTTIImpl::getRegisterBitWidth( TargetTransformInfo::RegisterKind K) const { switch (K) { case TargetTransformInfo::RGK_Scalar: return TypeSize::getFixed(64); case TargetTransformInfo::RGK_FixedWidthVector: return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64); case TargetTransformInfo::RGK_ScalableVector: return TypeSize::getScalable(0); } llvm_unreachable("Unsupported register kind"); } InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, ArrayRef Args, const Instruction *CxtI) const { if (ST->hasSIMD128()) { static const CostTblEntry ArithCostTbl[]{ // extmul + (maybe awkward) shuffle {ISD::MUL, MVT::v8i8, 4}, // 2x extmul + (okay) shuffle {ISD::MUL, MVT::v16i8, 4}, // extmul {ISD::MUL, MVT::v4i16, 1}, // extmul {ISD::MUL, MVT::v2i32, 1}, }; EVT DstVT = TLI->getValueType(DL, Ty); if (DstVT.isSimple()) { int ISD = TLI->InstructionOpcodeToISD(Opcode); if (const auto *Entry = CostTableLookup(ArithCostTbl, ISD, DstVT.getSimpleVT())) return Entry->Cost; } } InstructionCost Cost = BasicTTIImplBase::getArithmeticInstrCost( Opcode, Ty, CostKind, Op1Info, Op2Info); if (auto *VTy = dyn_cast(Ty)) { switch (Opcode) { case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: // SIMD128's shifts currently only accept a scalar shift count. For each // element, we'll need to extract, op, insert. The following is a rough // approximation. if (!Op2Info.isUniform()) Cost = cast(VTy)->getNumElements() * (TargetTransformInfo::TCC_Basic + getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) + TargetTransformInfo::TCC_Basic); break; } } return Cost; } InstructionCost WebAssemblyTTIImpl::getCastInstrCost( unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); auto SrcTy = TLI->getValueType(DL, Src); auto DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) { return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } if (!ST->hasSIMD128()) { return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } auto DstVT = DstTy.getSimpleVT(); auto SrcVT = SrcTy.getSimpleVT(); if (I && I->hasOneUser()) { auto *SingleUser = cast(*I->user_begin()); int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode()); // extmul_low support if (UserISD == ISD::MUL && (ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND)) { // Free low extensions. if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) || (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) || (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) { return 0; } // Will require an additional extlow operation for the intermediate // i16/i32 value. if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) || (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) { return 1; } } } static constexpr TypeConversionCostTblEntry ConversionTbl[] = { // extend_low {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1}, {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1}, {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1}, {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1}, {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1}, {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1}, // 2 x extend_low {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2}, {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2}, {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2}, {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2}, // extend_low, extend_high {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2}, {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2}, {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2}, {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2}, {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2}, {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2}, // 2x extend_low, extend_high {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4}, {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4}, {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4}, {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4}, // 6x extend_low, extend_high {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6}, {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6}, // shuffle {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2}, {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4}, {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2}, {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4}, // narrow, and {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2}, {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2}, // narrow, 2x and {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3}, // 3x narrow, 4x and {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7}, {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7}, // 7x narrow, 8x and {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15}, // convert_i32x4 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1}, {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1}, {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1}, // extend_low, convert {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2}, {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2}, {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2}, // extend_low x 2, convert {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3}, {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3}, // several shuffles {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10}, {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10}, /// trunc_sat, const, and, 3x narrow {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6}, {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6}, {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6}, {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6}, /// trunc_sat, const, and, narrow {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4}, {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4}, {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4}, {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4}, // 2x trunc_sat, const, 2x and, 3x narrow {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8}, {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8}, // 2x trunc_sat, const, 2x and, narrow {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6}, {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6}, }; if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) { return Entry->Cost; } return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { TTI::MemCmpExpansionOptions Options; Options.AllowOverlappingLoads = true; if (ST->hasSIMD128()) Options.LoadSizes.push_back(16); Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); Options.NumLoadsPerBlock = Options.MaxNumLoads; return Options; } InstructionCost WebAssemblyTTIImpl::getMemoryOpCost( unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const { if (!ST->hasSIMD128() || !isa(Ty)) { return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind); } EVT VT = TLI->getValueType(DL, Ty, true); // Type legalization can't handle structs if (VT == MVT::Other) return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind); auto LT = getTypeLegalizationCost(Ty); if (!LT.first.isValid()) return InstructionCost::getInvalid(); int ISD = TLI->InstructionOpcodeToISD(Opcode); unsigned width = VT.getSizeInBits(); if (ISD == ISD::LOAD) { // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads // can be lowered to load32_zero and load64_zero respectively. Assume SIMD // loads are twice as expensive as scalar. switch (width) { default: break; case 32: case 64: case 128: return 2; } } else if (ISD == ISD::STORE) { // For stores, we can use store lane operations. switch (width) { default: break; case 8: case 16: case 32: case 64: case 128: return 2; } } return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind); } InstructionCost WebAssemblyTTIImpl::getShuffleCost( TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef Args, const Instruction *CxtI) const { // Canonicalize the ShuffleKind in case optimizations didn't. // Otherwise, we might end up with the wrong ShuffleKind to match against. Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp); // Wasm SIMD128 has native splat instructions for all lane types. if (ST->hasSIMD128() && Kind == TTI::SK_Broadcast && isa(SrcTy)) return 1; return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp, Args, CxtI); } InstructionCost WebAssemblyTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { assert(Factor >= 2 && "Invalid interleave factor"); auto *VecTy = cast(Ty); if (!ST->hasSIMD128() || !isa(VecTy)) { return InstructionCost::getInvalid(); } if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); constexpr unsigned MaxInterleaveFactor = 4; if (Factor <= MaxInterleaveFactor) { unsigned MinElts = VecTy->getElementCount().getKnownMinValue(); // Ensure the number of vector elements is greater than 1. if (MinElts < 2 || MinElts % Factor != 0) return InstructionCost::getInvalid(); unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); // Ensure the element type is legal. if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64) return InstructionCost::getInvalid(); if (Factor != 2 && Factor != 4) return InstructionCost::getInvalid(); auto *SubVecTy = VectorType::get(VecTy->getElementType(), VecTy->getElementCount().divideCoefficientBy(Factor)); InstructionCost MemCost = getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind); unsigned VecSize = DL.getTypeSizeInBits(SubVecTy); unsigned MaxVecSize = 128; unsigned NumAccesses = std::max(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize); // A stride of two is commonly supported via dedicated instructions, so it // should be relatively cheap for all element sizes. A stride of four is // more expensive as it will likely require more shuffles. Using two // simd128 inputs is considered more expensive and we mainly account for // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable // arithmetic kernels. static const CostTblEntry ShuffleCostTbl[] = { // One reg. {2, MVT::v2i8, 1}, // interleave 2 x 2i8 into 4i8 {2, MVT::v4i8, 1}, // interleave 2 x 4i8 into 8i8 {2, MVT::v8i8, 1}, // interleave 2 x 8i8 into 16i8 {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16 {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16 {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32 // Two regs. {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8 {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16 {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32 // One reg. {4, MVT::v2i8, 4}, // interleave 4 x 2i8 into 8i8 {4, MVT::v4i8, 4}, // interleave 4 x 4i8 into 16i8 {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16 // Two regs. {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8 {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16 {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32 // Four regs. {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32 }; EVT ETy = TLI->getValueType(DL, SubVecTy); if (const auto *Entry = CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT())) return Entry->Cost + (NumAccesses * MemCost); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } InstructionCost WebAssemblyTTIImpl::getVectorInstrCost( unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const { InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost( Opcode, Val, CostKind, Index, Op0, Op1, VIC); // SIMD128's insert/extract currently only take constant indices. if (Index == -1u) return Cost + 25 * TargetTransformInfo::TCC_Expensive; return Cost; } InstructionCost WebAssemblyTTIImpl::getPartialReductionCost( unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional BinOp, TTI::TargetCostKind CostKind, std::optional FMF) const { InstructionCost Invalid = InstructionCost::getInvalid(); if (!VF.isFixed() || !ST->hasSIMD128()) return Invalid; if (CostKind != TTI::TCK_RecipThroughput) return Invalid; if (Opcode != Instruction::Add) return Invalid; EVT AccumEVT = EVT::getEVT(AccumType); // TODO: Add i64 accumulator. if (AccumEVT != MVT::i32) return Invalid; // Possible options: // - i16x8.extadd_pairwise_i8x16_sx // - i32x4.extadd_pairwise_i16x8_sx // - i32x4.dot_i16x8_s // Only try to support dot, for now. EVT InputEVT = EVT::getEVT(InputTypeA); if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) || (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) { return Invalid; } if (OpAExtend == TTI::PR_None) return Invalid; InstructionCost Cost(TTI::TCC_Basic); if (!BinOp) return Cost; if (OpAExtend != OpBExtend) return Invalid; if (*BinOp != Instruction::Mul) return Invalid; if (InputTypeA != InputTypeB) return Invalid; // Signed inputs can lower to dot if (InputEVT == MVT::i16 && VF.getFixedValue() == 8) return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2; // Double the size of the lowered sequence. if (InputEVT == MVT::i8 && VF.getFixedValue() == 16) return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4; return Invalid; } TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle( const IntrinsicInst *II) const { switch (II->getIntrinsicID()) { default: break; case Intrinsic::vector_reduce_fadd: return TTI::ReductionShuffle::Pairwise; } return TTI::ReductionShuffle::SplitHalf; } void WebAssemblyTTIImpl::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const { // Scan the loop: don't unroll loops with calls. This is a standard approach // for most (all?) targets. for (BasicBlock *BB : L->blocks()) for (Instruction &I : *BB) if (isa(I) || isa(I)) if (const Function *F = cast(I).getCalledFunction()) if (isLoweredToCall(F)) return; // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of // the various microarchitectures that use the BasicTTI implementation and // has been selected through heuristics across multiple cores and runtimes. UP.Partial = UP.Runtime = UP.UpperBound = true; UP.PartialThreshold = 30; // Avoid unrolling when optimizing for size. UP.OptSizeThreshold = 0; UP.PartialOptSizeThreshold = 0; // Set number of instructions optimized when "back edge" // becomes "fall through" to default value of 2. UP.BEInsns = 2; } bool WebAssemblyTTIImpl::supportsTailCalls() const { return getST()->hasTailCall(); } bool WebAssemblyTTIImpl::isProfitableToSinkOperands( Instruction *I, SmallVectorImpl &Ops) const { using namespace llvm::PatternMatch; if (!I->getType()->isVectorTy() || !I->isShift()) return false; Value *V = I->getOperand(1); // We dont need to sink constant splat. if (isa(V)) return false; if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()), m_Value(), m_ZeroMask()))) { // Sink insert Ops.push_back(&cast(V)->getOperandUse(0)); // Sink shuffle Ops.push_back(&I->getOperandUse(1)); return true; } return false; } /// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is /// constant. static Value *simplifyWasmSwizzle(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsRelaxed) { auto *V = dyn_cast(II.getArgOperand(1)); if (!V) return nullptr; auto *VecTy = cast(II.getType()); unsigned NumElts = VecTy->getNumElements(); assert(NumElts == 16); // Construct a shuffle mask from constant integers or UNDEFs. int Indexes[16]; bool AnyOutOfBounds = false; for (unsigned I = 0; I < NumElts; ++I) { Constant *COp = V->getAggregateElement(I); if (!COp || (!isa(COp) && !isa(COp))) return nullptr; if (isa(COp)) { Indexes[I] = -1; continue; } if (IsRelaxed && cast(COp)->getSExtValue() >= NumElts) { // The relaxed_swizzle operation always returns 0 if the lane index is // less than 0 when interpreted as a signed value. For lane indices above // 15, however, it can choose between returning 0 or the lane at `Index % // 16`. However, the choice must be made consistently. As the WebAssembly // spec states: // // "The result of relaxed operators are implementation-dependent, because // the set of possible results may depend on properties of the host // environment, such as its hardware. Technically, their behaviour is // controlled by a set of global parameters to the semantics that an // implementation can instantiate in different ways. These choices are // fixed, that is, parameters are constant during the execution of any // given program." // // The WebAssembly runtime may choose differently from us, so we can't // optimize a relaxed swizzle with lane indices above 15. return nullptr; } uint64_t Index = cast(COp)->getZExtValue(); if (Index >= NumElts) { AnyOutOfBounds = true; // If there are out-of-bounds indices, the swizzle instruction returns // zeroes in those lanes. We'll provide an all-zeroes vector as the // second argument to shufflevector and read the first element from it. Indexes[I] = NumElts; continue; } Indexes[I] = Index; } auto *V1 = II.getArgOperand(0); auto *V2 = AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy); return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts)); } std::optional WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { Intrinsic::ID IID = II.getIntrinsicID(); switch (IID) { case Intrinsic::wasm_swizzle: case Intrinsic::wasm_relaxed_swizzle: if (Value *V = simplifyWasmSwizzle( II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) { return IC.replaceInstUsesWith(II, V); } break; } return std::nullopt; }