From e03948668cb2a4d8acd0746b2a596f1c85246553 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Thu, 7 Nov 2024 16:46:51 +0800
Subject: [PATCH] [CIR] [Lowering] [X86_64] Support VAArg for LongDouble

---
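Notes:

With this change, va_arg on long double lowers through the x86-64 path
instead of bailing out at the early isa<> guard in lowerVAArgX86_64. A
minimal sketch of the C pattern this enables (sum_ld and main are
illustrative only; the f2 test added below exercises the same shape):

    #include <stdarg.h>
    #include <stdio.h>

    long double sum_ld(int n, ...) {
      va_list ap;
      va_start(ap, n);
      long double total = 0.0L;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, long double); /* x86_fp80, read from the
                                             overflow area on x86-64 */
      va_end(ap);
      return total;
    }

    int main(void) {
      printf("%.2Lf\n", sum_ld(3, 1.0L, 2.0L, 3.5L)); /* prints 6.50 */
      return 0;
    }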
 clang/lib/CIR/Dialect/IR/CIRTypes.cpp                |   3 +-
 .../Transforms/TargetLowering/ABIInfoImpl.cpp        |   7 ++
 .../Transforms/TargetLowering/ABIInfoImpl.h          |   1 +
 .../Targets/LoweringPrepareX86CXXABI.cpp             |   5 +-
 .../Transforms/TargetLowering/Targets/X86.cpp        | 118 +++++++++++++++++-
 .../TargetLowering/Targets/X86_64ABIInfo.h           |   8 ++
 clang/test/CIR/Lowering/var-arg-x86_64.c             |  52 ++++++++
 7 files changed, 186 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index 3a600a0a0575..df89584fd3a9 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -745,7 +745,7 @@ const llvm::fltSemantics &FP80Type::getFloatSemantics() const {
 llvm::TypeSize
 FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                             mlir::DataLayoutEntryListRef params) const {
-  return llvm::TypeSize::getFixed(16);
+  return llvm::TypeSize::getFixed(128);
 }
 
 uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout,
@@ -766,6 +766,7 @@ const llvm::fltSemantics &FP128Type::getFloatSemantics() const {
 llvm::TypeSize
 FP128Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                              mlir::DataLayoutEntryListRef params) const {
+  // FIXME: We probably want it to return 128. But we're lacking a test now.
   return llvm::TypeSize::getFixed(16);
 }
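Why 128 rather than 16: getTypeSizeInBits was presumably returning the
byte count where MLIR's DataLayout expects bits (the FP128 FIXME above
points the same way). x86_fp80 carries 80 significant bits but occupies
128 bits (16 bytes) of storage with 16-byte alignment on x86-64. A quick
host-side check of those storage properties (assumes an x86-64 Linux
target; illustrative, not part of the patch):

    #include <float.h>
    #include <stdio.h>

    int main(void) {
      printf("storage bytes: %zu\n", sizeof(long double));   /* 16 */
      printf("alignment:     %zu\n", _Alignof(long double)); /* 16 */
      printf("mantissa bits: %d\n", LDBL_MANT_DIG);          /* 64 */
      return 0;
    }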
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
index e07315d54a38..66e40e6ac5d0 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
@@ -65,4 +65,11 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
   return CXXABI.getRecordArgABI(RT);
 }
 
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
+  auto st = mlir::dyn_cast<StructType>(ty);
+  if (!st)
+    return CIRCXXABI::RAA_Default;
+  return getRecordArgABI(st, CXXABI);
+}
+
 } // namespace cir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
index 8005b153a544..8088a333c4a5 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
@@ -33,6 +33,7 @@ mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);
 
 CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI);
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);
 
 } // namespace cir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
index ba376d26b0fc..d9b574fac4e7 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
@@ -46,7 +46,6 @@ std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
   mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
   if (!mo)
     return nullptr;
-
   mlir::PatternRewriter rewriter(mo.getContext());
   return cir::createLowerModule(mo, rewriter);
 }
@@ -92,7 +91,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   // Let's hope LLVM's va_arg instruction can take care of it.
   // Remove this when X86_64ABIInfo::classify can take care of every type.
   if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
-                 StructType>(op.getType()))
+                 StructType, LongDoubleType>(op.getType()))
     return nullptr;
 
   // Assume that va_list type is correct; should be pointer to LLVM type:
@@ -107,7 +106,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
   if (!lowerModule)
     return nullptr;
-
   mlir::Type ty = op.getType();
 
   // FIXME: How should we access the X86AVXABILevel?
@@ -167,7 +165,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   mlir::Block *contBlock = currentBlock->splitBlock(op);
   mlir::Block *inRegBlock = builder.createBlock(contBlock);
   mlir::Block *inMemBlock = builder.createBlock(contBlock);
-
   builder.setInsertionPointToEnd(currentBlock);
   builder.create<cir::BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
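For orientation, the va_list record this lowering walks is the System V
x86-64 one; a C sketch of its layout (field names follow the ABI
document; the typedef itself is illustrative, not code from the patch):

    typedef struct {
      unsigned int gp_offset;  /* byte offset of next free GP register slot */
      unsigned int fp_offset;  /* byte offset of next free FP register slot */
      void *overflow_arg_area; /* arguments passed on the stack             */
      void *reg_save_area;     /* spilled register arguments                */
    } va_list_tag;

Because the classification added below gives long double the X87/X87Up
pair (passed in memory), the in-register branch created above
(inRegBlock) is never taken for it: the value is always read from
overflow_arg_area, which is exactly what the new test checks.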
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
index 39bd1716aa3b..11c1ed459147 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -165,6 +165,21 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
     Current = Class::SSE;
     return;
 
+  } else if (mlir::isa<LongDoubleType>(Ty)) {
+    const llvm::fltSemantics *LDF =
+        &getContext().getTargetInfo().getLongDoubleFormat();
+    if (LDF == &llvm::APFloat::IEEEquad()) {
+      Lo = Class::SSE;
+      Hi = Class::SSEUp;
+    } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
+      Lo = Class::X87;
+      Hi = Class::X87Up;
+    } else if (LDF == &llvm::APFloat::IEEEdouble()) {
+      Current = Class::SSE;
+    } else {
+      llvm_unreachable("unexpected long double representation!");
+    }
+    return;
   } else if (mlir::isa<BoolType>(Ty)) {
     Current = Class::Integer;
   } else if (const auto RT = mlir::dyn_cast<StructType>(Ty)) {
@@ -267,6 +282,65 @@ void X86_64ABIInfo::classify(mlir::Type Ty, uint64_t OffsetBase, Class &Lo,
   cir_cconv_unreachable("NYI");
 }
 
+ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
+                                            unsigned freeIntRegs) const {
+  // If this is a scalar LLVM value then assume LLVM will pass it in the right
+  // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
+  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
+      /*!Ty->isBitIntType()*/) {
+    // FIXME: Handling enum type?
+
+    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
+  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
+    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
+
+  // Compute the byval alignment. We specify the alignment of the byval in all
+  // cases so that the mid-level optimizer knows the alignment of the byval.
+  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of
+  // the free integer registers. Doing this when there are free integer
+  // registers would require more care, as we would have to ensure that the
+  // coerced value did not claim the unused register. That would require
+  // either reordering the arguments to the function (so that any subsequent
+  // inreg values came first), or only doing this optimization when there were
+  // no following arguments that might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being
+  // passed by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t size = getContext().getTypeSize(ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (align == 8 && size <= 64)
+      return ABIArgInfo::getDirect(
+          cir::IntType::get(LT.getMLIRContext(), size, false));
+  }
+
+  return ABIArgInfo::getIndirect(align);
+}
+
 /// Return a type that will be passed by the backend in the low 8 bytes of an
 /// XMM register, corresponding to the SSE class.
 mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
@@ -278,7 +352,7 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   mlir::Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || mlir::isa<DoubleType>(T0))
-    return T0; // NOTE(cir): Not sure if this is correct.
+    return cir::DoubleType::get(LT.getMLIRContext());
 
   mlir::Type T1 = {};
   unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -296,6 +370,8 @@ mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type IRType,
     return T0;
   }
 
+  return cir::DoubleType::get(LT.getMLIRContext());
+
   cir_cconv_unreachable("NYI");
 }
 
@@ -539,6 +615,22 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
     ++neededSSE;
     break;
   }
+
+  // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+  // on the stack.
+  case Class::Memory:
+
+  // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+  // COMPLEX_X87, it is passed in memory.
+  case Class::X87:
+  case Class::ComplexX87:
+    if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
+      ++neededInt;
+    return getIndirectResult(Ty, freeIntRegs);
+
+  case Class::SSEUp:
+  case Class::X87Up:
+    llvm_unreachable("Invalid classification for lo word.");
+
   default:
     cir_cconv_assert_or_abort(!cir::MissingFeatures::X86ArgTypeClassification(),
                               "NYI");
@@ -546,6 +638,11 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
 
   mlir::Type HighPart = {};
   switch (Hi) {
+  case Class::Memory:
+  case Class::X87:
+  case Class::ComplexX87:
+    llvm_unreachable("Invalid classification for hi word.");
+
   case Class::NoClass:
     break;
 
@@ -558,8 +655,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(
       return ABIArgInfo::getDirect(HighPart, 8);
     break;
 
-  default:
-    cir_cconv_unreachable("NYI");
+  // X87Up generally doesn't occur here (long double is passed in
+  // memory), except in situations involving unions.
+  case Class::X87Up:
+  case Class::SSE:
+    ++neededSSE;
+    HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
+
+    if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
+      return ABIArgInfo::getDirect(HighPart, 8);
+    break;
+
+  // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+  // eightbyte is passed in the upper half of the last used SSE
+  // register. This only happens when 128-bit vectors are passed.
+  case Class::SSEUp:
+    llvm_unreachable("NYI && We need to implement GetByteVectorType");
+    break;
   }
 
   // If a high part was specified, merge it together with the low part. It is
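The freeIntRegs == 0 coercion at the end of getIndirectResult is easiest
to see from the caller's side. A hedged example (take7 is illustrative,
not from the patch): once the six integer argument registers rdi, rsi,
rdx, rcx, r8, r9 are consumed, an 8-byte aggregate still travels on the
stack as a single 64-bit integer rather than as a byval copy:

    struct pair {
      int a, b; /* one eightbyte: size 8, byval alignment max(4, 8) = 8 */
    };

    long take7(long r1, long r2, long r3, long r4, long r5, long r6,
               struct pair p) { /* no integer register left for p      */
      return r1 + p.a + p.b;   /* p arrives as one i64-sized stack slot */
    }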
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
index 201730519207..192ab750a548 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
@@ -69,6 +69,14 @@ class X86_64ABIInfo : public cir::ABIInfo {
                                 mlir::Type SourceTy,
                                 unsigned SourceOffset) const;
 
+  /// getIndirectResult - Given a source type \arg ty, return a suitable result
+  /// such that the argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
+                                      unsigned freeIntRegs) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers. In particular, platforms which
diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c
index 992d5e82cd98..012e702d7f17 100644
--- a/clang/test/CIR/Lowering/var-arg-x86_64.c
+++ b/clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -76,3 +76,55 @@ double f1(int n, ...) {
 // CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
 // CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
 // CIR: store [[CASTED_ARG]], [[RES]]
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
+// Ptr Mask Operations
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED_P:%.+]] = ptrtoint ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]] to i32
+// CHECK: [[MASKED:%.+]] = and i32 [[OVERFLOW_AREA_OFFSET_ALIGNED_P]], -16
+// CHECK: [[DIFF:%.+]] = sub i32 [[MASKED]], [[OVERFLOW_AREA_OFFSET_ALIGNED_P]]
+// CHECK: [[PTR_MASKED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], i32 [[DIFF]]
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
+
+// CIR: cir.func @f2
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<{{.*}} x 1>, !cir.ptr<!cir.array<{{.*}} x 1>>, ["valist"]
+// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR-DAG: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
+// CIR-DAG: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_AREA]] : !cir.ptr<!void>)
+// CIR-DAG: [[CONSTANT:%.+]] = cir.const #cir.int<15>
+// CIR-DAG: [[PTR_STRIDE:%.+]] = cir.ptr_stride([[CASTED]] {{.*}}[[CONSTANT]]
+// CIR-DAG: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
+// CIR-DAG: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
+// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
+// CIR: [[CAST_ALIGNED:%.+]] = cir.cast(bitcast, [[ALIGNED]] : !cir.ptr<!u8i>), !cir.ptr<!cir.long_double<!cir.f80>>
+// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
+// CIR: cir.store [[CAST_ALIGNED_VALUE]], [[RES]]
+// CIR: cir.va.end
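For reference, the arithmetic behind the "Ptr Mask Operations" block in
the test above, restated in plain C (next_fp80_slot is illustrative
only, not a helper in the patch):

    #include <stdint.h>

    /* Round overflow_arg_area up to the 16-byte alignment that
       x86_fp80 requires, return the aligned slot, and advance the
       pointer past the slot's 16 bytes. */
    void *next_fp80_slot(void **overflow_arg_area) {
      uintptr_t p = (uintptr_t)*overflow_arg_area + 15; /* ptr_stride 15  */
      uintptr_t aligned = p & ~(uintptr_t)15;           /* ptr_mask -16   */
      *overflow_arg_area = (void *)(aligned + 16);      /* bump past slot */
      return (void *)aligned;                           /* load from here */
    }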