Skip to content

Commit

Permalink
Fix indirect pointer detection for structs in VC
Browse files Browse the repository at this point in the history
If the pointer is the first member of the struct, the compiler would
incorrectly mark it as a byptr argument. The correct way is to mark it
as a byvalue argument with is_ptr set to true.
  • Loading branch information
vmustya authored and igcbot committed Nov 18, 2024
1 parent 0b4b682 commit e572b3b
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ SPDX-License-Identifier: MIT

#pragma once

const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 7;
const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 8;
15 changes: 8 additions & 7 deletions IGC/VectorCompiler/igcdeps/src/cmc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ void CMKernel::createPointerGlobalAnnotation(const KernelArgInfo &ArgInfo,
const auto BTI = ArgInfo.getBTI();
const auto Size = ArgInfo.getSizeInBytes();
const auto SourceOffset = ArgInfo.getOffsetInArg();
const bool IsLinearization = SourceOffset != 0 || ArgInfo.getArgNo() != Index;

auto PtrAnnotation = std::make_unique<PointerArgumentAnnotation>();

Expand All @@ -398,24 +399,24 @@ void CMKernel::createPointerGlobalAnnotation(const KernelArgInfo &ArgInfo,

PreDefinedAttrGetter::ArgAddrMode ZeAddrMode;
if (AddrMode == ArgAddressMode::Bindless) {
IGC_ASSERT(SourceOffset == 0);
IGC_ASSERT(!IsLinearization);
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::bindless;
} else if (AddrMode == ArgAddressMode::Stateful) {
IGC_ASSERT(SourceOffset == 0);
IGC_ASSERT(!IsLinearization);
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::stateful;
} else {
IGC_ASSERT(AddrMode == ArgAddressMode::Stateless);
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::stateless;
}

if (SourceOffset == 0) {
ZEInfoBuilder::addPayloadArgumentByPointer(
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, ZeAddrMode,
PreDefinedAttrGetter::ArgAddrSpace::global, getZEArgAccessType(Access));
} else { // Pass the argument as by_value with is_ptr = true
if (IsLinearization) { // Pass the argument as by_value with is_ptr = true
IGC_ASSERT(AddrMode == ArgAddressMode::Stateless);
ZEInfoBuilder::addPayloadArgumentByValue(
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, SourceOffset, true);
} else {
ZEInfoBuilder::addPayloadArgumentByPointer(
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, ZeAddrMode,
PreDefinedAttrGetter::ArgAddrSpace::global, getZEArgAccessType(Access));
}

if (AddrMode == ArgAddressMode::Stateful)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
enum class AddressModeType { None, Stateful, Bindless, Stateless };

private:
unsigned ArgNo;
unsigned Index;
KindType Kind;
AccessKindType AccessKind;
Expand All @@ -82,6 +83,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
KernelArgInfo() = default;

public:
unsigned getArgNo() const { return ArgNo; }
unsigned getIndex() const { return Index; }
KindType getKind() const { return Kind; }
AccessKindType getAccessKind() const { return AccessKind; }
Expand Down
6 changes: 3 additions & 3 deletions IGC/VectorCompiler/lib/GenXCodeGen/GenXDetectPointerArg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ bool GenXDetectPointerArg::handleKernel(Function &F) {
NewDescs.resize(F.arg_size(), "");

for (auto *Arg : PointerArgs) {
if (!Arg->getType()->isIntegerTy(64))
if (!Arg->getType()->isIntegerTy(64) && !Arg->getType()->isPointerTy())
continue;

auto ArgNo = Arg->getArgNo();
Expand Down Expand Up @@ -409,8 +409,8 @@ void GenXDetectPointerArg::analyzeValue(Value *V) {
WorkList.push(Inst->getOperand(1));
continue;
}
if (isa<GetElementPtrInst>(Inst)) {
WorkList.push(Inst->getOperand(0));
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
WorkList.push(GEP->getPointerOperand());
continue;
}

Expand Down
1 change: 1 addition & 0 deletions IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ KernelArgBuilder::translateArgument(const Argument &Arg) const {
Info.BTI = KM.getBTI(ArgNo);
// For implicit arguments that are byval argument linearization, index !=
// ArgNo in the IR function.
Info.ArgNo = ArgNo;
Info.Index = KM.getArgIndex(ArgNo);
// Linearization arguments have a non-zero offset in the original explicit
// byval arg.
Expand Down
69 changes: 69 additions & 0 deletions IGC/VectorCompiler/test/GenXDetectPointerArg/struct-ptr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: %opt_typed_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED-PTRS
; RUN: %opt_opaque_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPAQUE-PTRS

target datalayout = "e-p:64:64-i64:64-n8:16:32:64"

%struct.state = type { i8, i8 addrspace(1) *, float }

@data = internal global <8 x i64> undef, align 64, !spirv.Decorations !0 #0

; Function Attrs: nounwind
declare void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64>, <8 x i64>*) #1

; Function Attrs: nounwind
define dllexport spir_kernel void @foo(%struct.state* byval(%struct.state) %_arg_, i32 addrspace(1)* %_arg_1, i64 %impl.arg.private.base, i8 %__arg_lin__arg_.0, i8 addrspace(1) *%__arg_lin__arg_.8, float %__arg_lin__arg_.16) #2 {
entry:
%0 = ptrtoint i8 addrspace(1)* %__arg_lin__arg_.8 to i64
%1 = tail call <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1 true, i8 3, i8 4, i8 5, <2 x i8> zeroinitializer, i64 0, i64 %0, i16 1, i32 0, <8 x i64> undef)
tail call void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64> %1, <8 x i64>* nonnull @data)
ret void
}

declare <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <8 x i64>) #3

attributes #0 = { "VCByteOffset"="0" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="0" "genx_volatile" }
attributes #1 = { nounwind "target-cpu"="XeHPC" }
attributes #2 = { nounwind "CMGenxMain" "oclrt"="1" "target-cpu"="XeHPC" }
attributes #3 = { "target-cpu"="XeHPC" }

!spirv.Source = !{!0}
!opencl.spir.version = !{!1}
!opencl.ocl.version = !{!2}
!opencl.used.extensions = !{!3}
!opencl.used.optional.core.features = !{!3}
!spirv.Generator = !{!4}
!genx.kernels = !{!5}
!genx.kernel.internal = !{!10}

; CHECK: !genx.kernels = !{![[KERNEL:[0-9]+]]}
; CHECK-TYPED-PTRS: ![[KERNEL]] = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1)*, float)* @foo, !"foo", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
; CHECK-OPAQUE-PTRS: ![[KERNEL]] = !{ptr @foo, !"foo", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
; CHECK: ![[NODE]] = !{!"svmptr_t", !"svmptr_t", !"", !"", !"svmptr_t", !""}

!0 = !{i32 0, i32 100000}
!1 = !{i32 1, i32 2}
!2 = !{i32 1, i32 0}
!3 = !{}
!4 = !{i16 6, i16 14}
!5 = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1) *, float)* @foo, !"foo", !6, i32 0, !7, !8, !9, i32 0}
!6 = !{i32 112, i32 0, i32 96, i32 104, i32 104, i32 104}
!7 = !{i32 -1, i32 96, i32 64, i32 72, i32 80, i32 88}
!8 = !{i32 0, i32 0}
!9 = !{!"svmptr_t", !"svmptr_t"}
!10 = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1) *, float)* @foo, !11, !12, !13, null}
!11 = !{i32 0, i32 0, i32 0, i32 0, i32 8, i32 16}
!12 = !{i32 0, i32 1, i32 2, i32 0, i32 0, i32 0}
!13 = !{!14}
!14 = !{i32 0, !15}
!15 = !{!16, !17, !18}
!16 = !{i32 3, i32 0}
!17 = !{i32 4, i32 8}
!18 = !{i32 5, i32 16}
75 changes: 75 additions & 0 deletions IGC/ocloc_tests/VC/DetectPointerArg/struct-ptr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; REQUIRES: regkeys, pvc-supported
; RUN: llvm-as %s -o %t.bc
; RUN: ocloc -device pvc -llvm_input -options "-vc-codegen -ze-collect-cost-info -igc_opts 'ShaderDumpEnable=1, DumpToCustomDir=%t'" -output_no_suffix -file %t.bc
; RUN: cat %t/*.zeinfo | FileCheck %s

target datalayout = "e-p:64:64-i64:64-n8:16:32:64"

%struct.state = type { i8, i8 addrspace(1) *, float }

@data = internal global <8 x i64> undef, align 64, !spirv.Decorations !0 #0

; Function Attrs: nounwind
declare void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64>, <8 x i64>*)

; CHECK: payload_arguments:
; CHECK: - arg_type: arg_bypointer
; CHECK-NEXT: offset: 40
; CHECK-NEXT: size: 8
; CHECK-NEXT: arg_index: 1
; CHECK-NEXT: addrmode: stateless
; CHECK-NEXT: addrspace: global
; CHECK-NEXT: access_type: readwrite
; CHECK: - arg_type: arg_byvalue
; CHECK-NEXT: offset: 16
; CHECK-NEXT: size: 1
; CHECK-NEXT: arg_index: 0
; CHECK-NEXT: source_offset: 0
; CHECK: - arg_type: arg_byvalue
; CHECK-NEXT: offset: 24
; CHECK-NEXT: size: 8
; CHECK-NEXT: arg_index: 0
; CHECK-NEXT: source_offset: 8
; CHECK-NEXT: is_ptr: true
; CHECK: - arg_type: arg_byvalue
; CHECK-NEXT: offset: 32
; CHECK-NEXT: size: 4
; CHECK-NEXT: arg_index: 0
; CHECK-NEXT: source_offset: 16

; Function Attrs: nounwind
define dllexport spir_kernel void @foo(%struct.state* byval(%struct.state) "VCArgumentIOKind"="0" %_arg_, i32 addrspace(1)* "VCArgumentIOKind"="0" %_arg_1) #1 {
entry:
%0 = getelementptr inbounds %struct.state, %struct.state* %_arg_, i64 0, i32 1
%1 = load i8 addrspace(1)*, i8 addrspace(1)** %0, align 8
%2 = ptrtoint i8 addrspace(1)* %1 to i64
%3 = tail call <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1 true, i8 3, i8 4, i8 5, <2 x i8> zeroinitializer, i64 0, i64 %2, i16 1, i32 0, <8 x i64> undef)
tail call void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64> %3, <8 x i64>* nonnull @data)
ret void
}

declare <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <8 x i64>) #3

attributes #0 = { "VCByteOffset"="0" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="0" "genx_volatile" }
attributes #1 = { noinline nounwind "VCFunction" "VCNamedBarrierCount"="0" "VCSLMSize"="0" }

!spirv.Source = !{!0}
!opencl.spir.version = !{!1}
!opencl.ocl.version = !{!2}
!opencl.used.extensions = !{!3}
!opencl.used.optional.core.features = !{!3}
!spirv.Generator = !{!4}

!0 = !{i32 0, i32 100000}
!1 = !{i32 1, i32 2}
!2 = !{i32 1, i32 0}
!3 = !{}
!4 = !{i16 6, i16 14}

0 comments on commit e572b3b

Please sign in to comment.