; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s

; cbuffer CB {
;   float3 a1;     // offset    0, size 12 (+4)
;   double3 a2;    // offset   16, size 24
;   float16_t2 a3; // offset   40, size  4 (+4)
;   uint64_t3 a4;  // offset   48, size 24 (+8)
;   int4 a5;       // offset   80, size 16
;   uint16_t3 a6;  // offset   96, size  6 (+2)
;   uint2 a7;      // offset  104, size  8
; };
; Packed struct matching the cbuffer layout listed above: every "(+N)" gap in
; that listing appears here as an explicit target("dx.Padding", N) member, so
; the member offsets in this type equal the offsets in the listing.
%__cblayout_CB = type <{ <3 x float>, target("dx.Padding", 4), <3 x double>, <2 x half>, target("dx.Padding", 4), <3 x i64>, target("dx.Padding", 8), <4 x i32>, <3 x i16>, target("dx.Padding", 2), <2 x i32> }>

; Global that holds the cbuffer handle. @f stores the handle here and reloads
; it before accessing any member.
@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison

; CHECK: define void @f
; Reads every member of the cbuffer and copies it to %dst.
;
; Input IR, per member: a getpointer call with the member's byte offset into
; the cbuffer, a load from the returned addrspace(2) pointer, and a store to
; %dst (at a byte offset for every member except a1).
;
; Expected output, per member: the addrspace(2) load is replaced by one or more
; cbufferrow loads. In the expectations below the row index is the byte offset
; divided by 16, and the extracted element index is the remaining byte offset
; within that row divided by the element size. The stores to %dst are expected
; unchanged. Note that the %dst offsets are not the cbuffer offsets (e.g. a5 is
; read from cbuffer offset 80 but written to %dst + 72).
define void @f(ptr %dst) {
entry:
  ; Create the handle from its binding and publish it through @CB.cb.
  %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
  store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4

  ; The reloaded handle is captured as [[CB]] and must be the handle operand
  ; of every row load below.
  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
  %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8

  ;; a1
  ;
  ; float3 at cbuffer offset 0: row 0, float elements 0-2.
  ;
  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 0)
  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
  ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
  ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
  ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1
  ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2
  ; CHECK: store <3 x float> [[VEC2]], ptr %dst
  %a1_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
  %a1 = load <3 x float>, ptr addrspace(2) %a1_gep, align 16
  store <3 x float> %a1, ptr %dst, align 4

  ;; a2
  ;
  ; double3 at cbuffer offset 16: a row holds two doubles, so x and y come from
  ; row 1 and z comes from element 0 of row 2 (two row loads).
  ;
  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
  ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
  ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
  ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
  ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
  ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
  ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
  %a2_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 16)
  %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 32
  %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
  store <3 x double> %a2, ptr %a2.i, align 8

  ;; a3
  ;
  ; float16_t2 at cbuffer offset 40: row 2, 8 bytes into the row, i.e. half
  ; elements 4 and 5 of the eight-element row.
  ;
  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
  ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
  ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
  ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1
  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
  ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]]
  %a3_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 40)
  %a3 = load <2 x half>, ptr addrspace(2) %a3_gep, align 4
  %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
  store <2 x half> %a3, ptr %a3.i, align 2

  ;; a4
  ;
  ; uint64_t3 at cbuffer offset 48: x and y come from row 3, z from element 0
  ; of row 4 (same two-row shape as a2, with i64 elements).
  ;
  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 3)
  ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
  ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 4)
  ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
  ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
  ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2
  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
  ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]]
  %a4_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
  %a4 = load <3 x i64>, ptr addrspace(2) %a4_gep, align 32
  %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
  store <3 x i64> %a4, ptr %a4.i, align 8

  ;; a5
  ;
  ; int4 at cbuffer offset 80: row 5, all four i32 elements. The destination
  ; offset (72) differs from the cbuffer offset (80).
  ;
  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
  ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
  ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
  ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
  ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
  ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
  ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
  ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
  %a5_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 80)
  %a5 = load <4 x i32>, ptr addrspace(2) %a5_gep, align 16
  %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
  store <4 x i32> %a5, ptr %a5.i, align 4

  ;; a6
  ;
  ; uint16_t3 at cbuffer offset 96: row 6, i16 elements 0-2 of the
  ; eight-element row.
  ;
  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
  ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
  ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
  ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2
  ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0
  ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1
  ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2
  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88
  ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]]
  %a6_gep = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 96)
  %a6 = load <3 x i16>, ptr addrspace(2) %a6_gep, align 8
  %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88
  store <3 x i16> %a6, ptr %a6.i, align 2

  ;; a7 component-wise
  ;
  ; uint2 at cbuffer offset 104, read one scalar at a time rather than as a
  ; vector. Both components live in row 6 (the row also used for a6): x is i32
  ; element 2 and y is i32 element 3. Each scalar load is expected to get its
  ; own row load, and no vector is rebuilt.
  ;
  ; CHECK: [[LOAD0:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD0]], 2
  ; CHECK: [[PTR0:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 96
  ; CHECK: store i32 [[X]], ptr [[PTR0]]
  ; CHECK: [[LOAD1:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
  ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD1]], 3
  ; CHECK: [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 100
  ; CHECK: store i32 [[Y]], ptr [[PTR1]]
  ; x: loaded directly from the pointer returned for offset 104.
  %a7_gep0 = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 104)
  %a7.x = load i32, ptr addrspace(2) %a7_gep0, align 4
  %a7.i0 = getelementptr inbounds nuw i8, ptr %dst, i32 96
  store i32 %a7.x, ptr %a7.i0, align 4
  ; y: same base offset (104) plus a 4-byte GEP on the addrspace(2) pointer, so
  ; the extra byte offset has to be folded into the element index (3).
  %a7_gep1 = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 104)
  %a7.y_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a7_gep1, i32 4
  %a7.y = load i32, ptr addrspace(2) %a7.y_gep, align 4
  %a7.i1 = getelementptr inbounds nuw i8, ptr %dst, i32 100
  store i32 %a7.y, ptr %a7.i1, align 4

  ret void
}
