; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s

; Returns the constant i1 value 1 using the amdgpu_gfx calling convention.
; All three checked targets are expected to materialise the value as 1 in v0
; and then return with s_setpc_b64 s[30:31].
define amdgpu_gfx i1 @return_i1() #0 {
; GFX9-LABEL: return_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i1:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i1 1
}

; Calls @return_i1 and ignores the i1 result.
; The expected code on each target spills v1 to the stack slot addressed by
; s33, keeps s30 and s31 in lanes 0 and 1 of v1 across the s_swappc_b64 call,
; reads them back afterwards, and reloads v1 before returning.
; Per-target differences visible in the checks: s32 is advanced by 0x400 on
; gfx900, by 0x200 on gfx1010 and by 16 on gfx1100; gfx1100 uses
; scratch_store_b32 / scratch_load_b32 where the other two use
; buffer_store_dword / buffer_load_dword, and it holds the saved values in
; s0-s2 rather than s34-s36.
define amdgpu_gfx void @call_i1() #0 {
; GFX9-LABEL: call_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, return_i1@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, return_i1@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i1:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_mov_b32 s35, return_i1@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, return_i1@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i1:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_i1@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_i1@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The returned i1 is not bound to a name; only the call sequence is checked.
  call amdgpu_gfx i1 @return_i1()
  ret void
}

; Returns the constant i16 value 10 using the amdgpu_gfx calling convention.
; All three checked targets are expected to place 10 in v0 and then return
; with s_setpc_b64 s[30:31].
define amdgpu_gfx i16 @return_i16() #0 {
; GFX9-LABEL: return_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 10
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 10
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i16 10
}

; Calls @return_i16 and ignores the i16 result.
; The expected code on each target spills v1 to the stack slot addressed by
; s33, keeps s30 and s31 in lanes 0 and 1 of v1 across the s_swappc_b64 call,
; reads them back afterwards, and reloads v1 before returning.
; Per-target differences visible in the checks: s32 is advanced by 0x400 on
; gfx900, by 0x200 on gfx1010 and by 16 on gfx1100; gfx1100 uses
; scratch_store_b32 / scratch_load_b32 where the other two use
; buffer_store_dword / buffer_load_dword.
define amdgpu_gfx void @call_i16() #0 {
; GFX9-LABEL: call_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, return_i16@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, return_i16@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_mov_b32 s35, return_i16@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, return_i16@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_i16@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_i16@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The returned i16 is not bound to a name; only the call sequence is checked.
  call amdgpu_gfx i16 @return_i16()
  ret void
}

; Returns the constant vector <i16 1, i16 2> using the amdgpu_gfx calling
; convention. All three checked targets are expected to place both elements
; in v0 as the single 32-bit constant 0x20001 (element 0 in the low half,
; element 1 in the high half) and then return with s_setpc_b64 s[30:31].
define amdgpu_gfx <2 x i16> @return_2xi16() #0 {
; GFX9-LABEL: return_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_2xi16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <2 x i16> <i16 1, i16 2>
}

; Calls @return_2xi16 and ignores the <2 x i16> result.
; The expected code on each target spills v1 to the stack slot addressed by
; s33, keeps s30 and s31 in lanes 0 and 1 of v1 across the s_swappc_b64 call,
; reads them back afterwards, and reloads v1 before returning.
; Per-target differences visible in the checks: s32 is advanced by 0x400 on
; gfx900, by 0x200 on gfx1010 and by 16 on gfx1100; gfx1100 uses
; scratch_store_b32 / scratch_load_b32 where the other two use
; buffer_store_dword / buffer_load_dword.
define amdgpu_gfx void @call_2xi16() #0 {
; GFX9-LABEL: call_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, return_2xi16@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, return_2xi16@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_2xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    s_mov_b32 s35, return_2xi16@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, return_2xi16@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_2xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_2xi16@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_2xi16@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The returned vector is not bound to a name; only the call sequence is checked.
  call amdgpu_gfx <2 x i16> @return_2xi16()
  ret void
}

; Returns the constant vector <i16 1, i16 2, i16 3> using the amdgpu_gfx
; calling convention. The checks expect the first two elements in v0 as the
; constant 0x20001 and the third element as 3 in v1. gfx900 and gfx1010 use
; two v_mov_b32_e32 instructions; gfx1100 is expected to combine both moves
; into a single v_dual_mov_b32, which is why this function has separate
; per-target check blocks for gfx1010 and gfx1100.
define amdgpu_gfx <3 x i16> @return_3xi16() #0 {
; GFX9-LABEL: return_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    v_mov_b32_e32 v1, 3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT:    v_mov_b32_e32 v1, 3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <3 x i16> <i16 1, i16 2, i16 3>
}

; Calls @return_3xi16 and ignores the <3 x i16> result.
; The checked sequence has the same shape as the narrower call tests in this
; file, except that v2 (not v1) is the register spilled to the stack and used
; to hold s30 and s31 in lanes 0 and 1 across the s_swappc_b64 call.
; NOTE(review): presumably v2 is chosen because the callee's result occupies
; v0 and v1 (see the checks for return_3xi16) - confirm against the backend.
; Per-target differences visible in the checks: s32 is advanced by 0x400 on
; gfx900, by 0x200 on gfx1010 and by 16 on gfx1100; gfx1100 uses
; scratch_store_b32 / scratch_load_b32 where the other two use
; buffer_store_dword / buffer_load_dword.
define amdgpu_gfx void @call_3xi16() #0 {
; GFX9-LABEL: call_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, return_3xi16@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, return_3xi16@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    s_mov_b32 s35, return_3xi16@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, return_3xi16@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_3xi16@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_3xi16@abs32@lo
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v2, 1
; GFX11-NEXT:    v_readlane_b32 s30, v2, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The returned vector is not bound to a name; only the call sequence is checked.
  call amdgpu_gfx <3 x i16> @return_3xi16()
  ret void
}

; Check that return values that overlap callee-saved registers (CSRs) are handled correctly.

; Returns a zero-initialised <100 x i32> using the amdgpu_gfx calling
; convention. The checks expect every register from v0 through v99 to be set
; to 0 before the s_setpc_b64 return: gfx900 and gfx1010 use one
; v_mov_b32_e32 per register, while gfx1100 pairs adjacent registers with
; v_dual_mov_b32. Unlike the other functions in this file, the body has no
; named entry block, so the expected block comment is a bare "%bb.0".
define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
; GFX9-LABEL: return_100xi32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    v_mov_b32_e32 v3, 0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v6, 0
; GFX9-NEXT:    v_mov_b32_e32 v7, 0
; GFX9-NEXT:    v_mov_b32_e32 v8, 0
; GFX9-NEXT:    v_mov_b32_e32 v9, 0
; GFX9-NEXT:    v_mov_b32_e32 v10, 0
; GFX9-NEXT:    v_mov_b32_e32 v11, 0
; GFX9-NEXT:    v_mov_b32_e32 v12, 0
; GFX9-NEXT:    v_mov_b32_e32 v13, 0
; GFX9-NEXT:    v_mov_b32_e32 v14, 0
; GFX9-NEXT:    v_mov_b32_e32 v15, 0
; GFX9-NEXT:    v_mov_b32_e32 v16, 0
; GFX9-NEXT:    v_mov_b32_e32 v17, 0
; GFX9-NEXT:    v_mov_b32_e32 v18, 0
; GFX9-NEXT:    v_mov_b32_e32 v19, 0
; GFX9-NEXT:    v_mov_b32_e32 v20, 0
; GFX9-NEXT:    v_mov_b32_e32 v21, 0
; GFX9-NEXT:    v_mov_b32_e32 v22, 0
; GFX9-NEXT:    v_mov_b32_e32 v23, 0
; GFX9-NEXT:    v_mov_b32_e32 v24, 0
; GFX9-NEXT:    v_mov_b32_e32 v25, 0
; GFX9-NEXT:    v_mov_b32_e32 v26, 0
; GFX9-NEXT:    v_mov_b32_e32 v27, 0
; GFX9-NEXT:    v_mov_b32_e32 v28, 0
; GFX9-NEXT:    v_mov_b32_e32 v29, 0
; GFX9-NEXT:    v_mov_b32_e32 v30, 0
; GFX9-NEXT:    v_mov_b32_e32 v31, 0
; GFX9-NEXT:    v_mov_b32_e32 v32, 0
; GFX9-NEXT:    v_mov_b32_e32 v33, 0
; GFX9-NEXT:    v_mov_b32_e32 v34, 0
; GFX9-NEXT:    v_mov_b32_e32 v35, 0
; GFX9-NEXT:    v_mov_b32_e32 v36, 0
; GFX9-NEXT:    v_mov_b32_e32 v37, 0
; GFX9-NEXT:    v_mov_b32_e32 v38, 0
; GFX9-NEXT:    v_mov_b32_e32 v39, 0
; GFX9-NEXT:    v_mov_b32_e32 v40, 0
; GFX9-NEXT:    v_mov_b32_e32 v41, 0
; GFX9-NEXT:    v_mov_b32_e32 v42, 0
; GFX9-NEXT:    v_mov_b32_e32 v43, 0
; GFX9-NEXT:    v_mov_b32_e32 v44, 0
; GFX9-NEXT:    v_mov_b32_e32 v45, 0
; GFX9-NEXT:    v_mov_b32_e32 v46, 0
; GFX9-NEXT:    v_mov_b32_e32 v47, 0
; GFX9-NEXT:    v_mov_b32_e32 v48, 0
; GFX9-NEXT:    v_mov_b32_e32 v49, 0
; GFX9-NEXT:    v_mov_b32_e32 v50, 0
; GFX9-NEXT:    v_mov_b32_e32 v51, 0
; GFX9-NEXT:    v_mov_b32_e32 v52, 0
; GFX9-NEXT:    v_mov_b32_e32 v53, 0
; GFX9-NEXT:    v_mov_b32_e32 v54, 0
; GFX9-NEXT:    v_mov_b32_e32 v55, 0
; GFX9-NEXT:    v_mov_b32_e32 v56, 0
; GFX9-NEXT:    v_mov_b32_e32 v57, 0
; GFX9-NEXT:    v_mov_b32_e32 v58, 0
; GFX9-NEXT:    v_mov_b32_e32 v59, 0
; GFX9-NEXT:    v_mov_b32_e32 v60, 0
; GFX9-NEXT:    v_mov_b32_e32 v61, 0
; GFX9-NEXT:    v_mov_b32_e32 v62, 0
; GFX9-NEXT:    v_mov_b32_e32 v63, 0
; GFX9-NEXT:    v_mov_b32_e32 v64, 0
; GFX9-NEXT:    v_mov_b32_e32 v65, 0
; GFX9-NEXT:    v_mov_b32_e32 v66, 0
; GFX9-NEXT:    v_mov_b32_e32 v67, 0
; GFX9-NEXT:    v_mov_b32_e32 v68, 0
; GFX9-NEXT:    v_mov_b32_e32 v69, 0
; GFX9-NEXT:    v_mov_b32_e32 v70, 0
; GFX9-NEXT:    v_mov_b32_e32 v71, 0
; GFX9-NEXT:    v_mov_b32_e32 v72, 0
; GFX9-NEXT:    v_mov_b32_e32 v73, 0
; GFX9-NEXT:    v_mov_b32_e32 v74, 0
; GFX9-NEXT:    v_mov_b32_e32 v75, 0
; GFX9-NEXT:    v_mov_b32_e32 v76, 0
; GFX9-NEXT:    v_mov_b32_e32 v77, 0
; GFX9-NEXT:    v_mov_b32_e32 v78, 0
; GFX9-NEXT:    v_mov_b32_e32 v79, 0
; GFX9-NEXT:    v_mov_b32_e32 v80, 0
; GFX9-NEXT:    v_mov_b32_e32 v81, 0
; GFX9-NEXT:    v_mov_b32_e32 v82, 0
; GFX9-NEXT:    v_mov_b32_e32 v83, 0
; GFX9-NEXT:    v_mov_b32_e32 v84, 0
; GFX9-NEXT:    v_mov_b32_e32 v85, 0
; GFX9-NEXT:    v_mov_b32_e32 v86, 0
; GFX9-NEXT:    v_mov_b32_e32 v87, 0
; GFX9-NEXT:    v_mov_b32_e32 v88, 0
; GFX9-NEXT:    v_mov_b32_e32 v89, 0
; GFX9-NEXT:    v_mov_b32_e32 v90, 0
; GFX9-NEXT:    v_mov_b32_e32 v91, 0
; GFX9-NEXT:    v_mov_b32_e32 v92, 0
; GFX9-NEXT:    v_mov_b32_e32 v93, 0
; GFX9-NEXT:    v_mov_b32_e32 v94, 0
; GFX9-NEXT:    v_mov_b32_e32 v95, 0
; GFX9-NEXT:    v_mov_b32_e32 v96, 0
; GFX9-NEXT:    v_mov_b32_e32 v97, 0
; GFX9-NEXT:    v_mov_b32_e32 v98, 0
; GFX9-NEXT:    v_mov_b32_e32 v99, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_100xi32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v7, 0
; GFX10-NEXT:    v_mov_b32_e32 v8, 0
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v10, 0
; GFX10-NEXT:    v_mov_b32_e32 v11, 0
; GFX10-NEXT:    v_mov_b32_e32 v12, 0
; GFX10-NEXT:    v_mov_b32_e32 v13, 0
; GFX10-NEXT:    v_mov_b32_e32 v14, 0
; GFX10-NEXT:    v_mov_b32_e32 v15, 0
; GFX10-NEXT:    v_mov_b32_e32 v16, 0
; GFX10-NEXT:    v_mov_b32_e32 v17, 0
; GFX10-NEXT:    v_mov_b32_e32 v18, 0
; GFX10-NEXT:    v_mov_b32_e32 v19, 0
; GFX10-NEXT:    v_mov_b32_e32 v20, 0
; GFX10-NEXT:    v_mov_b32_e32 v21, 0
; GFX10-NEXT:    v_mov_b32_e32 v22, 0
; GFX10-NEXT:    v_mov_b32_e32 v23, 0
; GFX10-NEXT:    v_mov_b32_e32 v24, 0
; GFX10-NEXT:    v_mov_b32_e32 v25, 0
; GFX10-NEXT:    v_mov_b32_e32 v26, 0
; GFX10-NEXT:    v_mov_b32_e32 v27, 0
; GFX10-NEXT:    v_mov_b32_e32 v28, 0
; GFX10-NEXT:    v_mov_b32_e32 v29, 0
; GFX10-NEXT:    v_mov_b32_e32 v30, 0
; GFX10-NEXT:    v_mov_b32_e32 v31, 0
; GFX10-NEXT:    v_mov_b32_e32 v32, 0
; GFX10-NEXT:    v_mov_b32_e32 v33, 0
; GFX10-NEXT:    v_mov_b32_e32 v34, 0
; GFX10-NEXT:    v_mov_b32_e32 v35, 0
; GFX10-NEXT:    v_mov_b32_e32 v36, 0
; GFX10-NEXT:    v_mov_b32_e32 v37, 0
; GFX10-NEXT:    v_mov_b32_e32 v38, 0
; GFX10-NEXT:    v_mov_b32_e32 v39, 0
; GFX10-NEXT:    v_mov_b32_e32 v40, 0
; GFX10-NEXT:    v_mov_b32_e32 v41, 0
; GFX10-NEXT:    v_mov_b32_e32 v42, 0
; GFX10-NEXT:    v_mov_b32_e32 v43, 0
; GFX10-NEXT:    v_mov_b32_e32 v44, 0
; GFX10-NEXT:    v_mov_b32_e32 v45, 0
; GFX10-NEXT:    v_mov_b32_e32 v46, 0
; GFX10-NEXT:    v_mov_b32_e32 v47, 0
; GFX10-NEXT:    v_mov_b32_e32 v48, 0
; GFX10-NEXT:    v_mov_b32_e32 v49, 0
; GFX10-NEXT:    v_mov_b32_e32 v50, 0
; GFX10-NEXT:    v_mov_b32_e32 v51, 0
; GFX10-NEXT:    v_mov_b32_e32 v52, 0
; GFX10-NEXT:    v_mov_b32_e32 v53, 0
; GFX10-NEXT:    v_mov_b32_e32 v54, 0
; GFX10-NEXT:    v_mov_b32_e32 v55, 0
; GFX10-NEXT:    v_mov_b32_e32 v56, 0
; GFX10-NEXT:    v_mov_b32_e32 v57, 0
; GFX10-NEXT:    v_mov_b32_e32 v58, 0
; GFX10-NEXT:    v_mov_b32_e32 v59, 0
; GFX10-NEXT:    v_mov_b32_e32 v60, 0
; GFX10-NEXT:    v_mov_b32_e32 v61, 0
; GFX10-NEXT:    v_mov_b32_e32 v62, 0
; GFX10-NEXT:    v_mov_b32_e32 v63, 0
; GFX10-NEXT:    v_mov_b32_e32 v64, 0
; GFX10-NEXT:    v_mov_b32_e32 v65, 0
; GFX10-NEXT:    v_mov_b32_e32 v66, 0
; GFX10-NEXT:    v_mov_b32_e32 v67, 0
; GFX10-NEXT:    v_mov_b32_e32 v68, 0
; GFX10-NEXT:    v_mov_b32_e32 v69, 0
; GFX10-NEXT:    v_mov_b32_e32 v70, 0
; GFX10-NEXT:    v_mov_b32_e32 v71, 0
; GFX10-NEXT:    v_mov_b32_e32 v72, 0
; GFX10-NEXT:    v_mov_b32_e32 v73, 0
; GFX10-NEXT:    v_mov_b32_e32 v74, 0
; GFX10-NEXT:    v_mov_b32_e32 v75, 0
; GFX10-NEXT:    v_mov_b32_e32 v76, 0
; GFX10-NEXT:    v_mov_b32_e32 v77, 0
; GFX10-NEXT:    v_mov_b32_e32 v78, 0
; GFX10-NEXT:    v_mov_b32_e32 v79, 0
; GFX10-NEXT:    v_mov_b32_e32 v80, 0
; GFX10-NEXT:    v_mov_b32_e32 v81, 0
; GFX10-NEXT:    v_mov_b32_e32 v82, 0
; GFX10-NEXT:    v_mov_b32_e32 v83, 0
; GFX10-NEXT:    v_mov_b32_e32 v84, 0
; GFX10-NEXT:    v_mov_b32_e32 v85, 0
; GFX10-NEXT:    v_mov_b32_e32 v86, 0
; GFX10-NEXT:    v_mov_b32_e32 v87, 0
; GFX10-NEXT:    v_mov_b32_e32 v88, 0
; GFX10-NEXT:    v_mov_b32_e32 v89, 0
; GFX10-NEXT:    v_mov_b32_e32 v90, 0
; GFX10-NEXT:    v_mov_b32_e32 v91, 0
; GFX10-NEXT:    v_mov_b32_e32 v92, 0
; GFX10-NEXT:    v_mov_b32_e32 v93, 0
; GFX10-NEXT:    v_mov_b32_e32 v94, 0
; GFX10-NEXT:    v_mov_b32_e32 v95, 0
; GFX10-NEXT:    v_mov_b32_e32 v96, 0
; GFX10-NEXT:    v_mov_b32_e32 v97, 0
; GFX10-NEXT:    v_mov_b32_e32 v98, 0
; GFX10-NEXT:    v_mov_b32_e32 v99, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_100xi32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0
; GFX11-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
; GFX11-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v9, 0
; GFX11-NEXT:    v_dual_mov_b32 v10, 0 :: v_dual_mov_b32 v11, 0
; GFX11-NEXT:    v_dual_mov_b32 v12, 0 :: v_dual_mov_b32 v13, 0
; GFX11-NEXT:    v_dual_mov_b32 v14, 0 :: v_dual_mov_b32 v15, 0
; GFX11-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v17, 0
; GFX11-NEXT:    v_dual_mov_b32 v18, 0 :: v_dual_mov_b32 v19, 0
; GFX11-NEXT:    v_dual_mov_b32 v20, 0 :: v_dual_mov_b32 v21, 0
; GFX11-NEXT:    v_dual_mov_b32 v22, 0 :: v_dual_mov_b32 v23, 0
; GFX11-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v25, 0
; GFX11-NEXT:    v_dual_mov_b32 v26, 0 :: v_dual_mov_b32 v27, 0
; GFX11-NEXT:    v_dual_mov_b32 v28, 0 :: v_dual_mov_b32 v29, 0
; GFX11-NEXT:    v_dual_mov_b32 v30, 0 :: v_dual_mov_b32 v31, 0
; GFX11-NEXT:    v_dual_mov_b32 v32, 0 :: v_dual_mov_b32 v33, 0
; GFX11-NEXT:    v_dual_mov_b32 v34, 0 :: v_dual_mov_b32 v35, 0
; GFX11-NEXT:    v_dual_mov_b32 v36, 0 :: v_dual_mov_b32 v37, 0
; GFX11-NEXT:    v_dual_mov_b32 v38, 0 :: v_dual_mov_b32 v39, 0
; GFX11-NEXT:    v_dual_mov_b32 v40, 0 :: v_dual_mov_b32 v41, 0
; GFX11-NEXT:    v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 0
; GFX11-NEXT:    v_dual_mov_b32 v44, 0 :: v_dual_mov_b32 v45, 0
; GFX11-NEXT:    v_dual_mov_b32 v46, 0 :: v_dual_mov_b32 v47, 0
; GFX11-NEXT:    v_dual_mov_b32 v48, 0 :: v_dual_mov_b32 v49, 0
; GFX11-NEXT:    v_dual_mov_b32 v50, 0 :: v_dual_mov_b32 v51, 0
; GFX11-NEXT:    v_dual_mov_b32 v52, 0 :: v_dual_mov_b32 v53, 0
; GFX11-NEXT:    v_dual_mov_b32 v54, 0 :: v_dual_mov_b32 v55, 0
; GFX11-NEXT:    v_dual_mov_b32 v56, 0 :: v_dual_mov_b32 v57, 0
; GFX11-NEXT:    v_dual_mov_b32 v58, 0 :: v_dual_mov_b32 v59, 0
; GFX11-NEXT:    v_dual_mov_b32 v60, 0 :: v_dual_mov_b32 v61, 0
; GFX11-NEXT:    v_dual_mov_b32 v62, 0 :: v_dual_mov_b32 v63, 0
; GFX11-NEXT:    v_dual_mov_b32 v64, 0 :: v_dual_mov_b32 v65, 0
; GFX11-NEXT:    v_dual_mov_b32 v66, 0 :: v_dual_mov_b32 v67, 0
; GFX11-NEXT:    v_dual_mov_b32 v68, 0 :: v_dual_mov_b32 v69, 0
; GFX11-NEXT:    v_dual_mov_b32 v70, 0 :: v_dual_mov_b32 v71, 0
; GFX11-NEXT:    v_dual_mov_b32 v72, 0 :: v_dual_mov_b32 v73, 0
; GFX11-NEXT:    v_dual_mov_b32 v74, 0 :: v_dual_mov_b32 v75, 0
; GFX11-NEXT:    v_dual_mov_b32 v76, 0 :: v_dual_mov_b32 v77, 0
; GFX11-NEXT:    v_dual_mov_b32 v78, 0 :: v_dual_mov_b32 v79, 0
; GFX11-NEXT:    v_dual_mov_b32 v80, 0 :: v_dual_mov_b32 v81, 0
; GFX11-NEXT:    v_dual_mov_b32 v82, 0 :: v_dual_mov_b32 v83, 0
; GFX11-NEXT:    v_dual_mov_b32 v84, 0 :: v_dual_mov_b32 v85, 0
; GFX11-NEXT:    v_dual_mov_b32 v86, 0 :: v_dual_mov_b32 v87, 0
; GFX11-NEXT:    v_dual_mov_b32 v88, 0 :: v_dual_mov_b32 v89, 0
; GFX11-NEXT:    v_dual_mov_b32 v90, 0 :: v_dual_mov_b32 v91, 0
; GFX11-NEXT:    v_dual_mov_b32 v92, 0 :: v_dual_mov_b32 v93, 0
; GFX11-NEXT:    v_dual_mov_b32 v94, 0 :: v_dual_mov_b32 v95, 0
; GFX11-NEXT:    v_dual_mov_b32 v96, 0 :: v_dual_mov_b32 v97, 0
; GFX11-NEXT:    v_dual_mov_b32 v98, 0 :: v_dual_mov_b32 v99, 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret <100 x i32> zeroinitializer
}

define amdgpu_gfx void @call_100xi32() #0 {
; GFX9-LABEL: call_100xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s36, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v100, s30, 0
; GFX9-NEXT:    s_mov_b32 s35, return_100xi32@abs32@hi
; GFX9-NEXT:    s_mov_b32 s34, return_100xi32@abs32@lo
; GFX9-NEXT:    s_addk_i32 s32, 0x2400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v100, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    buffer_load_dword v95, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v100, 1
; GFX9-NEXT:    v_readlane_b32 s30, v100, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_100xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s36, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v100, s30, 0
; GFX10-NEXT:    s_mov_b32 s35, return_100xi32@abs32@hi
; GFX10-NEXT:    s_mov_b32 s34, return_100xi32@abs32@lo
; GFX10-NEXT:    s_addk_i32 s32, 0x1200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v72, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v73, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v74, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v75, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v76, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v77, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v78, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v79, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v88, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v89, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v90, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v91, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v92, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    v_writelane_b32 v100, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_clause 0x1f
; GFX10-NEXT:    buffer_load_dword v95, off, s[0:3], s33
; GFX10-NEXT:    buffer_load_dword v94, off, s[0:3], s33 offset:4
; GFX10-NEXT:    buffer_load_dword v93, off, s[0:3], s33 offset:8
; GFX10-NEXT:    buffer_load_dword v92, off, s[0:3], s33 offset:12
; GFX10-NEXT:    buffer_load_dword v91, off, s[0:3], s33 offset:16
; GFX10-NEXT:    buffer_load_dword v90, off, s[0:3], s33 offset:20
; GFX10-NEXT:    buffer_load_dword v89, off, s[0:3], s33 offset:24
; GFX10-NEXT:    buffer_load_dword v88, off, s[0:3], s33 offset:28
; GFX10-NEXT:    buffer_load_dword v79, off, s[0:3], s33 offset:32
; GFX10-NEXT:    buffer_load_dword v78, off, s[0:3], s33 offset:36
; GFX10-NEXT:    buffer_load_dword v77, off, s[0:3], s33 offset:40
; GFX10-NEXT:    buffer_load_dword v76, off, s[0:3], s33 offset:44
; GFX10-NEXT:    buffer_load_dword v75, off, s[0:3], s33 offset:48
; GFX10-NEXT:    buffer_load_dword v74, off, s[0:3], s33 offset:52
; GFX10-NEXT:    buffer_load_dword v73, off, s[0:3], s33 offset:56
; GFX10-NEXT:    buffer_load_dword v72, off, s[0:3], s33 offset:60
; GFX10-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:64
; GFX10-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:68
; GFX10-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:72
; GFX10-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:76
; GFX10-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:80
; GFX10-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:84
; GFX10-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:88
; GFX10-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:92
; GFX10-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:96
; GFX10-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:100
; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:104
; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:108
; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:112
; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:116
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:120
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:124
; GFX10-NEXT:    v_readlane_b32 s31, v100, 1
; GFX10-NEXT:    v_readlane_b32 s30, v100, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_xor_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_mov_b32 s33, s36
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_100xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v100, s33 offset:128 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v100, s30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_100xi32@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_100xi32@abs32@lo
; GFX11-NEXT:    s_addk_i32 s32, 0x90
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 offset:124
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:120
; GFX11-NEXT:    scratch_store_b32 off, v42, s33 offset:116
; GFX11-NEXT:    scratch_store_b32 off, v43, s33 offset:112
; GFX11-NEXT:    scratch_store_b32 off, v44, s33 offset:108
; GFX11-NEXT:    scratch_store_b32 off, v45, s33 offset:104
; GFX11-NEXT:    scratch_store_b32 off, v46, s33 offset:100
; GFX11-NEXT:    scratch_store_b32 off, v47, s33 offset:96
; GFX11-NEXT:    scratch_store_b32 off, v56, s33 offset:92
; GFX11-NEXT:    scratch_store_b32 off, v57, s33 offset:88
; GFX11-NEXT:    scratch_store_b32 off, v58, s33 offset:84
; GFX11-NEXT:    scratch_store_b32 off, v59, s33 offset:80
; GFX11-NEXT:    scratch_store_b32 off, v60, s33 offset:76
; GFX11-NEXT:    scratch_store_b32 off, v61, s33 offset:72
; GFX11-NEXT:    scratch_store_b32 off, v62, s33 offset:68
; GFX11-NEXT:    scratch_store_b32 off, v63, s33 offset:64
; GFX11-NEXT:    scratch_store_b32 off, v72, s33 offset:60
; GFX11-NEXT:    scratch_store_b32 off, v73, s33 offset:56
; GFX11-NEXT:    scratch_store_b32 off, v74, s33 offset:52
; GFX11-NEXT:    scratch_store_b32 off, v75, s33 offset:48
; GFX11-NEXT:    scratch_store_b32 off, v76, s33 offset:44
; GFX11-NEXT:    scratch_store_b32 off, v77, s33 offset:40
; GFX11-NEXT:    scratch_store_b32 off, v78, s33 offset:36
; GFX11-NEXT:    scratch_store_b32 off, v79, s33 offset:32
; GFX11-NEXT:    scratch_store_b32 off, v88, s33 offset:28
; GFX11-NEXT:    scratch_store_b32 off, v89, s33 offset:24
; GFX11-NEXT:    scratch_store_b32 off, v90, s33 offset:20
; GFX11-NEXT:    scratch_store_b32 off, v91, s33 offset:16
; GFX11-NEXT:    scratch_store_b32 off, v92, s33 offset:12
; GFX11-NEXT:    scratch_store_b32 off, v93, s33 offset:8
; GFX11-NEXT:    scratch_store_b32 off, v94, s33 offset:4
; GFX11-NEXT:    scratch_store_b32 off, v95, s33
; GFX11-NEXT:    v_writelane_b32 v100, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_load_b32 v95, off, s33
; GFX11-NEXT:    scratch_load_b32 v94, off, s33 offset:4
; GFX11-NEXT:    scratch_load_b32 v93, off, s33 offset:8
; GFX11-NEXT:    scratch_load_b32 v92, off, s33 offset:12
; GFX11-NEXT:    scratch_load_b32 v91, off, s33 offset:16
; GFX11-NEXT:    scratch_load_b32 v90, off, s33 offset:20
; GFX11-NEXT:    scratch_load_b32 v89, off, s33 offset:24
; GFX11-NEXT:    scratch_load_b32 v88, off, s33 offset:28
; GFX11-NEXT:    scratch_load_b32 v79, off, s33 offset:32
; GFX11-NEXT:    scratch_load_b32 v78, off, s33 offset:36
; GFX11-NEXT:    scratch_load_b32 v77, off, s33 offset:40
; GFX11-NEXT:    scratch_load_b32 v76, off, s33 offset:44
; GFX11-NEXT:    scratch_load_b32 v75, off, s33 offset:48
; GFX11-NEXT:    scratch_load_b32 v74, off, s33 offset:52
; GFX11-NEXT:    scratch_load_b32 v73, off, s33 offset:56
; GFX11-NEXT:    scratch_load_b32 v72, off, s33 offset:60
; GFX11-NEXT:    scratch_load_b32 v63, off, s33 offset:64
; GFX11-NEXT:    scratch_load_b32 v62, off, s33 offset:68
; GFX11-NEXT:    scratch_load_b32 v61, off, s33 offset:72
; GFX11-NEXT:    scratch_load_b32 v60, off, s33 offset:76
; GFX11-NEXT:    scratch_load_b32 v59, off, s33 offset:80
; GFX11-NEXT:    scratch_load_b32 v58, off, s33 offset:84
; GFX11-NEXT:    scratch_load_b32 v57, off, s33 offset:88
; GFX11-NEXT:    scratch_load_b32 v56, off, s33 offset:92
; GFX11-NEXT:    scratch_load_b32 v47, off, s33 offset:96
; GFX11-NEXT:    scratch_load_b32 v46, off, s33 offset:100
; GFX11-NEXT:    scratch_load_b32 v45, off, s33 offset:104
; GFX11-NEXT:    scratch_load_b32 v44, off, s33 offset:108
; GFX11-NEXT:    scratch_load_b32 v43, off, s33 offset:112
; GFX11-NEXT:    scratch_load_b32 v42, off, s33 offset:116
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:120
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 offset:124
; GFX11-NEXT:    v_readlane_b32 s31, v100, 1
; GFX11-NEXT:    v_readlane_b32 s30, v100, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v100, off, s33 offset:128 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <100 x i32> @return_100xi32()
  ret void
}

; Check that return values that do not fit in registers do not crash the compiler.

define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX9-LABEL: return_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2032
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2016
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2000
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1984
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1968
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1952
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1936
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1920
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1904
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1888
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1872
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1856
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1840
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1824
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1808
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1792
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1776
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1760
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1744
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1728
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1712
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1696
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1680
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1664
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1648
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1632
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1616
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1600
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1584
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1568
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1552
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1536
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1520
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1504
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1488
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1472
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1456
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1440
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1424
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1408
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1392
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1376
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1360
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1344
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1328
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1312
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1296
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1280
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1264
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1248
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1232
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1216
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1200
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1184
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1168
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1152
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1136
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1120
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1104
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1088
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1072
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1056
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1040
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1024
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1008
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:992
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:976
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:960
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:944
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:928
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:912
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:896
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:880
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:864
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:848
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:832
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:816
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:800
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:784
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:768
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:752
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:736
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:720
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:704
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:688
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:672
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:656
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:640
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:624
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:608
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:592
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:576
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:560
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:544
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:528
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:512
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:496
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:480
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:464
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:448
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:432
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:416
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:400
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:384
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:368
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:352
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:336
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:320
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:304
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:288
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:272
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:256
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:240
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:224
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:208
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:192
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:176
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:160
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:144
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:128
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:96
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:80
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:64
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:48
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:32
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:16
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <512 x i32> zeroinitializer
}

; Caller side of the <512 x i32> return test: calls return_512xi32 and ignores
; the result. In the expected output below, the caller saves the old s33, rounds
; s33 up from s32 to an aligned value, bumps s32 to reserve stack, and puts a
; value derived from s33 into v0 before the call. The callee's checks store the
; result through v0, so v0 is presumably the address of the return slot.
; s30/s31 are kept in lanes of a VGPR that is spilled and reloaded around the
; call.
define amdgpu_gfx void @call_512xi32() #0 {
; GFX9-LABEL: call_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s35, s33
; GFX9-NEXT:    s_add_i32 s33, s32, 0x1ffc0
; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffe0000
; GFX9-NEXT:    s_xor_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    s_mov_b32 s37, return_512xi32@abs32@hi
; GFX9-NEXT:    s_mov_b32 s36, return_512xi32@abs32@lo
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT:    s_mov_b32 s38, s34
; GFX9-NEXT:    s_mov_b32 s34, s32
; GFX9-NEXT:    s_add_i32 s32, s32, 0x60000
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_mov_b32 s32, s34
; GFX9-NEXT:    s_mov_b32 s34, s38
; GFX9-NEXT:    s_xor_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s35
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s35, s33
; GFX10-NEXT:    s_add_i32 s33, s32, 0xffe0
; GFX10-NEXT:    s_and_b32 s33, s33, 0xffff0000
; GFX10-NEXT:    s_xor_saveexec_b32 s36, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s36
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT:    s_mov_b32 s37, return_512xi32@abs32@hi
; GFX10-NEXT:    s_mov_b32 s36, return_512xi32@abs32@lo
; GFX10-NEXT:    s_mov_b32 s38, s34
; GFX10-NEXT:    s_mov_b32 s34, s32
; GFX10-NEXT:    s_add_i32 s32, s32, 0x30000
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_mov_b32 s32, s34
; GFX10-NEXT:    s_mov_b32 s34, s38
; GFX10-NEXT:    s_xor_saveexec_b32 s36, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s36
; GFX10-NEXT:    s_mov_b32 s33, s35
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s35, s33
; GFX11-NEXT:    s_add_i32 s33, s32, 0x7ff
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s33, s33, 0xfffff800
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v5, s30, 0
; GFX11-NEXT:    v_mov_b32_e32 v0, s33
; GFX11-NEXT:    s_mov_b32 s1, return_512xi32@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_512xi32@abs32@lo
; GFX11-NEXT:    s_mov_b32 s36, s34
; GFX11-NEXT:    s_mov_b32 s34, s32
; GFX11-NEXT:    s_addk_i32 s32, 0x1800
; GFX11-NEXT:    v_writelane_b32 v5, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v5, 1
; GFX11-NEXT:    v_readlane_b32 s30, v5, 0
; GFX11-NEXT:    s_mov_b32 s32, s34
; GFX11-NEXT:    s_mov_b32 s34, s36
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v5, off, s33 offset:2048 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s35
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <512 x i32> @return_512xi32()
  ret void
}

; Check that return values larger than the VGPR limit are handled correctly.

; Returns its <72 x i32> argument unchanged. In the expected output below, the
; elements that arrive in v1-v31 are stored directly to the buffer addressed by
; v0 (offsets 0-120), while the remaining 41 elements are loaded from the stack
; (s32-relative offsets 0-160) and stored to the same buffer at offsets 124-284.
; The GFX11 output additionally saves and restores v40-v44 and v56-v63 on the
; stack, since it uses them as temporaries for the copy.
define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
; GFX9-LABEL: return_72xi32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:160
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:284
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:156
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:280
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:152
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:276
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:148
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:272
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:144
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:268
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:140
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:264
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:136
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:260
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:132
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:256
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:116
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:108
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:100
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:92
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:84
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:76
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:68
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:60
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:44
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:36
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:28
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:20
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:12
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_72xi32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_clause 0x7
; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:68
; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:72
; GFX10-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:76
; GFX10-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:80
; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:84
; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:88
; GFX10-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:92
; GFX10-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:120
; GFX10-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:116
; GFX10-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:112
; GFX10-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:108
; GFX10-NEXT:    s_clause 0x7
; GFX10-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:128
; GFX10-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:132
; GFX10-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:136
; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:140
; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:144
; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:148
; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:152
; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:156
; GFX10-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:104
; GFX10-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:100
; GFX10-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:96
; GFX10-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:92
; GFX10-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:88
; GFX10-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:84
; GFX10-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:80
; GFX10-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:76
; GFX10-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:72
; GFX10-NEXT:    s_clause 0x7
; GFX10-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:96
; GFX10-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:100
; GFX10-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:104
; GFX10-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:108
; GFX10-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:112
; GFX10-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:116
; GFX10-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:120
; GFX10-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:124
; GFX10-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:68
; GFX10-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:64
; GFX10-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:60
; GFX10-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:56
; GFX10-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:52
; GFX10-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:48
; GFX10-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:44
; GFX10-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:40
; GFX10-NEXT:    s_clause 0x7
; GFX10-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:32
; GFX10-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:36
; GFX10-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:40
; GFX10-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:44
; GFX10-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:48
; GFX10-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:52
; GFX10-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:56
; GFX10-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:60
; GFX10-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:36
; GFX10-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:32
; GFX10-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:28
; GFX10-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; GFX10-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:12
; GFX10-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:16
; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:20
; GFX10-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:24
; GFX10-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; GFX10-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4
; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:8
; GFX10-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:160
; GFX10-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:28
; GFX10-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32
; GFX10-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(2)
; GFX10-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:284
; GFX10-NEXT:    buffer_store_dword v51, v0, s[0:3], 0 offen offset:280
; GFX10-NEXT:    buffer_store_dword v50, v0, s[0:3], 0 offen offset:276
; GFX10-NEXT:    buffer_store_dword v49, v0, s[0:3], 0 offen offset:272
; GFX10-NEXT:    buffer_store_dword v48, v0, s[0:3], 0 offen offset:268
; GFX10-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:264
; GFX10-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:260
; GFX10-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:256
; GFX10-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:252
; GFX10-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:248
; GFX10-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:244
; GFX10-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:240
; GFX10-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:236
; GFX10-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:232
; GFX10-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:228
; GFX10-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:224
; GFX10-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:220
; GFX10-NEXT:    buffer_store_dword v39, v0, s[0:3], 0 offen offset:216
; GFX10-NEXT:    buffer_store_dword v38, v0, s[0:3], 0 offen offset:212
; GFX10-NEXT:    buffer_store_dword v37, v0, s[0:3], 0 offen offset:208
; GFX10-NEXT:    buffer_store_dword v36, v0, s[0:3], 0 offen offset:204
; GFX10-NEXT:    buffer_store_dword v35, v0, s[0:3], 0 offen offset:200
; GFX10-NEXT:    buffer_store_dword v34, v0, s[0:3], 0 offen offset:196
; GFX10-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:192
; GFX10-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:188
; GFX10-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:184
; GFX10-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:180
; GFX10-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:176
; GFX10-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:172
; GFX10-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:168
; GFX10-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:164
; GFX10-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:160
; GFX10-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:156
; GFX10-NEXT:    s_waitcnt vmcnt(1)
; GFX10-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:152
; GFX10-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:148
; GFX10-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:144
; GFX10-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:140
; GFX10-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:136
; GFX10-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:132
; GFX10-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_72xi32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_clause 0xc
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:212
; GFX11-NEXT:    scratch_store_b32 off, v41, s32 offset:208
; GFX11-NEXT:    scratch_store_b32 off, v42, s32 offset:204
; GFX11-NEXT:    scratch_store_b32 off, v43, s32 offset:200
; GFX11-NEXT:    scratch_store_b32 off, v44, s32 offset:196
; GFX11-NEXT:    scratch_store_b32 off, v56, s32 offset:192
; GFX11-NEXT:    scratch_store_b32 off, v57, s32 offset:188
; GFX11-NEXT:    scratch_store_b32 off, v58, s32 offset:184
; GFX11-NEXT:    scratch_store_b32 off, v59, s32 offset:180
; GFX11-NEXT:    scratch_store_b32 off, v60, s32 offset:176
; GFX11-NEXT:    scratch_store_b32 off, v61, s32 offset:172
; GFX11-NEXT:    scratch_store_b32 off, v62, s32 offset:168
; GFX11-NEXT:    scratch_store_b32 off, v63, s32 offset:164
; GFX11-NEXT:    s_clause 0x11
; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:16
; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:12
; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:8
; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:32
; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:28
; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:24
; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:48
; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:44
; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:40
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:64
; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:60
; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:56
; GFX11-NEXT:    scratch_load_b32 v44, off, s32 offset:80
; GFX11-NEXT:    scratch_load_b32 v43, off, s32 offset:76
; GFX11-NEXT:    scratch_load_b32 v42, off, s32 offset:72
; GFX11-NEXT:    scratch_load_b32 v59, off, s32 offset:96
; GFX11-NEXT:    scratch_load_b32 v58, off, s32 offset:92
; GFX11-NEXT:    scratch_load_b32 v57, off, s32 offset:88
; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:80
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    scratch_load_b32 v23, off, s32 offset:112
; GFX11-NEXT:    scratch_load_b32 v22, off, s32 offset:108
; GFX11-NEXT:    scratch_load_b32 v21, off, s32 offset:104
; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:64
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    scratch_load_b32 v19, off, s32 offset:128
; GFX11-NEXT:    scratch_load_b32 v18, off, s32 offset:124
; GFX11-NEXT:    scratch_load_b32 v17, off, s32 offset:120
; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:48
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    scratch_load_b32 v15, off, s32 offset:144
; GFX11-NEXT:    scratch_load_b32 v14, off, s32 offset:140
; GFX11-NEXT:    scratch_load_b32 v13, off, s32 offset:136
; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:32
; GFX11-NEXT:    s_clause 0xd
; GFX11-NEXT:    scratch_load_b32 v63, off, s32 offset:160
; GFX11-NEXT:    scratch_load_b32 v62, off, s32 offset:156
; GFX11-NEXT:    scratch_load_b32 v61, off, s32 offset:152
; GFX11-NEXT:    scratch_load_b32 v60, off, s32 offset:148
; GFX11-NEXT:    scratch_load_b32 v12, off, s32 offset:132
; GFX11-NEXT:    scratch_load_b32 v16, off, s32 offset:116
; GFX11-NEXT:    scratch_load_b32 v20, off, s32 offset:100
; GFX11-NEXT:    scratch_load_b32 v56, off, s32 offset:84
; GFX11-NEXT:    scratch_load_b32 v41, off, s32 offset:68
; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:52
; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:36
; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:20
; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:4
; GFX11-NEXT:    scratch_load_b32 v32, off, s32
; GFX11-NEXT:    s_waitcnt vmcnt(10)
; GFX11-NEXT:    scratch_store_b128 v0, v[60:63], off offset:272
; GFX11-NEXT:    s_waitcnt vmcnt(9)
; GFX11-NEXT:    scratch_store_b128 v0, v[12:15], off offset:256
; GFX11-NEXT:    s_waitcnt vmcnt(8)
; GFX11-NEXT:    scratch_store_b128 v0, v[16:19], off offset:240
; GFX11-NEXT:    s_waitcnt vmcnt(7)
; GFX11-NEXT:    scratch_store_b128 v0, v[20:23], off offset:224
; GFX11-NEXT:    s_waitcnt vmcnt(6)
; GFX11-NEXT:    scratch_store_b128 v0, v[56:59], off offset:208
; GFX11-NEXT:    s_waitcnt vmcnt(5)
; GFX11-NEXT:    scratch_store_b128 v0, v[41:44], off offset:192
; GFX11-NEXT:    s_waitcnt vmcnt(4)
; GFX11-NEXT:    scratch_store_b128 v0, v[37:40], off offset:176
; GFX11-NEXT:    s_waitcnt vmcnt(3)
; GFX11-NEXT:    scratch_store_b128 v0, v[52:55], off offset:160
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    scratch_store_b128 v0, v[48:51], off offset:144
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    scratch_store_b128 v0, v[33:36], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off offset:112
; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:96
; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:16
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT:    s_clause 0xc
; GFX11-NEXT:    scratch_load_b32 v63, off, s32 offset:164
; GFX11-NEXT:    scratch_load_b32 v62, off, s32 offset:168
; GFX11-NEXT:    scratch_load_b32 v61, off, s32 offset:172
; GFX11-NEXT:    scratch_load_b32 v60, off, s32 offset:176
; GFX11-NEXT:    scratch_load_b32 v59, off, s32 offset:180
; GFX11-NEXT:    scratch_load_b32 v58, off, s32 offset:184
; GFX11-NEXT:    scratch_load_b32 v57, off, s32 offset:188
; GFX11-NEXT:    scratch_load_b32 v56, off, s32 offset:192
; GFX11-NEXT:    scratch_load_b32 v44, off, s32 offset:196
; GFX11-NEXT:    scratch_load_b32 v43, off, s32 offset:200
; GFX11-NEXT:    scratch_load_b32 v42, off, s32 offset:204
; GFX11-NEXT:    scratch_load_b32 v41, off, s32 offset:208
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:212
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret <72 x i32> %val
}

define amdgpu_gfx void @call_72xi32() #1 {
; GFX9-LABEL: call_72xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s35, s33
; GFX9-NEXT:    s_add_i32 s33, s32, 0x7fc0
; GFX9-NEXT:    s_and_b32 s33, s33, 0xffff8000
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s38, s34
; GFX9-NEXT:    s_mov_b32 s34, s32
; GFX9-NEXT:    s_add_i32 s32, s32, 0x28000
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:36
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:40
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:44
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:48
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:52
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:56
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:60
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:88
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:92
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:96
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:112
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:124
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:128
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:136
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:140
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:144
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:148
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:152
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:156
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:160
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT:    v_writelane_b32 v63, s30, 0
; GFX9-NEXT:    s_mov_b32 s37, return_72xi32@abs32@hi
; GFX9-NEXT:    s_mov_b32 s36, return_72xi32@abs32@lo
; GFX9-NEXT:    v_add_u32_e32 v0, 0x200, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    v_mov_b32_e32 v3, 0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v6, 0
; GFX9-NEXT:    v_mov_b32_e32 v7, 0
; GFX9-NEXT:    v_mov_b32_e32 v8, 0
; GFX9-NEXT:    v_mov_b32_e32 v9, 0
; GFX9-NEXT:    v_mov_b32_e32 v10, 0
; GFX9-NEXT:    v_mov_b32_e32 v11, 0
; GFX9-NEXT:    v_mov_b32_e32 v12, 0
; GFX9-NEXT:    v_mov_b32_e32 v13, 0
; GFX9-NEXT:    v_mov_b32_e32 v14, 0
; GFX9-NEXT:    v_mov_b32_e32 v15, 0
; GFX9-NEXT:    v_mov_b32_e32 v16, 0
; GFX9-NEXT:    v_mov_b32_e32 v17, 0
; GFX9-NEXT:    v_mov_b32_e32 v18, 0
; GFX9-NEXT:    v_mov_b32_e32 v19, 0
; GFX9-NEXT:    v_mov_b32_e32 v20, 0
; GFX9-NEXT:    v_mov_b32_e32 v21, 0
; GFX9-NEXT:    v_mov_b32_e32 v22, 0
; GFX9-NEXT:    v_mov_b32_e32 v23, 0
; GFX9-NEXT:    v_mov_b32_e32 v24, 0
; GFX9-NEXT:    v_mov_b32_e32 v25, 0
; GFX9-NEXT:    v_mov_b32_e32 v26, 0
; GFX9-NEXT:    v_mov_b32_e32 v27, 0
; GFX9-NEXT:    v_mov_b32_e32 v28, 0
; GFX9-NEXT:    v_mov_b32_e32 v29, 0
; GFX9-NEXT:    v_mov_b32_e32 v30, 0
; GFX9-NEXT:    v_mov_b32_e32 v31, 0
; GFX9-NEXT:    v_writelane_b32 v63, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:636
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:640
; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:644
; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:648
; GFX9-NEXT:    buffer_load_dword v34, off, s[0:3], s33 offset:652
; GFX9-NEXT:    buffer_load_dword v35, off, s[0:3], s33 offset:656
; GFX9-NEXT:    buffer_load_dword v36, off, s[0:3], s33 offset:660
; GFX9-NEXT:    buffer_load_dword v37, off, s[0:3], s33 offset:664
; GFX9-NEXT:    buffer_load_dword v38, off, s[0:3], s33 offset:668
; GFX9-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:672
; GFX9-NEXT:    buffer_load_dword v48, off, s[0:3], s33 offset:676
; GFX9-NEXT:    buffer_load_dword v49, off, s[0:3], s33 offset:680
; GFX9-NEXT:    buffer_load_dword v50, off, s[0:3], s33 offset:684
; GFX9-NEXT:    buffer_load_dword v51, off, s[0:3], s33 offset:688
; GFX9-NEXT:    buffer_load_dword v52, off, s[0:3], s33 offset:692
; GFX9-NEXT:    buffer_load_dword v53, off, s[0:3], s33 offset:696
; GFX9-NEXT:    buffer_load_dword v54, off, s[0:3], s33 offset:700
; GFX9-NEXT:    buffer_load_dword v55, off, s[0:3], s33 offset:704
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:708
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:712
; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:716
; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:720
; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:724
; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:728
; GFX9-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:732
; GFX9-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:736
; GFX9-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:740
; GFX9-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:748
; GFX9-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:752
; GFX9-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:756
; GFX9-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:760
; GFX9-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:764
; GFX9-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:768
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:772
; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:776
; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:780
; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:784
; GFX9-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:788
; GFX9-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:792
; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:796
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:516
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:520
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:524
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:528
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:532
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:536
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:540
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:544
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:548
; GFX9-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:552
; GFX9-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:556
; GFX9-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:560
; GFX9-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:564
; GFX9-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:568
; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:572
; GFX9-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:576
; GFX9-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:580
; GFX9-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:584
; GFX9-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:588
; GFX9-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:592
; GFX9-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:596
; GFX9-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:600
; GFX9-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:604
; GFX9-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:608
; GFX9-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:612
; GFX9-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:616
; GFX9-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:620
; GFX9-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:624
; GFX9-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:628
; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:632
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:12
; GFX9-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:16
; GFX9-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:20
; GFX9-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:24
; GFX9-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:28
; GFX9-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:32
; GFX9-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:36
; GFX9-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:40
; GFX9-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:44
; GFX9-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:48
; GFX9-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:52
; GFX9-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:56
; GFX9-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:60
; GFX9-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:64
; GFX9-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:68
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:72
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:76
; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:80
; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:84
; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:88
; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:92
; GFX9-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:96
; GFX9-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:100
; GFX9-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:104
; GFX9-NEXT:    v_mov_b32_e32 v0, 24
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108
; GFX9-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:112
; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:116
; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:120
; GFX9-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:124
; GFX9-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:128
; GFX9-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:136
; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:140
; GFX9-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:144
; GFX9-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:148
; GFX9-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:152
; GFX9-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:156
; GFX9-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:160
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT:    v_add_u32_e32 v0, 0x400, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 42
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT:    buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v63, 1
; GFX9-NEXT:    v_readlane_b32 s30, v63, 0
; GFX9-NEXT:    s_mov_b32 s32, s34
; GFX9-NEXT:    s_mov_b32 s34, s38
; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
; GFX9-NEXT:    s_mov_b32 s33, s35
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_72xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s35, s33
; GFX10-NEXT:    s_add_i32 s33, s32, 0x3fe0
; GFX10-NEXT:    s_and_b32 s33, s33, 0xffffc000
; GFX10-NEXT:    s_or_saveexec_b32 s36, -1
; GFX10-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s36
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_mov_b32 s38, s34
; GFX10-NEXT:    s_mov_b32 s34, s32
; GFX10-NEXT:    s_add_i32 s32, s32, 0x14000
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:36
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:40
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:44
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:48
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:52
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:56
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:60
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:64
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:76
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:88
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:92
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:96
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:112
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:124
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:128
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:136
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:140
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:144
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:148
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:152
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:156
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:160
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT:    v_writelane_b32 v63, s30, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0
; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x200, v0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    v_mov_b32_e32 v7, 0
; GFX10-NEXT:    v_mov_b32_e32 v8, 0
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v10, 0
; GFX10-NEXT:    v_mov_b32_e32 v11, 0
; GFX10-NEXT:    v_mov_b32_e32 v12, 0
; GFX10-NEXT:    v_mov_b32_e32 v13, 0
; GFX10-NEXT:    v_mov_b32_e32 v14, 0
; GFX10-NEXT:    v_mov_b32_e32 v15, 0
; GFX10-NEXT:    v_mov_b32_e32 v16, 0
; GFX10-NEXT:    v_mov_b32_e32 v17, 0
; GFX10-NEXT:    v_mov_b32_e32 v18, 0
; GFX10-NEXT:    v_mov_b32_e32 v19, 0
; GFX10-NEXT:    v_mov_b32_e32 v20, 0
; GFX10-NEXT:    v_mov_b32_e32 v21, 0
; GFX10-NEXT:    v_mov_b32_e32 v22, 0
; GFX10-NEXT:    v_mov_b32_e32 v23, 0
; GFX10-NEXT:    v_mov_b32_e32 v24, 0
; GFX10-NEXT:    v_mov_b32_e32 v25, 0
; GFX10-NEXT:    v_mov_b32_e32 v26, 0
; GFX10-NEXT:    v_mov_b32_e32 v27, 0
; GFX10-NEXT:    v_mov_b32_e32 v28, 0
; GFX10-NEXT:    v_mov_b32_e32 v29, 0
; GFX10-NEXT:    v_mov_b32_e32 v30, 0
; GFX10-NEXT:    v_mov_b32_e32 v31, 0
; GFX10-NEXT:    s_mov_b32 s37, return_72xi32@abs32@hi
; GFX10-NEXT:    s_mov_b32 s36, return_72xi32@abs32@lo
; GFX10-NEXT:    v_writelane_b32 v63, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX10-NEXT:    s_clause 0x28
; GFX10-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:636
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s33 offset:640
; GFX10-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:644
; GFX10-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:648
; GFX10-NEXT:    buffer_load_dword v34, off, s[0:3], s33 offset:652
; GFX10-NEXT:    buffer_load_dword v35, off, s[0:3], s33 offset:656
; GFX10-NEXT:    buffer_load_dword v36, off, s[0:3], s33 offset:660
; GFX10-NEXT:    buffer_load_dword v37, off, s[0:3], s33 offset:664
; GFX10-NEXT:    buffer_load_dword v38, off, s[0:3], s33 offset:668
; GFX10-NEXT:    buffer_load_dword v39, off, s[0:3], s33 offset:672
; GFX10-NEXT:    buffer_load_dword v48, off, s[0:3], s33 offset:676
; GFX10-NEXT:    buffer_load_dword v49, off, s[0:3], s33 offset:680
; GFX10-NEXT:    buffer_load_dword v50, off, s[0:3], s33 offset:684
; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s33 offset:688
; GFX10-NEXT:    buffer_load_dword v52, off, s[0:3], s33 offset:692
; GFX10-NEXT:    buffer_load_dword v53, off, s[0:3], s33 offset:696
; GFX10-NEXT:    buffer_load_dword v54, off, s[0:3], s33 offset:700
; GFX10-NEXT:    buffer_load_dword v55, off, s[0:3], s33 offset:704
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:708
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:712
; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:716
; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:720
; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:724
; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:728
; GFX10-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:732
; GFX10-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:736
; GFX10-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:740
; GFX10-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:748
; GFX10-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:752
; GFX10-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:756
; GFX10-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:760
; GFX10-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:764
; GFX10-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:768
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:772
; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:776
; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:780
; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:784
; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:788
; GFX10-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:792
; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:796
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:516
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:520
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:524
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:528
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:532
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:536
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:540
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:544
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
; GFX10-NEXT:    s_clause 0x15
; GFX10-NEXT:    buffer_load_dword v10, off, s[0:3], s33 offset:548
; GFX10-NEXT:    buffer_load_dword v11, off, s[0:3], s33 offset:552
; GFX10-NEXT:    buffer_load_dword v12, off, s[0:3], s33 offset:556
; GFX10-NEXT:    buffer_load_dword v13, off, s[0:3], s33 offset:560
; GFX10-NEXT:    buffer_load_dword v14, off, s[0:3], s33 offset:564
; GFX10-NEXT:    buffer_load_dword v15, off, s[0:3], s33 offset:568
; GFX10-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:572
; GFX10-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:576
; GFX10-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:580
; GFX10-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:584
; GFX10-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:588
; GFX10-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:592
; GFX10-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:596
; GFX10-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:600
; GFX10-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:604
; GFX10-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:608
; GFX10-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:612
; GFX10-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:616
; GFX10-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:620
; GFX10-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:624
; GFX10-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:628
; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:632
; GFX10-NEXT:    v_mov_b32_e32 v0, 24
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:108
; GFX10-NEXT:    buffer_store_dword v9, off, s[0:3], s32
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX10-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:8
; GFX10-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:12
; GFX10-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:16
; GFX10-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:20
; GFX10-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:24
; GFX10-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:28
; GFX10-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:32
; GFX10-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:36
; GFX10-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:40
; GFX10-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:44
; GFX10-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:48
; GFX10-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:52
; GFX10-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:56
; GFX10-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:60
; GFX10-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:64
; GFX10-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:68
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:72
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:76
; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:80
; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:84
; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:88
; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:92
; GFX10-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:96
; GFX10-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:100
; GFX10-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:104
; GFX10-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:112
; GFX10-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:116
; GFX10-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:120
; GFX10-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:124
; GFX10-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:128
; GFX10-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:136
; GFX10-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:140
; GFX10-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:144
; GFX10-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:148
; GFX10-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:152
; GFX10-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:156
; GFX10-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:160
; GFX10-NEXT:    s_clause 0x7
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:1536
; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:1540
; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:1544
; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:1548
; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:1552
; GFX10-NEXT:    buffer_load_dword v7, off, s[0:3], s33 offset:1556
; GFX10-NEXT:    buffer_load_dword v8, off, s[0:3], s33 offset:1560
; GFX10-NEXT:    buffer_load_dword v9, off, s[0:3], s33 offset:1564
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT:    v_mov_b32_e32 v1, 42
; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x400, v0
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX10-NEXT:    s_clause 0xe
; GFX10-NEXT:    buffer_load_dword v62, off, s[0:3], s33
; GFX10-NEXT:    buffer_load_dword v61, off, s[0:3], s33 offset:4
; GFX10-NEXT:    buffer_load_dword v60, off, s[0:3], s33 offset:8
; GFX10-NEXT:    buffer_load_dword v59, off, s[0:3], s33 offset:12
; GFX10-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:16
; GFX10-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:20
; GFX10-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:24
; GFX10-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:28
; GFX10-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:32
; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:36
; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:40
; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:44
; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:48
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:52
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:56
; GFX10-NEXT:    v_readlane_b32 s31, v63, 1
; GFX10-NEXT:    v_readlane_b32 s30, v63, 0
; GFX10-NEXT:    s_mov_b32 s32, s34
; GFX10-NEXT:    s_mov_b32 s34, s38
; GFX10-NEXT:    s_or_saveexec_b32 s36, -1
; GFX10-NEXT:    buffer_load_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s36
; GFX10-NEXT:    s_mov_b32 s33, s35
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_72xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s35, s33
; GFX11-NEXT:    s_add_i32 s33, s32, 0x1ff
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s33, s33, 0xfffffe00
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    s_mov_b32 s36, s34
; GFX11-NEXT:    s_mov_b32 s34, s32
; GFX11-NEXT:    s_addk_i32 s32, 0xa00
; GFX11-NEXT:    s_clause 0xb
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 offset:44
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:40
; GFX11-NEXT:    scratch_store_b32 off, v42, s33 offset:36
; GFX11-NEXT:    scratch_store_b32 off, v43, s33 offset:32
; GFX11-NEXT:    scratch_store_b32 off, v44, s33 offset:28
; GFX11-NEXT:    scratch_store_b32 off, v45, s33 offset:24
; GFX11-NEXT:    scratch_store_b32 off, v46, s33 offset:20
; GFX11-NEXT:    scratch_store_b32 off, v47, s33 offset:16
; GFX11-NEXT:    scratch_store_b32 off, v56, s33 offset:12
; GFX11-NEXT:    scratch_store_b32 off, v57, s33 offset:8
; GFX11-NEXT:    scratch_store_b32 off, v58, s33 offset:4
; GFX11-NEXT:    scratch_store_b32 off, v59, s33
; GFX11-NEXT:    s_add_i32 s0, s32, 0xa0
; GFX11-NEXT:    s_add_i32 s1, s32, 0x90
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
; GFX11-NEXT:    scratch_store_b32 off, v4, s0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT:    s_add_i32 s0, s32, 0x80
; GFX11-NEXT:    s_add_i32 s1, s32, 0x70
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT:    s_add_i32 s0, s32, 0x60
; GFX11-NEXT:    s_add_i32 s1, s32, 0x50
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT:    s_add_i32 s0, s32, 64
; GFX11-NEXT:    s_add_i32 s1, s32, 48
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT:    s_add_i32 s0, s32, 32
; GFX11-NEXT:    s_add_i32 s1, s32, 16
; GFX11-NEXT:    s_add_i32 s2, s33, 0x200
; GFX11-NEXT:    v_writelane_b32 v60, s30, 0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
; GFX11-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
; GFX11-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
; GFX11-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
; GFX11-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
; GFX11-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
; GFX11-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
; GFX11-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
; GFX11-NEXT:    v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
; GFX11-NEXT:    v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
; GFX11-NEXT:    v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
; GFX11-NEXT:    v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
; GFX11-NEXT:    v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
; GFX11-NEXT:    s_mov_b32 s1, return_72xi32@abs32@hi
; GFX11-NEXT:    s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT:    v_writelane_b32 v60, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_load_b128 v[45:48], off, s33 offset:624
; GFX11-NEXT:    scratch_load_b128 v[33:36], off, s33 offset:640
; GFX11-NEXT:    s_add_i32 s2, s32, 0xa0
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    v_mov_b32_e32 v32, v48
; GFX11-NEXT:    s_clause 0x9
; GFX11-NEXT:    scratch_load_b128 v[48:51], off, s33 offset:656
; GFX11-NEXT:    scratch_load_b128 v[52:55], off, s33 offset:672
; GFX11-NEXT:    scratch_load_b128 v[37:40], off, s33 offset:688
; GFX11-NEXT:    scratch_load_b128 v[41:44], off, s33 offset:704
; GFX11-NEXT:    scratch_load_b128 v[56:59], off, s33 offset:720
; GFX11-NEXT:    scratch_load_b128 v[12:15], off, s33 offset:736
; GFX11-NEXT:    scratch_load_b128 v[0:3], off, s33 offset:752
; GFX11-NEXT:    scratch_load_b128 v[4:7], off, s33 offset:768
; GFX11-NEXT:    scratch_load_b128 v[8:11], off, s33 offset:784
; GFX11-NEXT:    scratch_load_b128 v[16:19], off, s33 offset:512
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    scratch_load_b128 v[16:19], off, s33 offset:528
; GFX11-NEXT:    scratch_load_b128 v[20:23], off, s33 offset:544
; GFX11-NEXT:    scratch_load_b128 v[24:27], off, s33 offset:560
; GFX11-NEXT:    scratch_load_b128 v[28:31], off, s33 offset:576
; GFX11-NEXT:    v_dual_mov_b32 v4, v7 :: v_dual_mov_b32 v7, v10
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    v_mov_b32_e32 v10, v21
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1568 ; 16-byte Folded Spill
; GFX11-NEXT:    scratch_load_b128 v[28:31], off, s33 offset:592
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1552 ; 16-byte Folded Spill
; GFX11-NEXT:    scratch_load_b128 v[28:31], off, s33 offset:608
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
; GFX11-NEXT:    scratch_store_b128 off, v[32:35], s32
; GFX11-NEXT:    v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36
; GFX11-NEXT:    v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
; GFX11-NEXT:    v_dual_mov_b32 v35, v50 :: v_dual_mov_b32 v48, v51
; GFX11-NEXT:    v_dual_mov_b32 v49, v52 :: v_dual_mov_b32 v50, v53
; GFX11-NEXT:    v_dual_mov_b32 v51, v54 :: v_dual_mov_b32 v36, v55
; GFX11-NEXT:    v_dual_mov_b32 v53, v41 :: v_dual_mov_b32 v52, v40
; GFX11-NEXT:    v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v41, v56
; GFX11-NEXT:    v_dual_mov_b32 v55, v43 :: v_dual_mov_b32 v40, v44
; GFX11-NEXT:    v_dual_mov_b32 v42, v57 :: v_dual_mov_b32 v57, v12
; GFX11-NEXT:    v_dual_mov_b32 v43, v58 :: v_dual_mov_b32 v56, v59
; GFX11-NEXT:    v_mov_b32_e32 v58, v13
; GFX11-NEXT:    v_dual_mov_b32 v12, v15 :: v_dual_mov_b32 v13, v0
; GFX11-NEXT:    v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v0, v3
; GFX11-NEXT:    v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
; GFX11-NEXT:    v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9
; GFX11-NEXT:    v_mov_b32_e32 v9, v20
; GFX11-NEXT:    scratch_store_b32 off, v11, s2
; GFX11-NEXT:    s_add_i32 s2, s32, 0x90
; GFX11-NEXT:    v_mov_b32_e32 v11, v22
; GFX11-NEXT:    scratch_store_b128 off, v[4:7], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 0x80
; GFX11-NEXT:    v_mov_b32_e32 v5, v16
; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s2
; GFX11-NEXT:    v_mov_b32_e32 v0, 24
; GFX11-NEXT:    s_add_i32 s2, s32, 0x70
; GFX11-NEXT:    v_mov_b32_e32 v6, v17
; GFX11-NEXT:    scratch_store_b128 off, v[12:15], s2
; GFX11-NEXT:    v_mov_b32_e32 v13, v24
; GFX11-NEXT:    s_add_i32 s2, s32, 0x6c
; GFX11-NEXT:    v_mov_b32_e32 v7, v18
; GFX11-NEXT:    scratch_store_b32 off, v0, s2
; GFX11-NEXT:    s_add_i32 s2, s32, 0x60
; GFX11-NEXT:    v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
; GFX11-NEXT:    scratch_store_b96 off, v[56:58], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 0x50
; GFX11-NEXT:    v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
; GFX11-NEXT:    scratch_store_b128 off, v[40:43], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 64
; GFX11-NEXT:    v_mov_b32_e32 v14, v25
; GFX11-NEXT:    scratch_store_b128 off, v[52:55], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 48
; GFX11-NEXT:    v_mov_b32_e32 v16, v27
; GFX11-NEXT:    scratch_store_b128 off, v[36:39], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 32
; GFX11-NEXT:    v_mov_b32_e32 v30, v46
; GFX11-NEXT:    scratch_store_b128 off, v[48:51], s2
; GFX11-NEXT:    s_add_i32 s2, s32, 16
; GFX11-NEXT:    scratch_store_b128 off, v[32:35], s2
; GFX11-NEXT:    scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v1, 42
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    scratch_load_b128 v[17:20], off, s33 offset:1568
; GFX11-NEXT:    scratch_load_b128 v[21:24], off, s33 offset:1552
; GFX11-NEXT:    scratch_load_b128 v[25:28], off, s33 offset:1536
; GFX11-NEXT:    s_add_i32 s2, s33, 0x400
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_clause 0xb
; GFX11-NEXT:    scratch_load_b32 v59, off, s33
; GFX11-NEXT:    scratch_load_b32 v58, off, s33 offset:4
; GFX11-NEXT:    scratch_load_b32 v57, off, s33 offset:8
; GFX11-NEXT:    scratch_load_b32 v56, off, s33 offset:12
; GFX11-NEXT:    scratch_load_b32 v47, off, s33 offset:16
; GFX11-NEXT:    scratch_load_b32 v46, off, s33 offset:20
; GFX11-NEXT:    scratch_load_b32 v45, off, s33 offset:24
; GFX11-NEXT:    scratch_load_b32 v44, off, s33 offset:28
; GFX11-NEXT:    scratch_load_b32 v43, off, s33 offset:32
; GFX11-NEXT:    scratch_load_b32 v42, off, s33 offset:36
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:40
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 offset:44
; GFX11-NEXT:    v_readlane_b32 s31, v60, 1
; GFX11-NEXT:    v_readlane_b32 s30, v60, 0
; GFX11-NEXT:    s_mov_b32 s32, s34
; GFX11-NEXT:    s_mov_b32 s34, s36
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v60, off, s33 offset:1600 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s35
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ret.0 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> zeroinitializer)
  %val.0 = insertelement <72 x i32> %ret.0, i32 42, i32 0
  %val.1 = insertelement <72 x i32> %val.0, i32 24, i32 58
  %ret.1 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val.1)
  ret void
}

; Ensure all VGPRs are available
; Attribute group #0: nounwind, with both "amdgpu-waves-per-eu" and
; "amdgpu-flat-work-group-size" given the range string "1,1".
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" }

; Limit to 64 VGPRs
; Attribute group #1: nounwind, with "amdgpu-num-vgpr" set to "64" for any
; function that references this group.
attributes #1 = { nounwind "amdgpu-num-vgpr"="64" }
