A   O,             5rij5Aug 14 202405:08:24HOST64sm_86//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31833905
// Cuda compilation tools, release 11.8, V11.8.89
// Based on NVVM 7.0.1
//

.version 7.8
.target sm_50
.address_size 64

	// .globl	ShaderKernel_ColorMatch
// ---------------------------------------------------------------------------
// Constant-space byte tables.
//
// Every table below except the last two is 36 bytes with 4-byte alignment,
// i.e. room for nine 4-byte values.  Read as little-endian IEEE-754 binary32
// the first entry of kRGB32f_To_601YPbPr (135, 22, 153, 62 = 0x3E991687) is
// ~0.299, so the tables are presumably 3x3 float matrices; the row/column
// order is not visible here -- TODO confirm against the CUDA source.
//
// NOTE(review): the body of ShaderKernel_ColorMatch in this listing contains
// no ld.const and does not reference any of these symbols.
// ---------------------------------------------------------------------------
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 189, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 184, 30, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Declared without an initializer list, unlike every other table here.
.const .align 4 .b8 kZeroMatrix[36];
// 12-byte tables: room for three 4-byte values.  Read as little-endian
// binary32, kYCbCrOffset is {16.0, 128.0, 128.0} and kYCbCrFullRangeOffset
// is {0.0, 128.0, 128.0}.
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// ---------------------------------------------------------------------------
// Scalar .f32 constants.  Each initializer uses PTX's 0f<8 hex digits> form,
// which spells the exact IEEE-754 binary32 bit pattern.
//
// The name prefixes (PQ_, HLG_, AppleLog_, Gamma1886) suggest transfer-function
// parameters, but nothing in this listing reads them -- the body of
// ShaderKernel_ColorMatch contains no ld.const.  Treat any meaning beyond the
// bit patterns as an assumption to confirm against the CUDA source.
// ---------------------------------------------------------------------------
.const .align 4 .f32 PQ_m1 = 0f3E232000;
.const .align 4 .f32 PQ_m1Inv = 0f40C8E06B;
.const .align 4 .f32 PQ_m2 = 0f429DB000;
.const .align 4 .f32 PQ_m2Inv = 0f3C4FCDAC;
.const .align 4 .f32 PQ_c1 = 0f3F560000;
.const .align 4 .f32 PQ_c2 = 0f4196D000;
.const .align 4 .f32 PQ_c3 = 0f41958000;
.const .align 4 .f32 Gamma1886 = 0f4019999A;
.const .align 4 .f32 PQ_Lpeak = 0f461C4000;
.const .align 4 .f32 PQ_a = 0f3F8CAC08;
.const .align 4 .f32 PQ_b = 0f426E1556;
.const .align 4 .f32 PQ_c = 0f39B033E5;
.const .align 4 .f32 PQ_s = 0f4385EB85;
.const .align 4 .f32 PQ_g = 0f3EE66666;
.const .align 4 .f32 scaleFD = 0f42C80000;
.const .align 4 .f32 HLG_a = 0f3E371FF0;
.const .align 4 .f32 HLG_b = 0f3E91C020;
.const .align 4 .f32 HLG_c = 0f3F0F564F;
.const .align 4 .f32 HLG_inva = 0f40B2F029;
.const .align 4 .f32 HLG_alpha = 0f41200000;
.const .align 4 .f32 HLG_invAlpha = 0f3DCCCCCD;
.const .align 4 .f32 HLG_gamma = 0f3F99999A;
.const .align 4 .f32 HLG_gammaM1 = 0f3E4CCCCD;
.const .align 4 .f32 HLG_gammaM1Dgamma = 0f3E2AAAAB;
.const .align 4 .f32 HLG_YR = 0f3E86809D;
.const .align 4 .f32 HLG_YG = 0f3F2D9168;
.const .align 4 .f32 HLG_YB = 0f3D72E48F;
.const .align 4 .f32 HLG_Lpeak = 0f447A0000;
.const .align 4 .f32 AppleLog_R0 = 0fBD670F18;
.const .align 4 .f32 AppleLog_Rt = 0f3C23D70A;
.const .align 4 .f32 AppleLog_c = 0f423D2601;
.const .align 4 .f32 AppleLog_beta = 0f3C1DF346;
.const .align 4 .f32 AppleLog_gamma = 0f3DAF1D23;
.const .align 4 .f32 AppleLog_delta = 0f3F3180A9;
// Texture reference sampled by the tex.2d instruction in
// ShaderKernel_ColorMatch; it is the kernel's only pixel input.
.global .texref texture0_RECT;
// _ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local has been demoted

// ---------------------------------------------------------------------------
// ShaderKernel_ColorMatch -- per-pixel, per-channel curve applied to a texel.
//
// Each thread handles the pixel (x, y) with
//     x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
// It samples texture0_RECT at (x + 0.5, y + 0.5), giving T0..T3, and computes,
// using five 16-byte parameter vectors P0..P4 copied from param_1:
//
//     d_k   = (T_k - P0[2-k]) * P1[2-k]                     k = 0, 1, 2
//     out_k = sign(d_k) * |d_k| ^ P4[2-k] * P3[2-k] + P2[2-k]
//     out_3 = T3 * P3[3] + P2[3]
//
// The power is evaluated as ex2(P4 * lg2(|d|)) with the approximate
// instructions and is replaced by 0 when |d| <= 0; sign() is -1 for d < 0 and
// +1 otherwise.  Note the index reversal: texel component 0 is paired with
// element 2 of each parameter vector and vice versa.
//
// Parameters (as used by the code below):
//   param_0  u64  destination buffer address (converted with cvta.to.global)
//   param_1  u64  address of the 80 bytes of parameters P0..P4
//   param_2  u64  never loaded by this kernel
//   param_3  u32  multiplier applied to y when forming the output pixel index
//   param_4  u32  0       -> four f16 values (8 bytes) stored per pixel
//                 nonzero -> four f32 values (16 bytes) stored per pixel
//   param_5  u32  exclusive upper bound on x (signed compare)
//   param_6  u32  exclusive upper bound on y (signed compare)
// ---------------------------------------------------------------------------
.visible .entry ShaderKernel_ColorMatch(
	.param .u64 ShaderKernel_ColorMatch_param_0,
	.param .u64 ShaderKernel_ColorMatch_param_1,
	.param .u64 ShaderKernel_ColorMatch_param_2,
	.param .u32 ShaderKernel_ColorMatch_param_3,
	.param .u32 ShaderKernel_ColorMatch_param_4,
	.param .u32 ShaderKernel_ColorMatch_param_5,
	.param .u32 ShaderKernel_ColorMatch_param_6
)
{
	.reg .pred 	%p<15>;
	.reg .b16 	%rs<5>;
	.reg .f32 	%f<83>;
	.reg .b32 	%r<28>;
	.reg .b64 	%rd<13>;
	// demoted variable
	// 80 bytes of shared memory = five 16-byte vectors (P0..P4 above).
	.shared .align 16 .b8 _ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local[80];

	// Load arguments: %rd5 = dst, %rd4 = parameter source, %r7 = param_3,
	// %r8 = param_4, %r9 = param_5 (x bound), %r10 = param_6 (y bound).
	ld.param.u64 	%rd5, [ShaderKernel_ColorMatch_param_0];
	ld.param.u64 	%rd4, [ShaderKernel_ColorMatch_param_1];
	ld.param.u32 	%r7, [ShaderKernel_ColorMatch_param_3];
	ld.param.u32 	%r8, [ShaderKernel_ColorMatch_param_4];
	ld.param.u32 	%r9, [ShaderKernel_ColorMatch_param_5];
	ld.param.u32 	%r10, [ShaderKernel_ColorMatch_param_6];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r3 = x, %r4 = y (see header); %r27 starts as tid.x and doubles as the
	// copy-loop index below.
	mov.u32 	%r1, %ntid.x;
	mov.u32 	%r11, %ctaid.x;
	mov.u32 	%r27, %tid.x;
	mad.lo.s32 	%r3, %r11, %r1, %r27;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r4, %r13, %r12, %r14;
	// Only threads with tid.y == 0 and tid.x <= 4 take part in the copy;
	// everyone else jumps straight to the barrier.
	setp.ne.s32 	%p1, %r14, 0;
	setp.gt.u32 	%p2, %r27, 4;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	$L__BB0_3;

	cvta.to.global.u64 	%rd2, %rd4;

	// Copy loop: shared[i] = global[i] for 16-byte element i, starting at
	// i = tid.x and stepping by ntid.x while i < 5.
$L__BB0_2:
	mul.wide.u32 	%rd6, %r27, 16;
	add.s64 	%rd7, %rd2, %rd6;
	shl.b32 	%r15, %r27, 4;
	mov.u32 	%r16, _ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local;
	add.s32 	%r17, %r16, %r15;
	ld.global.v4.u32 	{%r18, %r19, %r20, %r21}, [%rd7];
	st.shared.v4.u32 	[%r17], {%r18, %r19, %r20, %r21};
	add.s32 	%r27, %r27, %r1;
	setp.lt.u32 	%p4, %r27, 5;
	@%p4 bra 	$L__BB0_2;

$L__BB0_3:
	// Barrier sits before the bounds check, so threads that exit early
	// below have already passed it.
	bar.sync 	0;
	// Exit if y >= param_6 or x >= param_5.
	setp.ge.s32 	%p5, %r4, %r10;
	setp.ge.s32 	%p6, %r3, %r9;
	or.pred  	%p7, %p6, %p5;
	@%p7 bra 	$L__BB0_13;

	// Sample at (x + 0.5, y + 0.5); 0f3F000000 is 0.5f.
	// Texel components: %f1 = T0, %f2 = T1, %f3 = T2, %f4 = T3.
	cvt.rn.f32.u32 	%f22, %r3;
	add.ftz.f32 	%f23, %f22, 0f3F000000;
	cvt.rn.f32.u32 	%f24, %r4;
	add.ftz.f32 	%f25, %f24, 0f3F000000;
	tex.2d.v4.f32.f32 	{%f1, %f2, %f3, %f4}, [texture0_RECT, {%f23, %f25}];
	// Subtract P0 (shared +0).  Pairing is reversed: T2 - P0[0], T1 - P0[1],
	// T0 - P0[2].
	ld.shared.v4.f32 	{%f26, %f27, %f28, %f29}, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local];
	sub.ftz.f32 	%f33, %f3, %f26;
	sub.ftz.f32 	%f34, %f2, %f27;
	sub.ftz.f32 	%f35, %f1, %f28;
	// Multiply by P1 (shared +16): %f5 = d for T2, %f6 = d for T1,
	// %f7 = d for T0; %f8..%f10 are their absolute values.
	ld.shared.v4.f32 	{%f36, %f37, %f38, %f39}, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+16];
	mul.ftz.f32 	%f5, %f33, %f36;
	mul.ftz.f32 	%f6, %f34, %f37;
	mul.ftz.f32 	%f7, %f35, %f38;
	abs.ftz.f32 	%f8, %f5;
	abs.ftz.f32 	%f9, %f6;
	abs.ftz.f32 	%f10, %f7;
	// %f80 / %f81 default to 0 and keep that value when |d| <= 0, which
	// skips the lg2 of a non-positive input.
	setp.le.ftz.f32 	%p8, %f8, 0f00000000;
	mov.f32 	%f81, 0f00000000;
	mov.f32 	%f80, %f81;
	@%p8 bra 	$L__BB0_6;

	// %f80 = |%f5| ^ P4[0]  (exponent at shared +64), via ex2(e * lg2(|d|)).
	ld.shared.f32 	%f43, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+64];
	lg2.approx.ftz.f32 	%f44, %f8;
	mul.ftz.f32 	%f45, %f43, %f44;
	ex2.approx.ftz.f32 	%f80, %f45;

$L__BB0_6:
	setp.le.ftz.f32 	%p9, %f9, 0f00000000;
	@%p9 bra 	$L__BB0_8;

	// %f81 = |%f6| ^ P4[1]  (exponent at shared +68).
	ld.shared.f32 	%f47, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+68];
	lg2.approx.ftz.f32 	%f48, %f9;
	mul.ftz.f32 	%f49, %f47, %f48;
	ex2.approx.ftz.f32 	%f81, %f49;

$L__BB0_8:
	setp.le.ftz.f32 	%p10, %f10, 0f00000000;
	mov.f32 	%f82, 0f00000000;
	@%p10 bra 	$L__BB0_10;

	// %f82 = |%f7| ^ P4[2]  (exponent at shared +72).
	ld.shared.f32 	%f51, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+72];
	lg2.approx.ftz.f32 	%f52, %f10;
	mul.ftz.f32 	%f53, %f51, %f52;
	ex2.approx.ftz.f32 	%f82, %f53;

$L__BB0_10:
	// P3 (shared +48) is the multiplier and P2 (shared +32) the addend of
	// the final fma for each channel.
	ld.shared.v4.f32 	{%f54, %f55, %f56, %f57}, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+48];
	ld.shared.v4.f32 	{%f62, %f63, %f64, %f65}, [_ZZ32ShaderKernel_ColorMatch_DelegateP6float4PKS_Pvi17DevicePixelFormatii5uint2E7p_local+32];
	// Restore the sign removed by abs: selp yields -1.0 (0fBF800000) when
	// d < 0, otherwise +1.0 (0f3F800000).
	setp.lt.ftz.f32 	%p11, %f5, 0f00000000;
	selp.f32 	%f70, 0fBF800000, 0f3F800000, %p11;
	mul.ftz.f32 	%f71, %f70, %f80;
	fma.rn.ftz.f32 	%f17, %f71, %f54, %f62;
	setp.lt.ftz.f32 	%p12, %f6, 0f00000000;
	selp.f32 	%f72, 0fBF800000, 0f3F800000, %p12;
	mul.ftz.f32 	%f73, %f72, %f81;
	fma.rn.ftz.f32 	%f18, %f73, %f55, %f63;
	setp.lt.ftz.f32 	%p13, %f7, 0f00000000;
	selp.f32 	%f74, 0fBF800000, 0f3F800000, %p13;
	mul.ftz.f32 	%f75, %f74, %f82;
	fma.rn.ftz.f32 	%f19, %f75, %f56, %f64;
	// Fourth component gets no power curve: out_3 = T3 * P3[3] + P2[3].
	fma.rn.ftz.f32 	%f20, %f4, %f57, %f65;
	// Output pixel index = y * param_3 + x.
	// NOTE(review): the multiply-add is 32-bit (mad.lo.s32) and only then
	// sign-extended, so the index wraps if y * param_3 + x exceeds the s32
	// range -- confirm callers never pass surfaces that large.
	mad.lo.s32 	%r26, %r4, %r7, %r3;
	cvt.s64.s32 	%rd3, %r26;
	// param_4 == 0 selects the f16 store path at $L__BB0_12.
	setp.eq.s32 	%p14, %r8, 0;
	@%p14 bra 	$L__BB0_12;

	// f32 path: 16 bytes per pixel (index << 4).  Store order is
	// {%f19, %f18, %f17, %f20}, i.e. the results derived from T0, T1, T2, T3,
	// so output components line up with the texel's component order.
	shl.b64 	%rd9, %rd3, 4;
	add.s64 	%rd10, %rd1, %rd9;
	st.global.v4.f32 	[%rd10], {%f19, %f18, %f17, %f20};
	bra.uni 	$L__BB0_13;

$L__BB0_12:
	// f16 path: round each result to half precision (round-to-nearest),
	// in the same component order as the f32 path.
	// begin inline asm
	{  cvt.rn.f16.f32 %rs1, %f19;}

	// end inline asm
	// begin inline asm
	{  cvt.rn.f16.f32 %rs2, %f18;}

	// end inline asm
	// begin inline asm
	{  cvt.rn.f16.f32 %rs3, %f17;}

	// end inline asm
	// begin inline asm
	{  cvt.rn.f16.f32 %rs4, %f20;}

	// end inline asm
	// 8 bytes per pixel (index << 3), written as four 16-bit stores.
	shl.b64 	%rd11, %rd3, 3;
	add.s64 	%rd12, %rd1, %rd11;
	st.global.u16 	[%rd12], %rs1;
	st.global.u16 	[%rd12+2], %rs2;
	st.global.u16 	[%rd12+4], %rs3;
	st.global.u16 	[%rd12+6], %rs4;

$L__BB0_13:
	ret;

}

  ELF3         ~           @            V2 @ 8  @    .shstrtab .strtab .symtab .symtab_shndx .nv.info .text.ShaderKernel_ColorMatch .nv.info.ShaderKernel_ColorMatch .nv.shared.ShaderKernel_ColorMatch .nv.constant3 .nv.constant0.ShaderKernel_ColorMatch .rel.nv.constant0.ShaderKernel_ColorMatch .debug_frame .rel.debug_frame .rela.debug_frame .nv.callgraph .nv.prototype .nv.rel.action  .shstrtab .strtab .symtab .symtab_shndx .nv.info .text.ShaderKernel_ColorMatch .nv.info.ShaderKernel_ColorMatch .nv.shared.ShaderKernel_ColorMatch .nv.constant3 kRGB32f_To_601YPbPr k601YPbPr_To_RGB32f kRGB32f_To_601YCbCr k601YCbCr_To_RGB32f kRGB8u_To_601YCbCr k601YCbCr_To_RGB8u kRGB8u_To_601YCbCrFullRange k601YCbCrFullRange_To_RGB8u kRGB32f_To_601YCbCrFullRange k601YCbCrFullRange_To_RGB32f kRGB32f_To_709YPbPr k709YPbPr_To_RGB32f kRGB32f_To_709YCbCr k709YCbCr_To_RGB32f k709YCbCrFullRange_To_RGB32f kRGB8u_To_709YCbCr k709YCbCr_To_RGB8u k709YCbCr_To_601YCbCr k601YCbCr_To_709YCbCr kZeroMatrix kYCbCrOffset kYCbCrFullRangeOffset PQ_m1 PQ_m1Inv PQ_m2 PQ_m2Inv PQ_c1 PQ_c2 PQ_c3 Gamma1886 PQ_Lpeak PQ_a PQ_b PQ_c PQ_s PQ_g scaleFD HLG_a HLG_b HLG_c HLG_inva HLG_alpha HLG_invAlpha HLG_gamma HLG_gammaM1 HLG_gammaM1Dgamma HLG_YR HLG_YG HLG_YB HLG_Lpeak AppleLog_R0 AppleLog_Rt AppleLog_c AppleLog_beta AppleLog_gamma AppleLog_delta .rel.nv.constant0.ShaderKernel_ColorMatch .nv.constant0.ShaderKernel_ColorMatch .debug_frame .rel.debug_frame .rela.debug_frame .nv.callgraph .nv.prototype .nv.rel.action ShaderKernel_ColorMatch texture0_RECT                               2                     q                                                       $            $       $            H       $            l       $                   $                  $                  $       4           $       P           $       m    D      $           h      $                 $                 $                 $                 $           
      $       
    @      $           d      $       3          $       I          $       U                 b                 x                 ~                                                                                                                                                                                                                                                    $                 (                 ,                 0                 4                 8                 <                 @             $    D             6    H             =    L             D    P             K    T             U    X             a    \             m    `             x    d                 h                 l                                                     $                    @                    O                 g                     $        |( ((   4                                4   ( 8         / @       @        @       7 ~   5  
 <   `( (       $                                       !         !           !    D           p                               s          % 6         A   D          @   >E?x=!,o   ?   ?F^־膦  ?    t?  ?26  ??    FB C^A!0g  B  BoF%;    ;%;(ɺP%;<    y>?=>>NG
?    J?
?ȾP
?N@    >E?x=,'>>+վ;ߥ  ?    H?  ?6  ??    q}B\C\Ao+1H  B  BX9;    t;;26;;    гY>Y7?ݓ=Vž   ?   ?;  ?    ?  ??  ?M?    j<:BC|AO  B  BuQ$%;    ^;%;!9[%;R<    ;    ;;=?ﺃ;R;    :>5>?}=νY>>B̾$
?    x?
?5^Zl
?1@      ?d=MD>    g}?
    =b{?  ?z콳T    b?ؼ=    J==?                                      A   C   C       C   C  #>k@ BO<  V? ЖA A@ @F?VnB39Cff>  B7> >OV?)@   A=?L>*>>h-?r=  zDg
#<&=BF<#=1?                                                                                                                                                                                                                                                                                                                                                                                                                                                $v 
     y      !   " z  F   
    Ey         y      "   h y      &    y       %    x    p@  r    pVp  / $z    O $z       Ə z  a  pb  z   `  pf   G	  p       x         %v Z     y      s        A z      x    p`  G    Ay          {           M          Er          $t      Er         !t   ?     !t   ?     `{b 0 B y        " $z  ^     y       b !r      !r       !r        r     A  /  r	     A    r
     A   !r     r       !r     r       !r     r       y 0       y         	 @      "         & ) D      h  H       #        p                A   r         $r       "     A  / r         $t  ?           "       A  O r       x  ?    #         r       r       x  ?             z  _  pR   r     A   x  ?   / #r       x        r     A  O #r       #r   	      X      r     A  Ə   Y  $   #r   
          M          z  X   >r        >r        z  Y     x 2v        x2v       y      y      y     y     My          Gy    y            y            y            y            y            y            y            y            y            y                                                                                                  @       M                                                         u                                                         0         @                                       8      p                              )      p                      $                             P      p@                                                 "    p                                                   >    p                                                      	   @                                                   	   @                                                                           p                                   B               P                                  2                                    @                 q      C                     P                                   @                                                                                                                              P                    @                                           