UnityTips 之着色器编译器傻吗 2 多余的逻辑去哪里
简介: 今天突发奇想,要看看shader编译器对一些用不到的逻辑或者变量的处理,看看编译器到底能处理到多高效
Unity 版本:2022.1.7f1c1
准备测试用的着色器
为了减少额外的干扰,先做一个最简单的,逻辑最少的shader,我们暂且将它叫做 CompileTester 吧
// Minimal URP test shader used as the baseline for the compiler experiments:
// one color property, one pass, and a fragment stage that returns that color.
Shader "CompileTester"
{
Properties
{
// The only material property; frag returns it unchanged.
_Color0("Color 0", Color) = (0,0,0,0)
}
SubShader
{
// Compile directives kept in the SubShader-level include block.
HLSLINCLUDE
#pragma target 3.0
#pragma prefer_hlslcc gles
ENDHLSL
Pass
{
Name "Forward"
Tags { "LightMode"="UniversalForward" }
HLSLPROGRAM
#pragma vertex vert
#pragma fragment frag
// Provides TransformObjectToWorld / TransformWorldToHClip used by vert.
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
// Vertex-stage input: position only.
struct VertexInput
{
float4 vertex : POSITION;
};
// Vertex-stage output: clip-space position only.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
};
// Backing variable for the _Color0 material property.
float4 _Color0;
// Transforms the vertex position to world space and then to clip space.
VertexOutput vert ( VertexInput v )
{
VertexOutput o = (VertexOutput)0;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
return o;
}
// Returns the material color without any further computation.
half4 frag ( VertexOutput IN ) : SV_Target
{
return _Color0;
}
ENDHLSL
}
}
}
为了便于观看我们直接忽略掉不变的内容,我们主要处理的逻辑位于 frag 中,期间会添加 function,所以我们简称vert逻辑为 vert... ,简化后的代码如下
vert... //vertex 方法内容
// Baseline fragment stage: returns the _Color0 property unchanged.
// The later experiments modify only this function (and helpers it calls).
half4 frag ( VertexOutput IN ) : SV_Target
{
return _Color0;
}
开始吧!先看计算逻辑的冗余
最简单的着色器编译结果
编译完大体上有 120+ 行 ,由于我们的逻辑很简单,也只有一个变体,所以编译结果也变得简洁了,由于目前我们vert逻辑没有变化,我们目前只需要关注 #ifdef FRAGMENT 以下的部分
Shader "CompileTester" {
Properties {
_Color0 ("Color 0", Color) = (0.000000,0.000000,0.000000,0.000000)
}
SubShader {
Pass {
Name "Forward"
Tags { "LIGHTMODE"="UniversalForward" }
//////////////////////////////////
// //
// Compiled programs //
// //
//////////////////////////////////
//////////////////////////////////////////////////////
Keywords: <none>
-- Hardware tier variant: Tier 1
-- Vertex shader for "gles3":
Constant Buffer "$Globals" (16 bytes) on slot 0 {
Vector4 _Color0 at 0
}
Constant Buffer "UnityPerDraw" (656 bytes) on slot 0 {
Matrix4x4 unity_ObjectToWorld at 0
}
Shader Disassembly:
#ifdef VERTEX
#version 300 es
#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1
#if HLSLCC_ENABLE_UNIFORM_BUFFERS
#define UNITY_UNIFORM
#else
#define UNITY_UNIFORM uniform
#endif
#define UNITY_SUPPORTS_UNIFORM_LOCATION 1
#if UNITY_SUPPORTS_UNIFORM_LOCATION
#define UNITY_LOCATION(x) layout(location = x)
#define UNITY_BINDING(x) layout(binding = x, std140)
#else
#define UNITY_LOCATION(x)
#define UNITY_BINDING(x) layout(std140)
#endif
uniform vec4 hlslcc_mtx4x4unity_MatrixVP[4];
#if HLSLCC_ENABLE_UNIFORM_BUFFERS
UNITY_BINDING(0) uniform UnityPerDraw {
#endif
UNITY_UNIFORM vec4 hlslcc_mtx4x4unity_ObjectToWorld[4];
UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_WorldToObject[4];
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_LODFade;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_WorldTransformParams;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_RenderingLayer;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_LightData;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_LightIndices[2];
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_ProbesOcclusion;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SpecCube0_HDR;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SpecCube1_HDR;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_BoxMax;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_BoxMin;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_ProbePosition;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_BoxMax;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_BoxMin;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_ProbePosition;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_LightmapST;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_DynamicLightmapST;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAr;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAg;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAb;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBr;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBg;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBb;
UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHC;
UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_MatrixPreviousM[4];
UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_MatrixPreviousMI[4];
UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_MotionVectorsParams;
#if HLSLCC_ENABLE_UNIFORM_BUFFERS
};
#endif
in highp vec4 in_POSITION0;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
return;
}
#endif
#ifdef FRAGMENT
#version 300 es
precision highp float;
precision highp int;
#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1
#if HLSLCC_ENABLE_UNIFORM_BUFFERS
#define UNITY_UNIFORM
#else
#define UNITY_UNIFORM uniform
#endif
#define UNITY_SUPPORTS_UNIFORM_LOCATION 1
#if UNITY_SUPPORTS_UNIFORM_LOCATION
#define UNITY_LOCATION(x) layout(location = x)
#define UNITY_BINDING(x) layout(binding = x, std140)
#else
#define UNITY_LOCATION(x)
#define UNITY_BINDING(x) layout(std140)
#endif
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = _Color0;
return;
}
#endif
}
}
}
多步计算但不用
我们计算了很多属性和变量结果,但是有很多是没有用到的
// Experiment: compute two values but return only one of them.
// color2 is never read after it is computed; only color3 reaches the output.
half4 frag ( VertexOutput IN ) : SV_Target
{
half4 color2=_Color0+2;
half4 color3=_Color0+3;
return color3;
}
编译结果
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
vec4 u_xlat0;
void main()
{
u_xlat0 = _Color0 + vec4(3.0, 3.0, 3.0, 3.0);
SV_Target0 = u_xlat0;
return;
}
color2 的 +2 计算并没有被包含到编译后的逻辑中,优化得不错,点个赞
调用的方法内含冗余逻辑
例如我们调用一个方法,其除了一个返回值之外,还有out 几个值,但是最终只使用了其中的一个
// Helper with one return value and two out parameters.
//   color  - input color; every result is derived from it
//   alpha  - not read anywhere in the body
//   color2 - out: color * 2
//   color3 - out: color * 3
// Returns color + 1.
half4 CalculateMultiParam(half4 color,half alpha,out half4 color2,out half4 color3)
{
color2=color*2;
color3=color*3;
return color+1;
}
// Experiment: call CalculateMultiParam but use only its return value.
// color2 and color3 receive the out results and are never read afterwards.
half4 frag ( VertexOutput IN ) : SV_Target
{
half4 color2=0;
half4 color3=0;
half4 finalColor=CalculateMultiParam(_Color0,1,color2,color3);
return finalColor;
}
编译结果
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = _Color0 + vec4(1.0, 1.0, 1.0, 1.0);
return;
}
很明显,方法体内的 color2 和 color3 计算虽然返回了,但是由于最终没有被用到,所以最终没有被编译,good!
多次赋值
我们多次给一个变量赋值,然后最后给一个常量
// Experiment: assign finalColor several times and finish with a constant.
// Each earlier assignment is overwritten before being read, so the returned
// value is the constant 1 set by the final assignment.
half4 frag ( VertexOutput IN ) : SV_Target
{
half4 color2=2;
half4 color3=3;
half4 finalColor=color3;
finalColor=CalculateMultiParam(_Color0,1,color2,color3);
finalColor=color2;
finalColor=1;
return finalColor;
}
编译结果
precision highp float;
precision highp int;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = vec4(1.0, 1.0, 1.0, 1.0);
return;
}
我们发现连之前的变量声明 uniform vec4 _Color0; 都被忽略掉了,good!
多步常量计算
我们进行多次的常量计算
// Experiment: a chain of computations on constants only (2, then +3, then +5).
// No uniform or input is read; the returned value is 10 in every component.
half4 frag ( VertexOutput IN ) : SV_Target
{
half4 color2=2;
half4 color3=color2+3;
half4 finalColor=color3+5;
return finalColor;
}
编译结果
precision highp float;
precision highp int;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = vec4(10.0, 10.0, 10.0, 10.0);
return;
}
直接出结果,勤俭持家好编译 good!
再看数据结构体
我们看一下最简单的两个结构体的编译结果
// Baseline vertex input: position only.
struct VertexInput
{
float4 vertex : POSITION;
};
// Baseline vertex output: clip-space position only.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
};
// Baseline vertex stage: zero-initializes the output, then writes the
// clip-space position derived from the input vertex position.
VertexOutput vert ( VertexInput v )
{
VertexOutput o = (VertexOutput)0;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
return o;
}
// Baseline fragment stage: returns _Color0 and reads nothing from IN.
half4 frag ( VertexOutput IN ) : SV_Target
{
return _Color0;
}
编译结果 只看关键的 vert 和 frag部分
//Vert
in highp vec4 in_POSITION0;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
return;
}
//Frag
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = _Color0;
return;
}
输入结构加入多余的字段
我们在顶点输入结构体中加入几个不同的数据字段,看一下编译器如何处理
// Vertex input with three extra texture-coordinate fields of differing
// precision and width. The vert function of this experiment reads none of them.
struct VertexInput
{
float4 vertex : POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Vertex output is unchanged in this experiment: clip-space position only.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
};
// Vertex stage that ignores the extra input fields; only v.vertex is read.
VertexOutput vert ( VertexInput v )
{
VertexOutput o = (VertexOutput)0;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
return o;
}
编译结果
//Vert
in highp vec4 in_POSITION0;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
return;
}
//Frag
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = _Color0;
return;
}
没有任何冗余信息加入,good!
输入输出都有用到的字段
我们在输出结构中也加入相同的数据字段
// Vertex input with position plus three texture-coordinate fields.
// In this experiment vert reads uv1 and uv2 but not uv3.
struct VertexInput
{
float4 vertex : POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Vertex output that now mirrors the input's texture-coordinate fields.
// vert assigns uv1 and uv2 explicitly; uv3 only receives the zero from the
// (VertexOutput)0 initializer.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Vertex stage that zero-initializes the whole output, then passes uv1 and
// uv2 through. o.uv3 is never assigned after the initializer.
VertexOutput vert ( VertexInput v )
{
VertexOutput o = (VertexOutput)0;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
o.uv1=v.uv1;
o.uv2=v.uv2;
return o;
}
// Fragment stage: returns _Color0 and reads none of the interpolated fields.
half4 frag ( VertexOutput IN ) : SV_Target
{
return _Color0;
}
编译结果
//Vert
in highp vec4 in_POSITION0;
in highp vec4 in_TEXCOORD0;
in mediump vec4 in_TEXCOORD1;
out highp vec4 vs_TEXCOORD0;
out mediump vec4 vs_TEXCOORD1;
out mediump vec2 vs_TEXCOORD2;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
vs_TEXCOORD0 = in_TEXCOORD0;
vs_TEXCOORD1 = in_TEXCOORD1;
vs_TEXCOORD2.xy = vec2(0.0, 0.0);
return;
}
//Frag
uniform vec4 _Color0;
layout(location = 0) out mediump vec4 SV_Target0;
void main()
{
SV_Target0 = _Color0;
return;
}
这里比较有意思了:我们在 Vert 方法内没有用到输入结构的 uv3:TEXCOORD2,所以这个字段没有被编译;但是输出结构体内的所有字段都被编译了,即使 Frag 没有用到,也全部被编译了。推测可能是因为 (VertexOutput)0 的初始化给所有字段都赋了值,我们下一步尝试把 Vert 内的这个初始化去掉。
不初始化输出结构体
去掉后会有警告,提示输出结构体没有被完全初始化,应该是部分渲染库不支持这种写法。
// Experiment: same vertex stage, but the output struct is declared without
// the (VertexOutput)0 initializer, so o.uv3 is never written at all.
VertexOutput vert ( VertexInput v )
{
VertexOutput o ; // deliberately left uninitialized; the rest of the logic is unchanged
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
o.uv1=v.uv1;
o.uv2=v.uv2;
return o;
}
编译结果
//Vert
in highp vec4 in_POSITION0;
in highp vec4 in_TEXCOORD0;
in mediump vec4 in_TEXCOORD1;
out highp vec4 vs_TEXCOORD0;
out mediump vec4 vs_TEXCOORD1;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
vs_TEXCOORD0 = in_TEXCOORD0;
vs_TEXCOORD1 = in_TEXCOORD1;
return;
}
神奇的一幕发生了,由于没有初始化,这个多余没有用到的值被忽略掉了,没有进入编译结果~~~~~~~
输入中的字段参与运算但没有被用到输出结构
1.没有任何实际意义的空计算,即计算结果没有被赋值到输出结构体上
// Vertex input for the "read but never output" experiment.
// uv3 is declared as half4 here, while the output's uv3 is half2.
struct VertexInput
{
float4 vertex : POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half4 uv3:TEXCOORD2;
};
// Vertex output for this experiment; vert writes clipPos, uv1 and uv3 and
// leaves uv2 unassigned.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Experiment: v.uv2 is read into a local that is never used, so it never
// reaches any output field. Only v.uv1 and v.uv3 feed the output.
VertexOutput vert ( VertexInput v )
{
VertexOutput o ;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
o.uv1=v.uv1;
half2 uv=v.uv2.xy;
o.uv3=v.uv3.xy;
return o;
}
编译结果
in highp vec4 in_POSITION0;
in highp vec4 in_TEXCOORD0;
in mediump vec4 in_TEXCOORD2;
out highp vec4 vs_TEXCOORD0;
out mediump vec2 vs_TEXCOORD2;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
vs_TEXCOORD0 = in_TEXCOORD0;
vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
return;
}
可以看出,没有实际作用过的 input 内的声明被舍弃了 Good!
2.参与过赋值,但是被覆盖了 ,例如我们 input 的 uv2 赋值给 output 的 uv3,然后紧接着用 input 的 uv3 对 output 的 uv3 进行赋值
// Vertex input for the "assigned, then overwritten" experiment.
struct VertexInput
{
float4 vertex : POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half4 uv3:TEXCOORD2;
};
// Vertex output for this experiment; uv3 is written twice by vert and uv2
// is left unassigned.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Experiment: o.uv3 is first assigned from v.uv2 and then immediately
// overwritten from v.uv3, so the value taken from v.uv2 is never observable.
VertexOutput vert ( VertexInput v )
{
VertexOutput o ;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
o.uv1=v.uv1;
o.uv3=v.uv2.xy;
o.uv3=v.uv3.xy;
return o;
}
编译结果
//Vert
in highp vec4 in_POSITION0;
in highp vec4 in_TEXCOORD0;
in mediump vec4 in_TEXCOORD2;
out highp vec4 vs_TEXCOORD0;
out mediump vec2 vs_TEXCOORD2;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
vs_TEXCOORD0 = in_TEXCOORD0;
vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
return;
}
可以看出,被覆盖了也就等于没有被用过,赋值逻辑也被剔除了,所以对应 input 内的声明也被舍弃了 Good!
多维声明不全用
我们在输入结构体中声明四维类型(half4)的字段,然后在 Vert 阶段只使用其 xy 分量:我们将 input 内 half4 uv3 的 xy 分量赋值给 output 内的 half2 uv3
// Vertex input for the "partial use of a multi-component field" experiment.
// uv3 is declared with four components.
struct VertexInput
{
float4 vertex : POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half4 uv3:TEXCOORD2;
};
// Vertex output for this experiment; uv3 has only two components.
struct VertexOutput
{
float4 clipPos : SV_POSITION;
float4 uv1:TEXCOORD0;
half4 uv2:TEXCOORD1;
half2 uv3:TEXCOORD2;
};
// Experiment: use only the xy components of a four-component input field.
// The half4 input uv3 (TEXCOORD2) feeds the half2 output uv3; input uv2
// (TEXCOORD1) is not read at all, which matches the compiled output shown
// for this experiment (in_TEXCOORD2 is declared, TEXCOORD1 is stripped).
VertexOutput vert ( VertexInput v )
{
VertexOutput o ;
o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
o.uv1=v.uv1;
// Read from uv3, not uv2: only its xy components are consumed.
o.uv3=v.uv3.xy;
return o;
}
编译结果
//Vert
in highp vec4 in_POSITION0;
in highp vec4 in_TEXCOORD0;
in mediump vec4 in_TEXCOORD2;
out highp vec4 vs_TEXCOORD0;
out mediump vec2 vs_TEXCOORD2;
vec4 u_xlat0;
vec4 u_xlat1;
void main()
{
u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
vs_TEXCOORD0 = in_TEXCOORD0;
vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
return;
}
我们可以看到,input 内的 TEXCOORD1 没有在 Vert 内被用到,所以被剔除了。但是,虽然我们只用到了 in_TEXCOORD2 的 xy 分量,编译后它的声明(in mediump vec4 in_TEXCOORD2)依然和我们在结构体中声明的 half4 一样,是完整的四维。
调研结论
- 着色器编译器对逻辑冗余的优化处理还是很棒的
- 输入结构体在Vert阶段没有被用到的声明,编译时会被舍弃,包括参与了运算但是最终没有赋值到 output 字段上的情况。但是只要用到了,哪怕是其中一个分量,整个字段还是会被全部声明
- 输出结构体在Vert阶段如果被初始化过,那么所有的声明都会被编译,如果没有被初始化,那只有被赋值的字段会被编译,不过不初始化貌似会有兼容问题
所以我们在写着色器的时候,更多地应该关注 struct 结构体的声明维度和数据精度。毕竟一个 float 占两个 half 的空间。至于方法的调用、逻辑的执行与注释、无用的空逻辑等,在实际编译过程中都会被优化掉,优化这部分代码主要是为了书写工整,便于阅读与理解。当然也可能有极少数的情况会影响实际编译结果,但是我还没有遇到。另外,这次只看了 gles3 的编译结果,所以并不能代表 dx、vulkan、metal 这些渲染库的编译。