UnityTips 之着色器编译器傻吗 2 多余的逻辑去哪里

2023-04-18  本文已影响0人  暴走TA

简介: 今天突发奇想,要看看shader编译器对一些用不到的逻辑或者变量的处理,看看编译器到底能处理到多高效
unity版本:2022.1.7f1c1

准备测试用的着色器

为了减少额外的干扰,先做一个最简单的,逻辑最少的shader,我们暂且将它叫做 CompileTester 吧

// Minimal URP test shader used as the baseline for every experiment in this article:
// a single forward pass whose fragment stage outputs the _Color0 property unchanged.
Shader "CompileTester"
{
    Properties
    {
        _Color0("Color 0", Color) = (0,0,0,0)
    }

    SubShader
    {
        // Directives shared by every pass of this SubShader.
        HLSLINCLUDE
        #pragma target 3.0
        #pragma prefer_hlslcc gles
        ENDHLSL
    
        Pass
        {   
            Name "Forward"
            Tags { "LightMode"="UniversalForward" }
        
            HLSLPROGRAM

            #pragma vertex vert
            #pragma fragment frag

            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"

            // Vertex-stage input: position only.
            struct VertexInput
            {
                float4 vertex : POSITION;
            };
            // Vertex-stage output / fragment-stage input: clip-space position only.
            struct VertexOutput
            {
                float4 clipPos : SV_POSITION;
            };

            // Backing variable for the "Color 0" material property.
            float4 _Color0;

            // Transforms the object-space position to world space and then to clip space.
            VertexOutput vert ( VertexInput v )
            {
                VertexOutput o = (VertexOutput)0;
                o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
                return o;
            }

            // Outputs _Color0 directly; IN is not read.
            half4 frag ( VertexOutput IN  ) : SV_Target
            {   
                return  _Color0;
            }
            ENDHLSL
        }
    }
}

为了便于观看我们直接忽略掉不变的内容,我们主要处理的逻辑位于 frag 中,期间会添加 function,所以我们简称vert逻辑为 vert... ,简化后的代码如下

    vert... //vertex 方法内容

    // Baseline fragment stage: returns the _Color0 uniform as-is; IN is not read.
    half4 frag ( VertexOutput IN  ) : SV_Target
            {   
                return  _Color0;
            }

开始吧!先看计算逻辑的冗余

最简单的着色器编译结果

编译完大体上有 120+ 行 ,由于我们的逻辑很简单,也只有一个变体,所以编译结果也变得简洁了,由于目前我们vert逻辑没有变化,我们目前只需要关注 #ifdef FRAGMENT 以下的部分

    Shader "CompileTester" {
    Properties {
     _Color0 ("Color 0", Color) = (0.000000,0.000000,0.000000,0.000000)
    }
    SubShader { 
     Pass {
      Name "Forward"
      Tags { "LIGHTMODE"="UniversalForward" }
      //////////////////////////////////
      //                              //
      //      Compiled programs       //
      //                              //
      //////////////////////////////////
    //////////////////////////////////////////////////////
    Keywords: <none>
    -- Hardware tier variant: Tier 1
    -- Vertex shader for "gles3":
    Constant Buffer "$Globals" (16 bytes) on slot 0 {
      Vector4 _Color0 at 0
    }
    Constant Buffer "UnityPerDraw" (656 bytes) on slot 0 {
      Matrix4x4 unity_ObjectToWorld at 0
    }
    Shader Disassembly:
    #ifdef VERTEX
    #version 300 es

    #define HLSLCC_ENABLE_UNIFORM_BUFFERS 1
    #if HLSLCC_ENABLE_UNIFORM_BUFFERS
    #define UNITY_UNIFORM
    #else
    #define UNITY_UNIFORM uniform
    #endif
    #define UNITY_SUPPORTS_UNIFORM_LOCATION 1
    #if UNITY_SUPPORTS_UNIFORM_LOCATION
    #define UNITY_LOCATION(x) layout(location = x)
    #define UNITY_BINDING(x) layout(binding = x, std140)
    #else
    #define UNITY_LOCATION(x)
    #define UNITY_BINDING(x) layout(std140)
    #endif
    uniform     vec4 hlslcc_mtx4x4unity_MatrixVP[4];
    #if HLSLCC_ENABLE_UNIFORM_BUFFERS
    UNITY_BINDING(0) uniform UnityPerDraw {
    #endif
        UNITY_UNIFORM vec4                hlslcc_mtx4x4unity_ObjectToWorld[4];
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_WorldToObject[4];
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_LODFade;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_WorldTransformParams;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_RenderingLayer;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_LightData;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_LightIndices[2];
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_ProbesOcclusion;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SpecCube0_HDR;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SpecCube1_HDR;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_BoxMax;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_BoxMin;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube0_ProbePosition;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_BoxMax;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_BoxMin;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_SpecCube1_ProbePosition;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_LightmapST;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_DynamicLightmapST;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAr;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAg;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHAb;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBr;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBg;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHBb;
        UNITY_UNIFORM mediump vec4 Xhlslcc_UnusedXunity_SHC;
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_MatrixPreviousM[4];
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXhlslcc_mtx4x4unity_MatrixPreviousMI[4];
        UNITY_UNIFORM vec4 Xhlslcc_UnusedXunity_MotionVectorsParams;
    #if HLSLCC_ENABLE_UNIFORM_BUFFERS
    };
    #endif
    in highp vec4 in_POSITION0;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        return;
    }

    #endif
    #ifdef FRAGMENT
    #version 300 es
    precision highp float;
    precision highp int;
    #define HLSLCC_ENABLE_UNIFORM_BUFFERS 1
    #if HLSLCC_ENABLE_UNIFORM_BUFFERS
    #define UNITY_UNIFORM
    #else
    #define UNITY_UNIFORM uniform
    #endif
    #define UNITY_SUPPORTS_UNIFORM_LOCATION 1
    #if UNITY_SUPPORTS_UNIFORM_LOCATION
    #define UNITY_LOCATION(x) layout(location = x)
    #define UNITY_BINDING(x) layout(binding = x, std140)
    #else
    #define UNITY_LOCATION(x)
    #define UNITY_BINDING(x) layout(std140)
    #endif
    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = _Color0;
        return;
    }
    #endif
     }
    }
}

多步计算但不用

我们计算了很多属性和变量结果,但是有很多是没有用到的

    // Experiment: compute two values from _Color0 but return only one of them.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        half4 color2=_Color0+2; // never read after this line
        half4 color3=_Color0+3; // the only value that reaches the return
        return color3;
    }

编译结果

    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    vec4 u_xlat0;
    void main()
    {
        u_xlat0 = _Color0 + vec4(3.0, 3.0, 3.0, 3.0);
        SV_Target0 = u_xlat0;
        return;
    }

color2 的 +2 计算并没有被包含到编译后的逻辑里,优化的不错,点个赞

调用的方法内含冗余逻辑

例如我们调用一个方法,其除了一个返回值之外,还有out 几个值,但是最终只使用了其中的一个

    // Helper with redundant outputs: returns color + 1 and additionally writes
    // color * 2 to color2 and color * 3 to color3.
    // The alpha parameter is accepted but never read in the body.
    half4 CalculateMultiParam(half4 color,half alpha,out half4 color2,out half4 color3)
    {
        color2=color*2;
        color3=color*3;
        return color+1;
    }

    // Experiment: call a helper that has out parameters and use only its return value.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        half4 color2=0;
        half4 color3=0;
        // color2 and color3 are written by the call but never read afterwards.
        half4 finalColor=CalculateMultiParam(_Color0,1,color2,color3);
        return finalColor;
    }

编译结果

    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = _Color0 + vec4(1.0, 1.0, 1.0, 1.0);
        return;
    }

很明显,方法体内的 color2 和 color3 计算虽然返回了,但是由于最终没有被用到,所以最终没有被编译,good!

多次赋值

我们多次给一个变量赋值,然后最后给一个常量

    // Experiment: assign finalColor several times and finish with a constant.
    // Only the last assignment (the constant 1) is what gets returned.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        half4 color2=2;
        half4 color3=3;
        half4 finalColor=color3;
        finalColor=CalculateMultiParam(_Color0,1,color2,color3);
        finalColor=color2;
        finalColor=1; // overwrites every earlier value of finalColor
        return finalColor;
    }

编译结果

    precision highp float;
    precision highp int;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = vec4(1.0, 1.0, 1.0, 1.0);
        return;
    }

我们发现连之前的变量声明 uniform vec4 _Color0; 都被忽略掉了,good!

多步常量计算

我们进行多次的常量计算

    // Experiment: a chain of arithmetic on constants only (2, then +3, then +5);
    // no uniform or input is involved.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        half4 color2=2;
        half4 color3=color2+3;
        half4 finalColor=color3+5;
        return finalColor;
    }

编译结果

    precision highp float;
    precision highp int;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = vec4(10.0, 10.0, 10.0, 10.0);
        return;
    }

直接出结果,勤俭持家好编译 good!

再看数据结构体

我们看一下最简单的两个结构体的编译结果

    // Baseline for the struct experiments: one field in each struct.
    struct VertexInput
    {
        float4 vertex : POSITION;
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
    };

    // Zero-initialises the output, then writes the clip-space position.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o = (VertexOutput)0;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        return o;
    }

    // Returns the _Color0 uniform; IN is not read.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        return _Color0;
    }

编译结果 只看关键的 vert 和 frag部分

//Vert
    in highp vec4 in_POSITION0;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        return;
    }
//Frag
    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = _Color0;
        return;
    }

输入结构加入多余的字段

我们在顶点输入结构体中加入几个不同的数据字段,看一下编译器如何处理

    // Experiment: the input struct declares three extra TEXCOORD fields
    // that the vertex function below never reads.
    struct VertexInput
    {
        float4 vertex : POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
    };

    // Reads only v.vertex; uv1, uv2 and uv3 are untouched.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o = (VertexOutput)0;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        return o;
    }

编译结果

//Vert
    in highp vec4 in_POSITION0;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        return;
    }
//Frag
    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = _Color0;
        return;
    }

没有任何冗余信息加入,good!

输入输出都有用到的字段

我们在输出结构中也加入相同的数据字段

    // Experiment: the same three TEXCOORD fields are declared in both the input
    // and the output struct.
    struct VertexInput
    {
        float4 vertex : POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    };

    // Zero-initialises the whole output, then copies uv1 and uv2 through.
    // o.uv3 is never assigned explicitly and v.uv3 is never read.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o = (VertexOutput)0;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        o.uv1=v.uv1;
        o.uv2=v.uv2;
        return o;
    }

    // Returns the _Color0 uniform; none of the interpolated fields in IN are read.
    half4 frag ( VertexOutput IN  ) : SV_Target
    {   
        return _Color0;
    }

编译结果

//Vert
    in highp vec4 in_POSITION0;
    in highp vec4 in_TEXCOORD0;
    in mediump vec4 in_TEXCOORD1;
    out highp vec4 vs_TEXCOORD0;
    out mediump vec4 vs_TEXCOORD1;
    out mediump vec2 vs_TEXCOORD2;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        vs_TEXCOORD0 = in_TEXCOORD0;
        vs_TEXCOORD1 = in_TEXCOORD1;
        vs_TEXCOORD2.xy = vec2(0.0, 0.0);
        return;
    }
//Frag
      
    uniform     vec4 _Color0;
    layout(location = 0) out mediump vec4 SV_Target0;
    void main()
    {
        SV_Target0 = _Color0;
        return;
    }

这里比较有意思了,我们在 Vert 方法内没有调用到输入结构的 uv3:TEXCOORD2,所以这个字段没有被编译,但是输出结构体内的所有字段都被编译了,即使 Frag 没有调用,也全部被编译了,推测可能是因为输出结构体被初始化(赋了零值),我们下一步尝试把 Vert 内的初始化去掉。

不初始化输出结构体

去掉后会有警告,提示输出结构体没有被完全初始化,应该是部分渲染库不支持这种写法。

    // Experiment: same vertex function as before, but without zero-initialising o.
    // o.uv3 is therefore never written at all.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o ; // intentionally left uninitialised; the rest of the logic is unchanged
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        o.uv1=v.uv1;
        o.uv2=v.uv2;
        return o;
    }

编译结果

    //Vert
    in highp vec4 in_POSITION0;
    in highp vec4 in_TEXCOORD0;
    in mediump vec4 in_TEXCOORD1;
    out highp vec4 vs_TEXCOORD0;
    out mediump vec4 vs_TEXCOORD1;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        vs_TEXCOORD0 = in_TEXCOORD0;
        vs_TEXCOORD1 = in_TEXCOORD1;
        return;
    }

神奇的一幕发生了,由于没有初始化,这个多余没有用到的值被忽略掉了,没有进入编译结果~~~~~~~

输入中的字段参与运算但没有被用到输出结构

  1. 没有任何实际意义的空计算,即结果没有被赋值到输出结构体上
    // Experiment: an input field is read into a local that is never used.
    // Note that the input uv3 is declared as half4 here, while the output uv3 is half2.
    struct VertexInput
    {
        float4 vertex : POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half4 uv3:TEXCOORD2;
    
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    };

    // o is not zero-initialised and o.uv2 is never assigned.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o ;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        o.uv1=v.uv1;
        half2 uv=v.uv2.xy; // dead read: uv is never used afterwards
        o.uv3=v.uv3.xy;
        return o;
    }

编译结果

    in highp vec4 in_POSITION0;
    in highp vec4 in_TEXCOORD0;
    in mediump vec4 in_TEXCOORD2;
    out highp vec4 vs_TEXCOORD0;
    out mediump vec2 vs_TEXCOORD2;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        vs_TEXCOORD0 = in_TEXCOORD0;
        vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
        return;
    }

可以看出,没有实际作用过的 input 内的声明被舍弃了 Good!

2.参与过赋值,但是被覆盖了 ,例如我们 input 的 uv2 赋值给 output 的 uv3,然后紧接着用 input 的 uv3 对 output 的 uv3 进行赋值

    // Experiment: an output field is assigned from one input and then
    // immediately overwritten from another.
    struct VertexInput
    {
        float4 vertex : POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half4 uv3:TEXCOORD2;
    
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    };

    // o is not zero-initialised and o.uv2 is never assigned.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o ;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        o.uv1=v.uv1;
        o.uv3=v.uv2.xy; // overwritten by the next line
        o.uv3=v.uv3.xy; // final value of o.uv3
        return o;
    }

编译结果

    //Vert
    in highp vec4 in_POSITION0;
    in highp vec4 in_TEXCOORD0;
    in mediump vec4 in_TEXCOORD2;
    out highp vec4 vs_TEXCOORD0;
    out mediump vec2 vs_TEXCOORD2;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        vs_TEXCOORD0 = in_TEXCOORD0;
        vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
        return;
    }

可以看出,被覆盖了也就等于没有被用过,赋值逻辑也被剔除了,所以对应 input 内的声明也被舍弃了 Good!

多维声明不全用

我们在输入结构体中声明了四维类型(这里是 half4),然后在 Vert 阶段只使用其 xy 分量:我们将 input 内 half4 uv3 的 xy 分量赋值给 output 内的 half2 uv3

    // Experiment: declare a four-component input (half4 uv3) but use only its xy
    // components in the vertex stage.
    struct VertexInput
    {
        float4 vertex : POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half4 uv3:TEXCOORD2;
    
    };
    struct VertexOutput
    {
        float4 clipPos : SV_POSITION;
        float4 uv1:TEXCOORD0;
        half4 uv2:TEXCOORD1;
        half2 uv3:TEXCOORD2;
    };

    // o is not zero-initialised and o.uv2 is never assigned; v.uv2 is never read.
    VertexOutput vert ( VertexInput v )
    {
        VertexOutput o ;
        o.clipPos= TransformWorldToHClip( TransformObjectToWorld( v.vertex.xyz ) );
        o.uv1=v.uv1;
        // Only the xy components of the half4 input uv3 (TEXCOORD2) are used.
        // This was v.uv2.xy, which contradicted both the description of the
        // experiment and its compiled output (which reads in_TEXCOORD2).
        o.uv3=v.uv3.xy;
        return o;
    }

编译结果

    //Vert
    in highp vec4 in_POSITION0;
    in highp vec4 in_TEXCOORD0;
    in mediump vec4 in_TEXCOORD2;
    out highp vec4 vs_TEXCOORD0;
    out mediump vec2 vs_TEXCOORD2;
    vec4 u_xlat0;
    vec4 u_xlat1;
    void main()
    {
        u_xlat0.xyz = in_POSITION0.yyy * hlslcc_mtx4x4unity_ObjectToWorld[1].xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[0].xyz * in_POSITION0.xxx + u_xlat0.xyz;
        u_xlat0.xyz = hlslcc_mtx4x4unity_ObjectToWorld[2].xyz * in_POSITION0.zzz + u_xlat0.xyz;
        u_xlat0.xyz = u_xlat0.xyz + hlslcc_mtx4x4unity_ObjectToWorld[3].xyz;
        u_xlat1 = u_xlat0.yyyy * hlslcc_mtx4x4unity_MatrixVP[1];
        u_xlat1 = hlslcc_mtx4x4unity_MatrixVP[0] * u_xlat0.xxxx + u_xlat1;
        u_xlat0 = hlslcc_mtx4x4unity_MatrixVP[2] * u_xlat0.zzzz + u_xlat1;
        gl_Position = u_xlat0 + hlslcc_mtx4x4unity_MatrixVP[3];
        vs_TEXCOORD0 = in_TEXCOORD0;
        vs_TEXCOORD2.xy = in_TEXCOORD2.xy;
        return;
    }

我们可以看到,input 内的 TEXCOORD1 没有在 Vert 内被用到,所以被剔除了;但是,虽然我们只用到了 in_TEXCOORD2 的 xy 分量,其声明依然和我们在结构体中声明的一样,是完整的四维向量(half4,对应 GLSL 中的 mediump vec4)

调研结论

  1. 着色器编译器对逻辑冗余的优化处理还是很棒的
  2. 输入结构体在Vert阶段没有被用到的声明,编译时会被舍弃,包括参与了运算但是最终没有赋值到 output 字段上的情况。但是只要用到了,哪怕是其中一个分量,整个字段还是会被全部声明
  3. 输出结构体在Vert阶段如果被初始化过,那么所有的声明都会被编译,如果没有被初始化,那只有被赋值的字段会被编译,不过不初始化貌似会有兼容问题

所以我们在写着色器的时候,更多的应该关注 struct 结构体的声明维度和数据精度。毕竟一个 float 占两个 half 的空间。至于方法的调用,逻辑的执行与注释,无用的空逻辑等这种,在实际编译过程中都会被优化掉,优化这部分代码也就是为了书写工整,便于阅读与理解,当然也会有极少数的部分影响实际编译结果,但是我还没有遇到。然后就是,这玩意是单看的 gles3,所以也不能代表 dx、vulkan、metal 这些渲染库的编译。

上一篇下一篇

猜你喜欢

热点阅读