Generics, be they variables, methods, or functions, are language features that trigger IL2CPP surprises. Can Burst do better? Let’s find out!

Update: A Russian translation of this article is available.

Getting

Let’s start out by making a generic struct that uses its type parameter in a field, a constructor, a pair of methods, and a property:

// Generic struct that exposes a single value of type T through a public
// field, a constructor, a Get/Set method pair, and a property.
struct GenericStruct<T>
{
    // The stored value; every member below reads or writes this field.
    public T Field;
 
    // Initializes Field with the given value.
    public GenericStruct(T value)
    {
        Field = value;
    }
 
    // Method-based accessors for Field.
    public T Get() { return Field; }
    public void Set(T value) { Field = value; }
 
    // Property-based accessor for Field.
    public T Property
    {
        get { return Field; }
        set { Field = value; }
    }
}

Now let’s make some jobs to get its Field value in a variety of ways:

// Burst-compiled job that reads GenericStruct.Field directly and writes it to Out[0].
[BurstCompile]
struct GetGenericFieldJob : IJob
{
    public GenericStruct<int> GenericStruct;
    public NativeArray<int> Out;
 
    public void Execute()
    {
        Out[0] = GenericStruct.Field;
    }
}
 
// Burst-compiled job that reads the value through the Property getter and writes it to Out[0].
[BurstCompile]
struct GetGenericPropertyJob : IJob
{
    public GenericStruct<int> GenericStruct;
    public NativeArray<int> Out;
 
    public void Execute()
    {
        Out[0] = GenericStruct.Property;
    }
}
 
// Burst-compiled job that reads the value through the Get() method and writes it to Out[0].
[BurstCompile]
struct GetGenericGetterJob : IJob
{
    public GenericStruct<int> GenericStruct;
    public NativeArray<int> Out;
 
    public void Execute()
    {
        Out[0] = GenericStruct.Get();
    }
}

Now let’s look at the Burst Inspector pane and see the x86-64 disassembly:

; All three listings are the same three instructions: load an 8-byte pointer
; from [rdi + 8], load a 4-byte value from [rdi], and store that value through
; the pointer. Presumably rdi addresses the job struct, offset 0 is
; GenericStruct.Field and offset 8 is the NativeArray buffer pointer.

; GetGenericFieldJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rax], ecx
 
; GetGenericPropertyJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rax], ecx
 
; GetGenericGetterJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rax], ecx

We don’t need to be assembly experts to see that all three are the same. The property get and function calls have both been inlined by Burst. We’re left with the minimal work of reading the field and storing it in the first element of the NativeArray.

To compare with IL2CPP, let’s make a non-job class with similar functions:

// Non-job counterparts of the three getter jobs: each static method takes the
// job's fields as parameters and performs the same single assignment.
static class TestClass
{
    // Reads the field directly.
    public static void GetGenericFieldExecute(
        GenericStruct<int> GenericStruct,
        NativeArray<int> Out)
    {
        Out[0] = GenericStruct.Field;
    }
 
    // Reads the value through the property getter.
    public static void GetGenericPropertyExecute(
        GenericStruct<int> GenericStruct,
        NativeArray<int> Out)
    {
        Out[0] = GenericStruct.Property;
    }
 
    // Reads the value through the Get() method.
    public static void GetGenericGetterExecute(
        GenericStruct<int> GenericStruct,
        NativeArray<int> Out)
    {
        Out[0] = GenericStruct.Get();
    }
}

Now we can do a macOS build and look in the /path/to/project/PROJECTNAME_macOS_BackUpThisFolder_ButDontShipItWithYourGame/il2cppOutput/Assembly-CSharp.cpp file to see the C++ it generated:

// System.Void TestClass::GetGenericFieldExecute(GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<System.Int32>)
// Generated for the direct field read: no method-initialization guard appears in this body.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_GetGenericFieldExecute_m6952D3E5CCB83892F969587C08AB72ACBADDFD6D (GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___GenericStruct0, NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF  ___Out1, const RuntimeMethod* method)
{
    {
        // Copy the struct argument to a local and read the field through its accessor.
        GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  L_0 = ___GenericStruct0;
        int32_t L_1 = L_0.get_Field_0();
        // Corresponds to "Out[0] = ..." in the C#: item 0 of Out's m_Buffer receives L_1.
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF *)(&___Out1))->___m_Buffer_0, 0, L_1);
        return;
    }
}
// System.Void TestClass::GetGenericPropertyExecute(GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<System.Int32>)
// Generated for the property read: a one-time initialization guard followed by a call to the property getter.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_GetGenericPropertyExecute_m2538CFF06C784D53DAEF39EE28131EB3D91FB332 (GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___GenericStruct0, NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF  ___Out1, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_GetGenericPropertyExecute_m2538CFF06C784D53DAEF39EE28131EB3D91FB332_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        // Call the generated property getter on the address of the struct argument.
        int32_t L_0 = GenericStruct_1_get_Property_m025A61EA6663E602B36E1871CEE2E9D31B211363((GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF *)(&___GenericStruct0), /*hidden argument*/GenericStruct_1_get_Property_m025A61EA6663E602B36E1871CEE2E9D31B211363_RuntimeMethod_var);
        // Corresponds to "Out[0] = ..." in the C#: item 0 of Out's m_Buffer receives L_0.
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF *)(&___Out1))->___m_Buffer_0, 0, L_0);
        return;
    }
}
// System.Void TestClass::GetGenericGetterExecute(GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<System.Int32>)
// Generated for the Get() call: a one-time initialization guard followed by a call to the generated Get function.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_GetGenericGetterExecute_m93709231DB117528EA77615832D05009A1DF482C (GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___GenericStruct0, NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF  ___Out1, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_GetGenericGetterExecute_m93709231DB117528EA77615832D05009A1DF482C_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        // Call the generated Get function on the address of the struct argument.
        int32_t L_0 = GenericStruct_1_Get_mAD6B652D2C750B814ABCCDE24D2D58C3166153C8((GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF *)(&___GenericStruct0), /*hidden argument*/GenericStruct_1_Get_mAD6B652D2C750B814ABCCDE24D2D58C3166153C8_RuntimeMethod_var);
        // Corresponds to "Out[0] = ..." in the C#: item 0 of Out's m_Buffer receives L_0.
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF *)(&___Out1))->___m_Buffer_0, 0, L_0);
        return;
    }
}

Directly getting the field is implemented well, but using the property or method to get the field adds overhead to every call of the function: an if branch that checks whether the method has been initialized, plus a call to the generated getter.

Setting

Now let’s try the opposite and set the field’s value. We can do this four ways instead of just three:

// Burst-compiled job that assigns Value to In.Field directly, then copies In to Out[0].
[BurstCompile]
struct SetGenericFieldJob : IJob
{
    public int Value;
    public GenericStruct<int> In;
    public NativeArray<GenericStruct<int>> Out;
 
    public void Execute()
    {
        In.Field = Value;
        Out[0] = In;
    }
}
 
// Burst-compiled job that assigns Value through the Property setter, then copies In to Out[0].
[BurstCompile]
struct SetGenericPropertyJob : IJob
{
    public int Value;
    public GenericStruct<int> In;
    public NativeArray<GenericStruct<int>> Out;
 
    public void Execute()
    {
        In.Property = Value;
        Out[0] = In;
    }
}
 
// Burst-compiled job that assigns Value through the Set() method, then copies In to Out[0].
[BurstCompile]
struct SetGenericSetterJob : IJob
{
    public int Value;
    public GenericStruct<int> In;
    public NativeArray<GenericStruct<int>> Out;
 
    public void Execute()
    {
        In.Set(Value);
        Out[0] = In;
    }
}
 
// Burst-compiled job that constructs a new GenericStruct<int> from Value and writes it to Out[0].
// Unlike the other three setter jobs, it has no In field.
[BurstCompile]
struct SetGenericConstructorJob : IJob
{
    public int Value;
    public NativeArray<GenericStruct<int>> Out;
 
    public void Execute()
    {
        Out[0] = new GenericStruct<int>(Value);
    }
}

Now let’s look at the Burst output to see what it compiled these jobs to:

; Each listing loads an 8-byte pointer from [rdi + 8] and a 4-byte value from
; [rdi], stores the value to [rdi + 4], and stores it through the pointer.
; Presumably rdi addresses the job struct, offset 0 is Value, offset 4 is
; In.Field and offset 8 is the NativeArray buffer pointer.

; SetGenericFieldJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rdi + 4], ecx
mov     dword ptr [rax], ecx
 
; SetGenericPropertyJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rdi + 4], ecx
mov     dword ptr [rax], ecx
 
; SetGenericSetterJob
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rdi + 4], ecx
mov     dword ptr [rax], ecx
 
; SetGenericConstructorJob
; NOTE(review): this job declares no In field, so the store to [rdi + 4] is
; unexpected here; confirm this listing against the Burst Inspector output.
mov     rax, qword ptr [rdi + 8]
mov     ecx, dword ptr [rdi]
mov     dword ptr [rdi + 4], ecx
mov     dword ptr [rax], ecx

Again, all four outputs are the same. They’re all performing the minimal work, too. So let’s move on to compare against IL2CPP with some non-job functions:

// Non-job counterparts of the four setter jobs: each static method takes the
// job's fields as parameters and performs the same assignments.
static class TestClass
{
    // Sets the field directly, then copies In to Out[0].
    public static void SetGenericFieldExecute(
        int Value,
        GenericStruct<int> In,
        NativeArray<GenericStruct<int>> Out)
    {
        In.Field = Value;
        Out[0] = In;
    }
 
    // Sets the value through the property setter, then copies In to Out[0].
    public static void SetGenericPropertyExecute(
        int Value,
        GenericStruct<int> In,
        NativeArray<GenericStruct<int>> Out)
    {
        In.Property = Value;
        Out[0] = In;
    }
 
    // Sets the value through the Set() method, then copies In to Out[0].
    public static void SetGenericSetterExecute(
        int Value,
        GenericStruct<int> In,
        NativeArray<GenericStruct<int>> Out)
    {
        In.Set(Value);
        Out[0] = In;
    }
 
    // Constructs a new struct from Value and writes it to Out[0].
    public static void SetGenericConstructorExecute(
        int Value,
        NativeArray<GenericStruct<int>> Out)
    {
        Out[0] = new GenericStruct<int>(Value);
    }
}

Building the project, we see this IL2CPP output:

// System.Void TestClass::SetGenericFieldExecute(System.Int32,GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<GenericStruct`1<System.Int32>>)
// Generated for the direct field write: no method-initialization guard appears in this body.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_SetGenericFieldExecute_mD42100D588CE77AA17664B21AB781458327839A4 (int32_t ___Value0, GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___In1, NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18  ___Out2, const RuntimeMethod* method)
{
    {
        // Write Value into the struct argument through its field accessor.
        int32_t L_0 = ___Value0;
        (&___In1)->set_Field_0(L_0);
        // Copy the updated struct; corresponds to "Out[0] = In" in the C#.
        GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  L_1 = ___In1;
        IL2CPP_NATIVEARRAY_SET_ITEM(GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF , ((NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18 *)(&___Out2))->___m_Buffer_0, 0, L_1);
        return;
    }
}
// System.Void TestClass::SetGenericPropertyExecute(System.Int32,GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<GenericStruct`1<System.Int32>>)
// Generated for the property write: a one-time initialization guard followed by a call to the property setter.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_SetGenericPropertyExecute_m5F84D9266EF36EF50950B19157342D07FE39434E (int32_t ___Value0, GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___In1, NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18  ___Out2, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_SetGenericPropertyExecute_m5F84D9266EF36EF50950B19157342D07FE39434E_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        // Call the generated property setter on the address of the struct argument.
        int32_t L_0 = ___Value0;
        GenericStruct_1_set_Property_m8FA3575D200FDB113066D2B4A437863B28B930BB((GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF *)(&___In1), L_0, /*hidden argument*/GenericStruct_1_set_Property_m8FA3575D200FDB113066D2B4A437863B28B930BB_RuntimeMethod_var);
        // Copy the updated struct; corresponds to "Out[0] = In" in the C#.
        GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  L_1 = ___In1;
        IL2CPP_NATIVEARRAY_SET_ITEM(GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF , ((NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18 *)(&___Out2))->___m_Buffer_0, 0, L_1);
        return;
    }
}
// System.Void TestClass::SetGenericSetterExecute(System.Int32,GenericStruct`1<System.Int32>,Unity.Collections.NativeArray`1<GenericStruct`1<System.Int32>>)
// Generated for the Set() call: a one-time initialization guard followed by a call to the generated Set function.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_SetGenericSetterExecute_mBD904F1C296446416DFAB3C1429A1D590D9513BF (int32_t ___Value0, GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  ___In1, NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18  ___Out2, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_SetGenericSetterExecute_mBD904F1C296446416DFAB3C1429A1D590D9513BF_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        // Call the generated Set function on the address of the struct argument.
        int32_t L_0 = ___Value0;
        GenericStruct_1_Set_m46E487E73FE7B9851EEBF7F5E0747E863311C652((GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF *)(&___In1), L_0, /*hidden argument*/GenericStruct_1_Set_m46E487E73FE7B9851EEBF7F5E0747E863311C652_RuntimeMethod_var);
        // Copy the updated struct; corresponds to "Out[0] = In" in the C#.
        GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  L_1 = ___In1;
        IL2CPP_NATIVEARRAY_SET_ITEM(GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF , ((NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18 *)(&___Out2))->___m_Buffer_0, 0, L_1);
        return;
    }
}
// System.Void TestClass::SetGenericConstructorExecute(System.Int32,Unity.Collections.NativeArray`1<GenericStruct`1<System.Int32>>)
// Generated for the constructor call: a one-time initialization guard, a zeroed local struct, then the constructor call.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_SetGenericConstructorExecute_mB24CC718B7EFD961F29D7482165F2EAB67FA5005 (int32_t ___Value0, NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18  ___Out1, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_SetGenericConstructorExecute_mB24CC718B7EFD961F29D7482165F2EAB67FA5005_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        int32_t L_0 = ___Value0;
        // The local struct is zero-filled before the constructor runs on it.
        GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF  L_1;
        memset((&L_1), 0, sizeof(L_1));
        GenericStruct_1__ctor_mF683E5E102C875280E885DC8E8C2808D8969FF6C((&L_1), L_0, /*hidden argument*/GenericStruct_1__ctor_mF683E5E102C875280E885DC8E8C2808D8969FF6C_RuntimeMethod_var);
        // Corresponds to "Out[0] = new GenericStruct<int>(Value)" in the C#.
        IL2CPP_NATIVEARRAY_SET_ITEM(GenericStruct_1_t586960761D48CE541F98C0978DFF0AA576257CEF , ((NativeArray_1_t179A5BEE7FE0D41A82BAC0386A76EBCEF8243C18 *)(&___Out1))->___m_Buffer_0, 0, L_1);
        return;
    }
}

Again, we see good code generation for the direct field access but method overhead for all other functions. The constructor version includes an unnecessary memset call to clear the struct, but that’ll be removed by the C++ compiler so there’s no need to worry about it.

Calling

Finally, let’s try calling functions that are themselves generic. To do that cleanly, we’ll need a new struct:

// Non-generic struct whose methods are themselves generic: an instance method
// and a static method with identical bodies.
struct StructWithGenericFunctions
{
    // Returns a when chooseA is true, otherwise b.
    public T Choose<T>(bool chooseA, T a, T b)
    {
        return chooseA ? a : b;
    }
 
    // Static version of Choose: returns a when chooseA is true, otherwise b.
    public static T StaticChoose<T>(bool chooseA, T a, T b)
    {
        return chooseA ? a : b;
    }
}

Now let’s create some jobs to call these:

// Burst-compiled job that calls the generic instance method Choose with the
// constants 123 and 456 and writes the result to Out[0].
[BurstCompile]
struct CallGenericFunctionJob : IJob
{
    public bool ChooseA;
    public StructWithGenericFunctions In;
    public NativeArray<int> Out;
 
    public void Execute()
    {
        Out[0] = In.Choose(ChooseA, 123, 456);
    }
}
 
// Burst-compiled job that calls the generic static method StaticChoose with the
// constants 123 and 456 and writes the result to Out[0].
[BurstCompile]
struct CallGenericStaticFunctionJob : IJob
{
    public bool ChooseA;
    public NativeArray<int> Out;
 
    public void Execute()
    {
        Out[0] = StructWithGenericFunctions.StaticChoose(ChooseA, 123, 456);
    }
}

Here’s what we see in the Burst Inspector:

; Both listings are identical and contain no call instruction: the byte at
; [rdi] is compared with 0, edx starts as 123 and is replaced by 456 (cmove)
; when that byte equals 0, and the result is stored through the pointer
; loaded from [rdi + 8]. Presumably the byte at [rdi] is ChooseA and the
; pointer is the NativeArray buffer.

; CallGenericFunctionJob
mov     rax, qword ptr [rdi + 8]
cmp     byte ptr [rdi], 0
mov     ecx, 456
mov     edx, 123
cmove   edx, ecx
mov     dword ptr [rax], edx
 
; CallGenericStaticFunctionJob
mov     rax, qword ptr [rdi + 8]
cmp     byte ptr [rdi], 0
mov     ecx, 456
mov     edx, 123
cmove   edx, ecx
mov     dword ptr [rax], edx

Once more, we see that the result is the same. The assembly is straightforward, minimal logic that corresponds directly to what we wrote in the generic functions, and it’s been inlined into Execute.

Now let’s create non-job versions:

// Non-job counterparts of the two call jobs: each static method takes the
// job's fields as parameters and makes the same generic call.
static class TestClass
{
    // Calls the generic instance method Choose on In.
    public static void CallGenericFunctionExecute(
        bool ChooseA,
        StructWithGenericFunctions In,
        NativeArray<int> Out)
    {
        Out[0] = In.Choose(ChooseA, 123, 456);
    }
 
    // Calls the generic static method StaticChoose.
    public static void CallGenericStaticFunctionExecute(
        bool ChooseA,
        NativeArray<int> Out)
    {
        Out[0] = StructWithGenericFunctions.StaticChoose(ChooseA, 123, 456);
    }
}

And here’s the output from IL2CPP:

// System.Void TestClass::CallGenericFunctionExecute(System.Boolean,StructWithGenericFunctions,Unity.Collections.NativeArray`1<System.Int32>)
// Generated for the instance generic call: a one-time initialization guard followed by a call to the Int32 instantiation of Choose.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_CallGenericFunctionExecute_mE8ADF05FE4FF1D43420309F386E6B3730717DDB6 (bool ___ChooseA0, StructWithGenericFunctions_t2D57CAAA3EC03C9B25DCF8FF096FA46341B018C2  ___In1, NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF  ___Out2, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_CallGenericFunctionExecute_mE8ADF05FE4FF1D43420309F386E6B3730717DDB6_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        bool L_0 = ___ChooseA0;
        // The address of the struct argument is passed first, then the flag and the two constants.
        int32_t L_1 = StructWithGenericFunctions_Choose_TisInt32_t585191389E07734F19F3156FF88FB3EF4800D102_mD3914329B4BEAEF619DD440AF05817106E0FB6AD((StructWithGenericFunctions_t2D57CAAA3EC03C9B25DCF8FF096FA46341B018C2 *)(&___In1), L_0, ((int32_t)123), ((int32_t)456), /*hidden argument*/StructWithGenericFunctions_Choose_TisInt32_t585191389E07734F19F3156FF88FB3EF4800D102_mD3914329B4BEAEF619DD440AF05817106E0FB6AD_RuntimeMethod_var);
        // Corresponds to "Out[0] = ..." in the C#: item 0 of Out's m_Buffer receives L_1.
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF *)(&___Out2))->___m_Buffer_0, 0, L_1);
        return;
    }
}
// System.Void TestClass::CallGenericStaticFunctionExecute(System.Boolean,Unity.Collections.NativeArray`1<System.Int32>)
// Generated for the static generic call: a one-time initialization guard followed by a call to the Int32 instantiation of StaticChoose.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR void TestClass_CallGenericStaticFunctionExecute_mBBB9E4295CDE01461BD2FA226375D71E573AC6AF (bool ___ChooseA0, NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF  ___Out1, const RuntimeMethod* method)
{
    // Guard checked on every call: the initialize call runs only while the flag is
    // still false, after which the flag is set so later calls skip it.
    static bool s_Il2CppMethodInitialized;
    if (!s_Il2CppMethodInitialized)
    {
        il2cpp_codegen_initialize_method (TestClass_CallGenericStaticFunctionExecute_mBBB9E4295CDE01461BD2FA226375D71E573AC6AF_MetadataUsageId);
        s_Il2CppMethodInitialized = true;
    }
    {
        bool L_0 = ___ChooseA0;
        // No struct pointer is passed here: only the flag and the two constants.
        int32_t L_1 = StructWithGenericFunctions_StaticChoose_TisInt32_t585191389E07734F19F3156FF88FB3EF4800D102_m47B49B4671A00FF9AD6B95B4648FFC2B0CEAD665(L_0, ((int32_t)123), ((int32_t)456), /*hidden argument*/StructWithGenericFunctions_StaticChoose_TisInt32_t585191389E07734F19F3156FF88FB3EF4800D102_m47B49B4671A00FF9AD6B95B4648FFC2B0CEAD665_RuntimeMethod_var);
        // Corresponds to "Out[0] = ..." in the C#: item 0 of Out's m_Buffer receives L_1.
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_tC6374EC584BF0D6DD4AD6FA0FD00C2C82F82CCAF *)(&___Out1))->___m_Buffer_0, 0, L_1);
        return;
    }
}

Both of these get method call overhead, but otherwise they’re very straightforward calls to the functions which will hopefully be inlined by the C++ compiler.

Conclusion

Today we’ve seen Burst handle a variety of C# generics as well as or better than IL2CPP. In every single case, it generated the minimal assembly with no overhead. IL2CPP, on the other hand, frequently, but not always, inserts an additional if branch in functions that use generics. So while the two compilers sometimes result in the same assembly code being executed by the CPU, in the majority of cases Burst generates the smaller, faster code.