We’ve seen how NativeArray works, but what if we want more kinds of native collections? Unity 2018.1 only has that one, but you can make your own! Today’s article shows exactly how to do that.

Unity’s documentation provides an example of how to make a native container, but it is out of date and doesn’t compile. It’s still definitely worth looking at in order to understand a lot of the esoteric parts that are required, but it isn’t usable as-is. Here is a cleaned up version of it that does compile and can be used:

/// <summary>
/// A fixed-length array whose elements live in unmanaged memory that this
/// struct allocates in its constructor and frees in <see cref="Dispose"/>.
/// </summary>
///
/// <typeparam name="T">
/// Type of elements in the array. Must be blittable.
/// </typeparam>
[NativeContainer]
[NativeContainerSupportsMinMaxWriteRestriction]
[DebuggerDisplay("Length = {" + nameof(Length) + "}")]
[DebuggerTypeProxy(typeof(NativeCustomArrayDebugView<>))]
public unsafe struct NativeCustomArray<T> : IDisposable
    where T : struct
{
    // Start of the unmanaged allocation holding the elements
    [NativeDisableUnsafePtrRestriction]
    private void* buffer;

    // Number of elements the allocation holds
    private int length;

    // Allocator the memory came from. Needed again to free it.
    private Allocator allocator;

    // Bookkeeping that only exists when safety checks are compiled in
#if ENABLE_UNITY_COLLECTIONS_CHECKS
    internal int m_MinIndex;
    internal int m_MaxIndex;
    internal AtomicSafetyHandle m_Safety;
    [NativeSetClassTypeToNullOnSchedule] internal DisposeSentinel m_DisposeSentinel;
#endif

    /// <summary>
    /// Allocate zero-initialized unmanaged memory for a number of elements
    /// </summary>
    ///
    /// <param name="length">
    /// Number of elements to allocate. Must not be negative.
    /// </param>
    ///
    /// <param name="allocator">
    /// Allocator to allocate the unmanaged memory with
    /// </param>
    public NativeCustomArray(int length, Allocator allocator)
    {
        long byteCount = UnsafeUtility.SizeOf<T>() * (long)length;

#if ENABLE_UNITY_COLLECTIONS_CHECKS
        bool allocatorIsUsable = allocator > Allocator.None;
        if (!allocatorIsUsable)
        {
            throw new ArgumentException(
                "Allocator must be Temp, TempJob or Persistent",
                nameof(allocator));
        }

        bool lengthIsNegative = length < 0;
        if (lengthIsNegative)
        {
            throw new ArgumentOutOfRangeException(
                nameof(length),
                "Length must be >= 0");
        }

        bool elementIsBlittable = UnsafeUtility.IsBlittable<T>();
        if (!elementIsBlittable)
        {
            string message = string.Format(
                "{0} used in NativeCustomArray<{0}> must be blittable",
                typeof(T));
            throw new ArgumentException(message);
        }
#endif

        // Allocate, then zero the memory so every element starts cleared
        void* memory = UnsafeUtility.Malloc(
            byteCount,
            UnsafeUtility.AlignOf<T>(),
            allocator);
        UnsafeUtility.MemClear(memory, byteCount);

        buffer = memory;
        this.length = length;
        this.allocator = allocator;

#if ENABLE_UNITY_COLLECTIONS_CHECKS
        // Initially the whole array may be accessed
        m_MinIndex = 0;
        m_MaxIndex = length - 1;
        DisposeSentinel.Create(out m_Safety, out m_DisposeSentinel, 0);
#endif
    }

    /// <summary>
    /// Get the number of elements in the array
    /// </summary>
    public int Length => length;

    /// <summary>
    /// Index into the array's elements
    /// </summary>
    ///
    /// <param name="index">
    /// Index of the element to get or set. When safety checks are enabled,
    /// an index outside the currently accessible range throws an
    /// <see cref="IndexOutOfRangeException"/>.
    /// </param>
    public T this[int index]
    {
        get
        {
#if ENABLE_UNITY_COLLECTIONS_CHECKS
            AtomicSafetyHandle.CheckReadAndThrow(m_Safety);
            bool readable = index >= m_MinIndex && index <= m_MaxIndex;
            if (!readable)
            {
                FailOutOfRangeError(index);
            }
#endif
            return UnsafeUtility.ReadArrayElement<T>(buffer, index);
        }

        set
        {
#if ENABLE_UNITY_COLLECTIONS_CHECKS
            AtomicSafetyHandle.CheckWriteAndThrow(m_Safety);
            bool writable = index >= m_MinIndex && index <= m_MaxIndex;
            if (!writable)
            {
                FailOutOfRangeError(index);
            }
#endif
            UnsafeUtility.WriteArrayElement(buffer, index, value);
        }
    }

    /// <summary>
    /// Allocate a managed array and copy every element to it
    /// </summary>
    ///
    /// <returns>
    /// A managed array holding a copy of all the elements
    /// </returns>
    public T[] ToArray()
    {
#if ENABLE_UNITY_COLLECTIONS_CHECKS
        AtomicSafetyHandle.CheckReadAndThrow(m_Safety);
#endif

        T[] copy = new T[Length];
        int position = 0;
        while (position < Length)
        {
            copy[position] = UnsafeUtility.ReadArrayElement<T>(buffer, position);
            position++;
        }
        return copy;
    }

    /// <summary>
    /// Check whether the array currently points at unmanaged memory. This is
    /// false after <see cref="Dispose"/> is called.
    /// </summary>
    public bool IsCreated => buffer != null;

    /// <summary>
    /// Free the unmanaged memory and reset the array to an empty state
    /// </summary>
    public void Dispose()
    {
#if ENABLE_UNITY_COLLECTIONS_CHECKS
        DisposeSentinel.Dispose(m_Safety, ref m_DisposeSentinel);
#endif

        UnsafeUtility.Free(buffer, allocator);

        // Clear the pointer so IsCreated reports false from now on
        buffer = null;
        length = 0;
    }

#if ENABLE_UNITY_COLLECTIONS_CHECKS
    // Always throws. Picks the message based on whether the index is inside
    // the array but outside a restricted accessible range.
    private void FailOutOfRangeError(int index)
    {
        bool rangeIsRestricted = m_MinIndex != 0 || m_MaxIndex != Length - 1;
        bool insideArray = index < Length;
        if (!(insideArray && rangeIsRestricted))
        {
            throw new IndexOutOfRangeException(
                $"Index {index} is out of range of '{Length}' Length.");
        }

        string restrictedMessage =
            $"Index {index} is out of restricted IJobParallelFor range " +
            $"[{m_MinIndex}...{m_MaxIndex}] in ReadWriteBuffer.\n" +
            "ReadWriteBuffers are restricted to only read & write the " +
            "element at the job index. You can use double buffering " +
            "strategies to avoid race conditions due to reading & " +
            "writing in parallel to the same elements from a job.";
        throw new IndexOutOfRangeException(restrictedMessage);
    }
#endif
}
 
/// <summary>
/// Provides a debugger view of <see cref="NativeCustomArray{T}"/>.
/// </summary>
///
/// <typeparam name="T">
/// Type of elements in the array
/// </typeparam>
internal sealed class NativeCustomArrayDebugView<T>
    where T : struct
{
    // Array being viewed
    private NativeCustomArray<T> viewed;

    /// <summary>
    /// Create the view for a given array
    /// </summary>
    ///
    /// <param name="array">
    /// Array to view
    /// </param>
    public NativeCustomArrayDebugView(NativeCustomArray<T> array)
    {
        viewed = array;
    }

    /// <summary>
    /// Get a managed copy of the array's elements for the debugger to show
    /// </summary>
    public T[] Items => viewed.ToArray();
}

Here’s what’s changed from Unity’s documentation:

  • Used a void* for the buffer
  • Removed the first two parameters from the DisposeSentinel.Create call
  • Added [NativeSetClassTypeToNullOnSchedule] to m_DisposeSentinel
  • Renamed some fields to omit m_
  • Used C# 6 features: nameof and string interpolation (i.e. $"some {var} here")

Now that we have this, let's look at the IL2CPP output to see if it's inherited any of the special macros that make accessing a NativeArray so fast. To do so, we'll build for iOS with Unity 2018.1.0f2 and open it up in Xcode 9.3:

// IL2CPP output for the set indexer of NativeCustomArray<T> (the "_gshared" variant).
// It does not write to memory directly: it forwards the buffer pointer, index and
// value to a method it looks up at run time.
extern "C"  void NativeCustomArray_1_set_Item_m1259022219_gshared (NativeCustomArray_1_t1216658045 * __this, int32_t ___index0, int32_t ___value1, const RuntimeMethod* method)
{
    {
        // Gather the three arguments: buffer pointer field, index and value
        void* L_0 = (void*)__this->get_m_Buffer_0();
        int32_t L_1 = ___index0;
        int32_t L_2 = ___value1;
        // Fetch entry 3 of the class's rgctx_data (calling InitializedTypeInfo twice,
        // once for the function pointer and once for the hidden argument) and call
        // through its methodPointer
        ((  void (*) (RuntimeObject * /* static, unused */, void*, int32_t, int32_t, const RuntimeMethod*))IL2CPP_RGCTX_METHOD_INFO(InitializedTypeInfo(method->klass)->rgctx_data, 3)->methodPointer)(NULL /*static, unused*/, (void*)(void*)L_0, (int32_t)L_1, (int32_t)L_2, /*hidden argument*/IL2CPP_RGCTX_METHOD_INFO(InitializedTypeInfo(method->klass)->rgctx_data, 3));
        return;
    }
}

This implementation of the set indexer is far from the special-case macros we saw with NativeArray. This super long line is looking up methods using InitializedTypeInfo, which looks like this:

// Passes the class to il2cpp::vm::Class::Init and then returns the same pointer,
// so every use of this helper includes a call to Class::Init.
inline RuntimeClass* InitializedTypeInfo(RuntimeClass* klass)
{
    il2cpp::vm::Class::Init(klass);
    return klass;
}

Because of this, the assembly for the set indexer is far from ideal:

    sub sp, sp, #80             ; =80
    stp x24, x23, [sp, #16]     ; 8-byte Folded Spill
    stp x22, x21, [sp, #32]     ; 8-byte Folded Spill
    stp x20, x19, [sp, #48]     ; 8-byte Folded Spill
    stp x29, x30, [sp, #64]     ; 8-byte Folded Spill
    add x29, sp, #64            ; =64
    mov  x19, x3
    mov  x20, x2
    mov  x21, x1
    ldr     x22, [x0]
    ldr x23, [x19, #24]
    mov  x0, x23
    bl  __ZN6il2cpp2vm5Class4InitEP11Il2CppClass
    ldr x8, [x23, #192]
    ldr x8, [x8, #24]
    ldr     x0, [x8]
    str x0, [sp, #8]            ; 8-byte Folded Spill
    ldr x19, [x19, #24]
    mov  x0, x19
    bl  __ZN6il2cpp2vm5Class4InitEP11Il2CppClass
    ldr x8, [x19, #192]
    ldr x4, [x8, #24]
    mov x0, #0
    mov  x1, x22
    mov  x2, x21
    mov  x3, x20
    ldr x5, [sp, #8]            ; 8-byte Folded Reload
    ldp x29, x30, [sp, #64]     ; 8-byte Folded Reload
    ldp x20, x19, [sp, #48]     ; 8-byte Folded Reload
    ldp x22, x21, [sp, #32]     ; 8-byte Folded Reload
    ldp x24, x23, [sp, #16]     ; 8-byte Folded Reload
    add sp, sp, #80             ; =80
    br  x5

That is a far cry from what we got with NativeArray:

    str w4, [x1, w3, sxtw #2]
    ret

This level of overhead is huge and jeopardizes the performance gains we're getting from using native containers in the first place. So let's try to work around this by having our custom native container type simply use NativeArray to allocate and access unmanaged memory rather than doing so ourselves:

/// <summary>
/// A fixed-length native container that stores its elements in a wrapped
/// <see cref="NativeArray{T}"/> rather than allocating unmanaged memory
/// itself.
/// </summary>
///
/// <typeparam name="T">
/// Type of elements in the array
/// </typeparam>
[NativeContainer]
[NativeContainerSupportsMinMaxWriteRestriction]
[DebuggerDisplay("Length = {" + nameof(Length) + "}")]
[DebuggerTypeProxy(typeof(NativeNestedArrayDebugView<>))]
public struct NativeNestedArray<T> : IDisposable
    where T : struct
{
    // Wrapped array that owns the unmanaged memory
    private NativeArray<T> array;

    // Number of elements. The constructor and Length use this whether or not
    // safety checks are enabled, so it must be declared outside of the
    // ENABLE_UNITY_COLLECTIONS_CHECKS region or the struct fails to compile
    // when checks are disabled.
    internal int m_Length;

    // Bookkeeping that only exists when safety checks are compiled in
#if ENABLE_UNITY_COLLECTIONS_CHECKS
    internal int m_MinIndex;
    internal int m_MaxIndex;
    internal AtomicSafetyHandle m_Safety;
    [NativeSetClassTypeToNullOnSchedule] internal DisposeSentinel m_DisposeSentinel;
#endif

    /// <summary>
    /// Create the array with a given number of elements
    /// </summary>
    ///
    /// <param name="length">
    /// Number of elements in the array
    /// </param>
    ///
    /// <param name="allocator">
    /// Allocator passed on to the wrapped <see cref="NativeArray{T}"/>
    /// </param>
    public NativeNestedArray(int length, Allocator allocator)
    {
        array = new NativeArray<T>(length, allocator);
        m_Length = length;

#if ENABLE_UNITY_COLLECTIONS_CHECKS
        // Initially the whole array may be accessed
        m_MinIndex = 0;
        m_MaxIndex = length - 1;
        DisposeSentinel.Create(out m_Safety, out m_DisposeSentinel, 0);
#endif
    }

    /// <summary>
    /// Get the number of elements in the array
    /// </summary>
    public int Length
    {
        get
        {
            return m_Length;
        }
    }

    /// <summary>
    /// Index into the array's elements. Access is forwarded to the wrapped
    /// <see cref="NativeArray{T}"/>.
    /// </summary>
    ///
    /// <param name="index">
    /// Index of the element to get or set
    /// </param>
    public T this[int index]
    {
        get
        {
            return array[index];
        }
        set
        {
            array[index] = value;
        }
    }

    /// <summary>
    /// Allocate a managed array and copy all elements to it
    /// </summary>
    ///
    /// <returns>
    /// A managed array with all of the array's elements
    /// </returns>
    public T[] ToArray()
    {
        return array.ToArray();
    }

    /// <summary>
    /// Check whether the wrapped array has been created
    /// </summary>
    public bool IsCreated
    {
        get
        {
            return array.IsCreated;
        }
    }

    /// <summary>
    /// Dispose the wrapped array. Do not use this container afterward.
    /// </summary>
    public void Dispose()
    {
#if ENABLE_UNITY_COLLECTIONS_CHECKS
        DisposeSentinel.Dispose(m_Safety, ref m_DisposeSentinel);
#endif
        array.Dispose();
    }
}
 
/// <summary>
/// Provides a debugger view of <see cref="NativeNestedArray{T}"/>.
/// </summary>
///
/// <typeparam name="T">
/// Type of elements in the array
/// </typeparam>
internal sealed class NativeNestedArrayDebugView<T>
    where T : struct
{
    // Array being viewed
    private NativeNestedArray<T> viewed;

    /// <summary>
    /// Create the view for a given array
    /// </summary>
    ///
    /// <param name="array">
    /// Array to view
    /// </param>
    public NativeNestedArrayDebugView(NativeNestedArray<T> array)
    {
        viewed = array;
    }

    /// <summary>
    /// Get a managed copy of the array's elements for the debugger to show
    /// </summary>
    public T[] Items => viewed.ToArray();
}

This code is somewhat shorter and easier to read than the original version. It also doesn't require the unsafe keyword, so it's usable outside of "unsafe" contexts. Regardless, let's look at the IL2CPP output to see if we've regained access to the special-case NativeArray macros:

// IL2CPP output for the set indexer of NativeNestedArray<T> (the "_gshared" variant).
// The store goes through the IL2CPP_NATIVEARRAY_SET_ITEM macro; there is no
// InitializedTypeInfo lookup and no call through a method pointer in this body.
extern "C"  void NativeNestedArray_1_set_Item_m3157667876_gshared (NativeNestedArray_1_t2521080874 * __this, int32_t ___index0, int32_t ___value1, const RuntimeMethod* method)
{
    {
        // Take the address of the wrapped NativeArray field, then gather index and value
        NativeArray_1_t3237678471 * L_0 = (NativeArray_1_t3237678471 *)__this->get_address_of_m_Buffer_0();
        int32_t L_1 = ___index0;
        int32_t L_2 = ___value1;
        // Store the value at the index using the wrapped NativeArray's ___m_Buffer_0
        IL2CPP_NATIVEARRAY_SET_ITEM(int32_t, ((NativeArray_1_t3237678471 *)(NativeArray_1_t3237678471 *)L_0)->___m_Buffer_0, (int32_t)L_1, (int32_t)L_2);
        return;
    }
}

Now that all we're doing is using a NativeArray rather than calling UnsafeUtility.WriteArrayElement ourselves, IL2CPP outputs the special IL2CPP_NATIVEARRAY_SET_ITEM macro. Let's look at the assembly code to confirm that we're getting optimal code generation:

    ldr     x8, [x0]
    str w2, [x8, w1, sxtw #2]
    ret

There's one more instruction than before, but that's necessary in this code to get the m_Buffer field. So we've regained the optimal access to unmanaged memory at this point. Now that we have that access, we can use the general strategy of wrapping NativeArray in our own native container to create other types than just a simple array.

As a demonstration, let's create a NativeList<T> to mirror the List<T> type. This version has the basics (Add, Count, Capacity, RemoveAt) but certainly many more can be added. For now, let's take a look at NativeList<T>:

using System;
using System.Diagnostics;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
 
/// <summary>
/// A version of <see cref="System.Collections.Generic.List{T}"/> that uses
/// unmanaged memory.
/// </summary>
/// 
/// <typeparam name="T">
/// Type of elements in the list. Must be blittable.
/// </typeparam>
///
/// <author>
/// Jackson Dunstan, http://JacksonDunstan.com/articles/4734
/// </author>
[NativeContainer]
[NativeContainerSupportsMinMaxWriteRestriction]
[DebuggerDisplay(
	"Count = {" + nameof(Count) + "}. Capacity = {" + nameof(Capacity) + "}")]
[DebuggerTypeProxy(typeof(NativeListDebugView<>))]
public unsafe struct NativeList<T> : IDisposable
	where T : struct
{
	// Backing array
	private NativeArray<T> array;

	// Number of added elements
	private int count;

	// Allocator used to create the backing array
	private Allocator allocator;

	// Capacity of the backing array. This is read and written whether or not
	// safety checks are enabled, so it must be declared outside of the
	// ENABLE_UNITY_COLLECTIONS_CHECKS region or the struct fails to compile
	// when checks are disabled.
	internal int m_Length;

	// These are all required when checks are enabled
	// They must have these exact types, names, and attributes
#if ENABLE_UNITY_COLLECTIONS_CHECKS
	internal int m_MinIndex;
	internal int m_MaxIndex;
	internal AtomicSafetyHandle m_Safety;
	[NativeSetClassTypeToNullOnSchedule] internal DisposeSentinel m_DisposeSentinel;
#endif

	/// <summary>
	/// Create the list with an initial capacity. It initially has no elements.
	/// </summary>
	/// 
	/// <param name="capacity">
	/// Initial capacity. This will be doubled (or raised to one if it is zero)
	/// when an element is added to a full list.
	/// </param>
	/// 
	/// <param name="allocator">
	/// Allocator to allocate unmanaged memory with
	/// </param>
	public NativeList(int capacity, Allocator allocator)
	{
		// Create the backing array
		array = new NativeArray<T>(capacity, allocator);
		count = 0;
		this.allocator = allocator;
		m_Length = capacity;

		// Initialize fields for safety checks
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		m_MinIndex = 0;
		m_MaxIndex = -1;
		DisposeSentinel.Create(out m_Safety, out m_DisposeSentinel, 0);
#endif
	}

	/// <summary>
	/// Get the capacity of the list. This is always greater than or equal to
	/// its <see cref="Count"/>.
	/// </summary>
	public int Capacity
	{
		get
		{
			return m_Length;
		}
	}

	/// <summary>
	/// Get the number of elements currently in the list. This is always less
	/// than or equal to the <see cref="Capacity"/>.
	/// </summary>
	public int Count
	{
		get
		{
			return count;
		}
	}

	/// <summary>
	/// Index into the list's elements
	/// </summary>
	/// 
	/// <param name="index">
	/// Index of the element to get or set. Must be greater than or equal to
	/// zero and less than <see cref="Count"/>.
	/// </param>
	public T this[int index]
	{
		get
		{
#if ENABLE_UNITY_COLLECTIONS_CHECKS
			AtomicSafetyHandle.CheckReadAndThrow(m_Safety);
			if (index < m_MinIndex || index > m_MaxIndex)
			{
				FailOutOfRangeError(index);
			}
#endif
			return array[index];
		}
		set
		{
#if ENABLE_UNITY_COLLECTIONS_CHECKS
			AtomicSafetyHandle.CheckWriteAndThrow(m_Safety);
			if (index < m_MinIndex || index > m_MaxIndex)
			{
				FailOutOfRangeError(index);
			}
#endif
			array[index] = value;
		}
	}

	/// <summary>
	/// Add an element to the end of the list. If the list is full, it will be
	/// automatically resized by allocating new unmanaged memory with double
	/// the <see cref="Capacity"/> (or a capacity of one if it was zero) and
	/// copying over all existing elements.
	/// </summary>
	/// 
	/// <param name="value">
	/// Element to add
	/// </param>
	public void Add(T value)
	{
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		AtomicSafetyHandle.CheckWriteAndThrow(m_Safety);
		if (m_MinIndex != 0 || m_MaxIndex != Count - 1)
		{
			throw new IndexOutOfRangeException(
				"Can't call add in an IJobParallelFor job.");
		}
#endif
		// The list is full. Resize.
		int insertIndex = count;
		if (insertIndex == m_Length)
		{
			// Doubling a capacity of zero would leave it at zero and the
			// insert below would write past the end, so grow to at least one
			int newLength = Math.Max(m_Length * 2, 1);
			NativeArray<T> newArray = new NativeArray<T>(
				newLength,
				allocator);

			// Nothing to copy out of an empty backing array
			if (m_Length > 0)
			{
				UnsafeUtility.MemCpy(
					newArray.GetUnsafePtr(),
					array.GetUnsafePtr(),
					m_Length * (long)UnsafeUtility.SizeOf<T>());
			}

			array.Dispose();
			array = newArray;
			m_Length = newLength;
		}

		// Insert at the end
		array[insertIndex] = value;

		// Mark the new maximum index that can be read
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		m_MaxIndex = count;
#endif

		// Count the newly-added element
		count++;
	}

	/// <summary>
	/// Remove an element at a given index. Elements after it will be shifted
	/// toward the front of the list.
	/// </summary>
	/// 
	/// <param name="index">
	/// Index of the element to remove. Must be greater than or equal to zero
	/// and less than <see cref="Count"/>.
	/// </param>
	public void RemoveAt(int index)
	{
		int numElementsToShift = count - index - 1;
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		AtomicSafetyHandle.CheckWriteAndThrow(m_Safety);

		// The removed element must exist, and every element from it to the
		// end of the list is written by the shift, so that whole span must be
		// inside the accessible range.
		int lastIndex = count - 1;
		if (index < m_MinIndex || index > lastIndex || lastIndex > m_MaxIndex)
		{
			FailOutOfRangeError(index);
		}
#endif
		if (numElementsToShift > 0)
		{
			int elementSize = UnsafeUtility.SizeOf<T>();
			void* buffer = array.GetUnsafePtr();
			byte* source = (byte*)buffer + elementSize * (index + 1);
			long shiftSize = numElementsToShift * (long)elementSize;

			// Source and destination overlap, so this must be a move
			UnsafeUtility.MemMove(source - elementSize, source, shiftSize);
		}

		count--;
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		m_MaxIndex = count - 1;
#endif
	}

	/// <summary>
	/// Allocate a managed array and copy all elements to it
	/// </summary>
	/// 
	/// <returns>
	/// A managed array with all of the list's elements
	/// </returns>
	public T[] ToArray()
	{
		T[] array = new T[count];
		for (int i = 0; i < count; ++i)
		{
			array[i] = this.array[i];
		}

		return array;
	}

	/// <summary>
	/// Check if the underlying unmanaged memory has been created. This is
	/// initially true then false after <see cref="Dispose"/> is called.
	/// </summary>
	public bool IsCreated
	{
		get
		{
			return array.IsCreated;
		}
	}

	/// <summary>
	/// Release the list's unmanaged memory. Do not use it after this.
	/// </summary>
	public void Dispose()
	{
#if ENABLE_UNITY_COLLECTIONS_CHECKS
		DisposeSentinel.Dispose(m_Safety, ref m_DisposeSentinel);
#endif
		array.Dispose();
	}

	// Throw an appropriate exception when safety checks are enabled.
	// The valid indices of a list are bounded by its element count, not by
	// the capacity of the backing array, so the count is what is compared
	// against and reported here.
#if ENABLE_UNITY_COLLECTIONS_CHECKS
	private void FailOutOfRangeError(int index)
	{
		if (index < count && (m_MinIndex != 0 || m_MaxIndex != count - 1))
		{
			throw new IndexOutOfRangeException(
				$"Index {index} is out of restricted IJobParallelFor range " +
				$"[{m_MinIndex}...{m_MaxIndex}] in ReadWriteBuffer.\n" +
				"ReadWriteBuffers are restricted to only read & write the " +
				"element at the job index. You can use double buffering " +
				"strategies to avoid race conditions due to reading & " +
				"writing in parallel to the same elements from a job.");
		}

		throw new IndexOutOfRangeException(
			$"Index {index} is out of range of '{count}' Count.");
	}
#endif
}
 
/// <summary>
/// Debugger proxy that exposes the elements of a
/// <see cref="NativeList{T}"/> as a managed array.
/// </summary>
/// 
/// <typeparam name="T">
/// Type of elements in the list
/// </typeparam>
internal sealed class NativeListDebugView<T>
	where T : struct
{
	// List whose elements are shown
	private NativeList<T> viewed;

	/// <summary>
	/// Create the view for a given list
	/// </summary>
	/// 
	/// <param name="list">
	/// List to view
	/// </param>
	public NativeListDebugView(NativeList<T> list)
	{
		viewed = list;
	}

	/// <summary>
	/// Get a managed copy of the list's elements for the debugger to show
	/// </summary>
	public T[] Items => viewed.ToArray();
}

Note that it does require an "unsafe" context in order to call UnsafeUtility.MemCpy and UnsafeUtility.MemMove, so make sure to check Allow 'unsafe' Code in Project Settings > Player. Also, Add shouldn't be called from a job's Execute function. There is code to detect calls from an IJobParallelFor, but there's no good way to detect this from a regular IJob so some discipline is required here.

Now that we have a NativeList<T>, let's try to use it in the new Job System. The following job just sums the corresponding values of two NativeList<int> instances and stores each result in a NativeArray<int>:

/// <summary>
/// Job that adds the elements of two lists pairwise and stores each sum at
/// the same index of an array.
/// </summary>
public struct NativeListJob : IJobParallelFor
{
    // First list of addends
    public NativeList<int> ListA;

    // Second list of addends
    public NativeList<int> ListB;

    // Receives ListA[i] + ListB[i] at index i
    public NativeArray<int> Sum;

    /// <summary>
    /// Add the two list elements at one index and store the result
    /// </summary>
    ///
    /// <param name="index">
    /// Index to read from both lists and write to in the sum array
    /// </param>
    public void Execute(int index)
    {
        int fromA = ListA[index];
        int fromB = ListB[index];
        Sum[index] = fromA + fromB;
    }
}

And here's a simple function to run the job from a MonoBehaviour:

/// <summary>
/// Fills two lists, runs a <see cref="NativeListJob"/> over them
/// synchronously, and prints each resulting sum.
/// </summary>
public class TestScript : MonoBehaviour
{
    void Start()
    {
        const int elementCount = 10;

        // Build the two input lists: i in the first and i * 10 in the second
        var listA = new NativeList<int>(elementCount, Allocator.Temp);
        var listB = new NativeList<int>(elementCount, Allocator.Temp);
        int next = 0;
        while (next < elementCount)
        {
            listA.Add(next);
            listB.Add(next * 10);
            ++next;
        }

        // Array the job writes its sums into
        var results = new NativeArray<int>(elementCount, Allocator.Temp);

        // Set up the job and run it to completion right here
        NativeListJob job = new NativeListJob();
        job.ListA = listA;
        job.ListB = listB;
        job.Sum = results;
        job.Run(elementCount);

        // Print one sum per element
        for (int slot = 0; slot < elementCount; slot++)
        {
            print(results[slot]);
        }

        // Release all of the unmanaged memory
        listA.Dispose();
        listB.Dispose();
        results.Dispose();
    }
}

As expected, this prints the following:

0
11
22
33
44
55
66
77
88
99

This NativeList<T> is incomplete, but feel free to add onto it. This also forms a pattern that can be used to create any sort of container using unmanaged memory. The resulting code from IL2CPP and the C++ compiler will be completely optimal, so there's plenty of reason to take the time to do this for performance-critical areas of the game or for any code using the Job System.