// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

.syntax unified
.thumb

#include <AsmOffsets.inc>         // generated by the build from AsmOffsets.cpp
#include <unixasmmacros.inc>

// RhpNewFast: bump-pointer allocation of a plain object (not an array, no finalizer) out of the
// current thread's allocation context. When the context does not have enough room left, the
// request is forwarded to RhpNewObject with an alloc flags value of 0.
//  r0 == EEType
// On the fast path the new object is returned in r0.
LEAF_ENTRY RhpNewFast, _TEXT
        PROLOG_PUSH "{r4,lr}"

        // r0 is about to receive the Thread pointer, so keep the EEType in r4.
        mov         r4, r0

        // r0 = GetThread()
        INLINE_GETTHREAD

        // r0:  Thread pointer
        // r4:  EEType pointer

        // r12 = candidate address of the new object (the current allocation pointer).
        ldr         r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // r1 = address just past the candidate object (allocation pointer + base size).
        ldr         r1, [r4, #OFFSETOF__EEType__m_uBaseSize]
        add         r1, r12

        // Fall back to the slow helper if the object would end beyond the allocation limit.
        ldr         r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
        cmp         r1, r2
        bhi         LOCAL_LABEL(RhpNewFast_RarePath)

        // Commit the allocation: advance the allocation pointer, then store the EEType pointer
        // into the new object.
        str         r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
        str         r4, [r12, #OFFSETOF__Object__m_pEEType]

        // Return the object in r0.
        mov         r0, r12

        EPILOG_POP  "{r4,pc}"

LOCAL_LABEL(RhpNewFast_RarePath):
        // Slow path: undo the prolog and tail-call RhpNewObject(EEType, alloc flags = 0).
        mov         r1, #0
        mov         r0, r4
        EPILOG_POP  "{r4,lr}"
        b           C_FUNC(RhpNewObject)

LEAF_END RhpNewFast, _TEXT

// Allocate non-array object with finalizer.
//  r0 == EEType
//
// Thin entry point: loads GC_ALLOC_FINALIZE into r1 (the alloc flags argument of RhpNewObject) and
// branches to RhpNewObject without linking, so RhpNewObject returns straight to this function's
// caller. r0 is passed through unchanged.
LEAF_ENTRY RhpNewFinalizable, _TEXT
        mov         r1, #GC_ALLOC_FINALIZE
        b           C_FUNC(RhpNewObject)
LEAF_END RhpNewFinalizable, _TEXT


// Allocate non-array object.
//  r0 == EEType
//  r1 == alloc flags
//
// Slow-path allocator: calls RhpGcAlloc with the EEType's base size as the allocation size.
// On success the new object is returned in r0 with its EEType pointer stored. If RhpGcAlloc
// returns NULL, RhExceptionHandling_FailedAllocation is tail-called with r0 == EEType and r1 == 0.
NESTED_ENTRY RhpNewObject, _TEXT, NoHandler

        PUSH_COOP_PINVOKE_FRAME r3

        // r0: EEType
        // r1: alloc flags
        // r3: transition frame

        // Preserve the EEType in r5.
        // NOTE(review): r5 is overwritten here without an explicit save; presumably
        // PUSH_COOP_PINVOKE_FRAME / POP_COOP_PINVOKE_FRAME save and restore it -- confirm in
        // unixasmmacros.inc.
        mov         r5, r0

        ldr         r2, [r0, #OFFSETOF__EEType__m_uBaseSize]    // cbSize

        // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)
        blx         C_FUNC(RhpGcAlloc)

        // Set the new object's EEType pointer on success.
        cbz         r0, LOCAL_LABEL(NewOutOfMemory)
        str         r5, [r0, #OFFSETOF__Object__m_pEEType]

        // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC
        // The base size is re-read from the EEType (r2 was an argument register for the call above).
        // The compare is unsigned and publishing is skipped only when the size is strictly below
        // RH_LARGE_OBJECT_SIZE, so an object of exactly that size is published as well.
        ldr         r1, [r5, #OFFSETOF__EEType__m_uBaseSize]
        movw        r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF)
        movt        r2, #(RH_LARGE_OBJECT_SIZE >> 16)
        cmp         r1, r2
        blo         LOCAL_LABEL(New_SkipPublish)

        // r0: already contains object
        // r1: already contains object size

        bl          C_FUNC(RhpPublishObject)
        // r0: function returned the passed-in object

LOCAL_LABEL(New_SkipPublish):

        // Tear down the frame and return the object in r0.
        POP_COOP_PINVOKE_FRAME
        bx          lr

LOCAL_LABEL(NewOutOfMemory):
        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
        // an out of memory exception that the caller of this allocator understands.

        mov         r0, r5            // EEType pointer
        mov         r1, #0            // Indicate that we should throw OOM.

        POP_COOP_PINVOKE_FRAME

        b           C_FUNC(RhExceptionHandling_FailedAllocation)

NESTED_END RhpNewObject, _TEXT


// Allocate a string.
//  r0 == EEType
//  r1 == element/character count
//
// On the fast path the string is carved out of the current thread's allocation context and
// returned in r0 with its EEType pointer and length stored. If the allocation context is too
// small, the request is forwarded to RhpNewArrayRare. If the character count is too large for the
// size computation, RhExceptionHandling_FailedAllocation is tail-called with r1 == 0 (OOM).
LEAF_ENTRY RhNewString, _TEXT
        PROLOG_PUSH "{r4-r6,lr}"
        // Make sure computing the overall allocation size won't overflow
        MOV32       r12, ((0xFFFFFFFF - STRING_BASE_SIZE - 3) / STRING_COMPONENT_SIZE)
        cmp         r1, r12
        bhi         LOCAL_LABEL(StringSizeOverflow)

        // Compute overall allocation size (align(base size + (element size * elements), 4)).
        mov         r2, #(STRING_BASE_SIZE + 3)
#if STRING_COMPONENT_SIZE == 2
        add         r2, r2, r1, lsl #1                  // r2 += characters * 2
#else
        NotImplementedComponentSize
#endif
        bic         r2, r2, #3

        mov         r4, r0 // Save EEType
        mov         r5, r1 // Save element count
        mov         r6, r2 // Save string size
        // r0 = GetThread()
        INLINE_GETTHREAD
        // r4 == EEType
        // r5 == element count
        // r6 == string size
        // r0 == Thread*

        // Load potential new object address into r12. It stays there for the rest of the fast path.
        ldr         r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Determine whether the end of the object would lie outside of the current allocation context. If so,
        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
        adds        r6, r12
        bcs         LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB

        // The incoming r1/r2 values now live in r5/r6 and r3 carries no input, so r3 is free to hold
        // the allocation limit. Keeping the limit out of r12 avoids reloading the allocation pointer.
        ldr         r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
        cmp         r6, r3
        bhi         LOCAL_LABEL(RhNewString_RarePath)

        // Update the alloc pointer to account for the allocation.
        str         r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Set the new object's EEType pointer and element count.
        str         r4, [r12, #OFFSETOF__Object__m_pEEType]
        str         r5, [r12, #OFFSETOF__String__m_Length]

        // Return the object allocated in r0.
        mov         r0, r12
        EPILOG_POP  "{r4-r6,pc}"

LOCAL_LABEL(StringSizeOverflow):
        // We get here if the size of the final string object can't be represented as an unsigned
        // 32-bit value. We're going to tail-call to a managed helper that will throw
        // an OOM exception that the caller of this allocator understands.

        // EEType is in r0 already
        mov         r1, #0                 // Indicate that we should throw OOM
        EPILOG_POP  "{r4-r6,lr}"
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

LOCAL_LABEL(RhNewString_RarePath):
        // Rebuild the argument registers expected by RhpNewArrayRare and tail-call it.
        mov         r3, r0
        mov         r0, r4
        mov         r1, r5
        mov         r2, r6
        // r0 == EEType
        // r1 == element count
        // r2 == string size + Thread::m_alloc_context::alloc_ptr
        // r3 == Thread
        EPILOG_POP  "{r4-r6,lr}"
        b           C_FUNC(RhpNewArrayRare)

LEAF_END RhNewString, _TEXT


// Allocate one dimensional, zero based array (SZARRAY).
//  r0 == EEType
//  r1 == element count
//
// On the fast path the array is carved out of the current thread's allocation context and returned
// in r0 with its EEType pointer and length stored. If the allocation context is too small, the
// request is forwarded to RhpNewArrayRare. A negative element count tail-calls
// RhExceptionHandling_FailedAllocation with r1 == 1 (OverflowException); a total size that does
// not fit in 32 bits tail-calls it with r1 == 0 (OOM).
LEAF_ENTRY RhpNewArray, _TEXT
        PROLOG_PUSH "{r4-r6,lr}"

        // Compute overall allocation size (align(base size + (element size * elements), 4)).
        // if the element count is <= 0x10000, no overflow is possible because the component
        // size is <= 0xffff (it's an unsigned 16-bit value) and thus the product is <= 0xffff0000
        // and the base size for the worst case (32 dimensional MdArray) is less than 0xffff.
        ldrh        r2, [r0, #OFFSETOF__EEType__m_usComponentSize]
        cmp         r1, #0x10000
        bhi         LOCAL_LABEL(ArraySizeBig)
        umull       r2, r3, r2, r1          // r2 = low word of the product; the high word in r3 is unused here
        ldr         r3, [r0, #OFFSETOF__EEType__m_uBaseSize]
        adds        r2, r3
        adds        r2, #3
LOCAL_LABEL(ArrayAlignSize):
        bic         r2, r2, #3

        mov         r4, r0 // Save EEType
        mov         r5, r1 // Save element count
        mov         r6, r2 // Save array size
        // r0 = GetThread()
        INLINE_GETTHREAD
        // r4 == EEType
        // r5 == element count
        // r6 == array size
        // r0 == Thread*

        // Load potential new object address into r12. It stays there for the rest of the fast path.
        ldr         r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Determine whether the end of the object would lie outside of the current allocation context. If so,
        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
        adds        r6, r12
        bcs         LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB

        // The values that arrived in r0-r2 now live in r4-r6 and r3 was only a temporary, so r3 is
        // free to hold the allocation limit. Keeping the limit out of r12 avoids reloading the
        // allocation pointer.
        ldr         r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
        cmp         r6, r3
        bhi         LOCAL_LABEL(RhpNewArray_RarePath)

        // Update the alloc pointer to account for the allocation.
        str         r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Set the new object's EEType pointer and element count.
        str         r4, [r12, #OFFSETOF__Object__m_pEEType]
        str         r5, [r12, #OFFSETOF__Array__m_Length]

        // Return the object allocated in r0.
        mov         r0, r12
        EPILOG_POP  "{r4-r6,pc}"

LOCAL_LABEL(ArraySizeBig):
        // if the element count is negative, it's an overflow error
        cmp         r1, #0
        blt         LOCAL_LABEL(ArraySizeOverflow)

        // now we know the element count is in the signed int range [0..0x7fffffff]
        // overflow in computing the total size of the array size gives an out of memory exception,
        // NOT an overflow exception
        // we already have the component size in r2
        umull       r2, r3, r2, r1
        cbnz        r3, LOCAL_LABEL(ArrayOutOfMemoryFinal)
        ldr         r3, [r0, #OFFSETOF__EEType__m_uBaseSize]
        adds        r2, r3
        bcs         LOCAL_LABEL(ArrayOutOfMemoryFinal)
        adds        r2, #3
        bcs         LOCAL_LABEL(ArrayOutOfMemoryFinal)
        b           LOCAL_LABEL(ArrayAlignSize)

LOCAL_LABEL(ArrayOutOfMemoryFinal):
        // We get here if the size of the final array object can't be represented as an unsigned
        // 32-bit value even though the element count is non-negative.

        // EEType is in r0 already
        mov         r1, #0                  // Indicate that we should throw OOM.
        EPILOG_POP  "{r4-r6,lr}"
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

LOCAL_LABEL(ArraySizeOverflow):
        // We get here if the element count is negative. We're going to tail-call to a managed
        // helper that will throw an overflow exception that the caller of this allocator
        // understands.

        // EEType is in r0 already
        mov         r1, #1                  // Indicate that we should throw OverflowException
        EPILOG_POP  "{r4-r6,lr}"
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

LOCAL_LABEL(RhpNewArray_RarePath):
        // Rebuild the argument registers expected by RhpNewArrayRare and tail-call it.
        mov         r3, r0
        mov         r0, r4
        mov         r1, r5
        mov         r2, r6
        // r0 == EEType
        // r1 == element count
        // r2 == array size + Thread::m_alloc_context::alloc_ptr
        // r3 == Thread
        EPILOG_POP  "{r4-r6,lr}"
        b           C_FUNC(RhpNewArrayRare)

LEAF_END RhpNewArray, _TEXT


// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper.
//  r0 == EEType
//  r1 == element count
//  r2 == array size + Thread::m_alloc_context::alloc_ptr
//  r3 == Thread
//
// On success the new array is returned in r0 with its EEType pointer and length stored. If
// RhpGcAlloc returns NULL, RhExceptionHandling_FailedAllocation is tail-called with r0 == EEType
// and r1 == 0.
NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler

        // Recover array size by subtracting the alloc_ptr from r2.
        // The Thread pointer in r3 is only needed for this load; r3 is handed to
        // PUSH_COOP_PINVOKE_FRAME right after (presumably to receive the transition frame pointer
        // that RhpGcAlloc takes as its fourth argument -- confirm in unixasmmacros.inc).
        ldr         r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
        sub         r2, r12

        PUSH_COOP_PINVOKE_FRAME r3

        // Preserve the EEType in r5 and element count in r6.
        // NOTE(review): r5-r7 are overwritten without an explicit save; presumably the
        // PUSH/POP_COOP_PINVOKE_FRAME macros save and restore them -- confirm.
        mov         r5, r0
        mov         r6, r1

        mov         r7, r2          // Save array size in r7

        mov         r1, #0          // uFlags

        // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)
        blx         C_FUNC(RhpGcAlloc)

        // Test for failure (NULL return).
        cbz         r0, LOCAL_LABEL(ArrayOutOfMemory)

        // Success, set the array's type and element count in the new object.
        str         r5, [r0, #OFFSETOF__Object__m_pEEType]
        str         r6, [r0, #OFFSETOF__Array__m_Length]

        // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC
        // The compare is unsigned and publishing is skipped only when the size is strictly below
        // RH_LARGE_OBJECT_SIZE, so an array of exactly that size is published as well.
        movw        r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF)
        movt        r2, #(RH_LARGE_OBJECT_SIZE >> 16)
        cmp         r7, r2
        blo         LOCAL_LABEL(NewArray_SkipPublish)

                                        // r0: already contains object
        mov         r1, r7              // r1: object size
        bl          C_FUNC(RhpPublishObject)
                                        // r0: function returned the passed-in object

LOCAL_LABEL(NewArray_SkipPublish):

        // Tear down the frame and return the array in r0.
        POP_COOP_PINVOKE_FRAME
        bx          lr

LOCAL_LABEL(ArrayOutOfMemory):
        // OOM failure path: tail-call the helper that throws on behalf of the caller.

        mov         r0, r5       // EEType
        mov         r1, #0       // Indicate that we should throw OOM.

        POP_COOP_PINVOKE_FRAME

        b           C_FUNC(RhExceptionHandling_FailedAllocation)

NESTED_END RhpNewArrayRare, _TEXT

// RhpNewFastAlign8: allocate a simple object (not finalizable, array or value type) on an 8 byte
// boundary. The fast path is taken only when the current thread's allocation pointer is already
// 8-byte aligned and the object fits below the allocation limit; in every other case the request
// is forwarded to RhpNewObject with GC_ALLOC_ALIGN8.
//  r0 == EEType
// On the fast path the new object is returned in r0.
LEAF_ENTRY RhpNewFastAlign8, _TEXT
        PROLOG_PUSH "{r4,lr}"

        // r0 is about to receive the Thread pointer, so keep the EEType in r4.
        mov         r4, r0

        // r0 = GetThread()
        INLINE_GETTHREAD

        // r0:  Thread pointer
        // r4:  EEType pointer

        // r12 = candidate address of the new object (the current allocation pointer).
        ldr         r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Give up on the fast path unless that address is already 8-byte aligned.
        tst         r12, #0x7
        bne         LOCAL_LABEL(Alloc8Failed)

        // r1 = address just past the candidate object (allocation pointer + base size).
        ldr         r1, [r4, #OFFSETOF__EEType__m_uBaseSize]
        add         r1, r12

        // Give up on the fast path if the object would end beyond the allocation limit.
        ldr         r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
        cmp         r1, r2
        bhi         LOCAL_LABEL(Alloc8Failed)

        // Commit the allocation: advance the allocation pointer, then store the EEType pointer
        // into the new object.
        str         r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
        str         r4, [r12, #OFFSETOF__Object__m_pEEType]

        // Return the object in r0.
        mov         r0, r12

        EPILOG_POP  "{r4,pc}"

LOCAL_LABEL(Alloc8Failed):
        // Slow path: undo the prolog and tail-call RhpNewObject(EEType, GC_ALLOC_ALIGN8), i.e.
        // 8-byte alignment requested and no finalization.
        mov         r1, #GC_ALLOC_ALIGN8
        mov         r0, r4
        EPILOG_POP  "{r4,lr}"
        b           C_FUNC(RhpNewObject)

LEAF_END RhpNewFastAlign8, _TEXT

// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary.
//  r0 == EEType
//
// Thin entry point: loads (GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) into r1 (the alloc flags argument
// of RhpNewObject) and branches to RhpNewObject without linking, so RhpNewObject returns straight
// to this function's caller. r0 is passed through unchanged.
LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT
        mov         r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8)
        b           C_FUNC(RhpNewObject)
LEAF_END RhpNewFinalizableAlign8, _TEXT

// RhpNewFastMisalign: allocate a value type object (i.e. box it) on an 8 byte boundary + 4, so
// that the value type payload following the EEType pointer is itself 8 byte aligned. The fast
// path is taken only when the current thread's allocation pointer is not 8-byte aligned and the
// object fits below the allocation limit; in every other case the request is forwarded to
// RhpNewObject with (GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS).
//  r0 == EEType
// On the fast path the new object is returned in r0.
LEAF_ENTRY RhpNewFastMisalign, _TEXT
        PROLOG_PUSH "{r4,lr}"

        // r0 is about to receive the Thread pointer, so keep the EEType in r4.
        mov         r4, r0

        // r0 = GetThread()
        INLINE_GETTHREAD

        // r0:  Thread pointer
        // r4:  EEType pointer

        // r12 = candidate address of the new object (the current allocation pointer).
        ldr         r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]

        // Give up on the fast path if that address is 8-byte aligned. The wanted placement is
        // address % 8 == 4; the test below only rejects address % 8 == 0, which is equivalent
        // as long as the allocation pointer is always 4-byte aligned (assumed, not checked here).
        tst         r12, #0x7
        beq         LOCAL_LABEL(BoxAlloc8Failed)

        // r1 = address just past the candidate object (allocation pointer + base size).
        ldr         r1, [r4, #OFFSETOF__EEType__m_uBaseSize]
        add         r1, r12

        // Give up on the fast path if the object would end beyond the allocation limit.
        ldr         r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
        cmp         r1, r2
        bhi         LOCAL_LABEL(BoxAlloc8Failed)

        // Commit the allocation: advance the allocation pointer, then store the EEType pointer
        // into the new object.
        str         r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
        str         r4, [r12, #OFFSETOF__Object__m_pEEType]

        // Return the object in r0.
        mov         r0, r12

        EPILOG_POP  "{r4,pc}"

LOCAL_LABEL(BoxAlloc8Failed):
        // Slow path: undo the prolog and tail-call RhpNewObject with flags requesting an 8+4 byte
        // alignment and no finalization.
        mov         r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS)
        mov         r0, r4
        EPILOG_POP  "{r4,lr}"
        b           C_FUNC(RhpNewObject)

LEAF_END RhpNewFastMisalign, _TEXT

// Allocate an array on an 8 byte boundary.
//  r0 == EEType
//  r1 == element count
//
// Always goes through RhpGcAlloc with GC_ALLOC_ALIGN8; there is no allocation-context fast path.
// On success the new array is returned in r0 with its EEType pointer and length stored. On
// failure RhExceptionHandling_FailedAllocation is tail-called with r0 == EEType and r1 == 0 (OOM)
// or r1 == 1 (OverflowException).
NESTED_ENTRY RhpNewArrayAlign8, _TEXT, NoHandler

        PUSH_COOP_PINVOKE_FRAME r3

        // Compute overall allocation size (base size + align((element size * elements), 4)).
        // r4 is used as a scratch register: first for the high word of the 64-bit product, then for
        // the base size. Any non-zero high word or carry out of an addition means the size does
        // not fit in 32 bits.
        // NOTE(review): r4-r7 are overwritten without an explicit save; presumably the
        // PUSH/POP_COOP_PINVOKE_FRAME macros save and restore them -- confirm in unixasmmacros.inc.
        // NOTE(review): the sign of the element count is only examined at Array8SizeOverflow, i.e.
        // after one of these computations has overflowed. With a component size of 1 a negative
        // count would not overflow here and would proceed to RhpGcAlloc -- confirm that such
        // EETypes never reach this helper.
        ldrh        r2, [r0, #OFFSETOF__EEType__m_usComponentSize]
        umull       r2, r4, r2, r1
        cbnz        r4, LOCAL_LABEL(Array8SizeOverflow)
        adds        r2, #3
        bcs         LOCAL_LABEL(Array8SizeOverflow)
        bic         r2, r2, #3
        ldr         r4, [r0, #OFFSETOF__EEType__m_uBaseSize]
        adds        r2, r4
        bcs         LOCAL_LABEL(Array8SizeOverflow)

        // Preserve the EEType in r5 and element count in r6.
        mov         r5, r0
        mov         r6, r1
        mov         r7, r2                  // Save array size in r7

        mov         r1, #GC_ALLOC_ALIGN8    // uFlags

        // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)
        blx         C_FUNC(RhpGcAlloc)

        // Test for failure (NULL return).
        cbz         r0, LOCAL_LABEL(Array8OutOfMemory)

        // Success, set the array's type and element count in the new object.
        str         r5, [r0, #OFFSETOF__Object__m_pEEType]
        str         r6, [r0, #OFFSETOF__Array__m_Length]

        // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC
        // The compare is unsigned and publishing is skipped only when the size is strictly below
        // RH_LARGE_OBJECT_SIZE, so an array of exactly that size is published as well.
        movw        r2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF)
        movt        r2, #(RH_LARGE_OBJECT_SIZE >> 16)
        cmp         r7, r2
        blo         LOCAL_LABEL(NewArray8_SkipPublish)

                                              // r0: already contains object
        mov         r1, r7                    // r1: object size
        bl          C_FUNC(RhpPublishObject)
                                              // r0: function returned the passed-in object
LOCAL_LABEL(NewArray8_SkipPublish):

        // Tear down the frame and return the array in r0.
        POP_COOP_PINVOKE_FRAME

        bx          lr

LOCAL_LABEL(Array8SizeOverflow):
        // We get here if the size of the final array object can't be represented as an unsigned
        // 32-bit value. We're going to tail-call to a managed helper that will throw
        // an OOM or overflow exception that the caller of this allocator understands.
        // r0 and r1 still hold the incoming EEType and element count on this path.

        // if the element count is non-negative, it's an OOM error
        cmp         r1, #0
        bge         LOCAL_LABEL(Array8OutOfMemory1)

        // r0 holds EEType pointer already
        mov         r1, #1              // Indicate that we should throw OverflowException

        POP_COOP_PINVOKE_FRAME
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

LOCAL_LABEL(Array8OutOfMemory):
        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
        // an out of memory exception that the caller of this allocator understands.

        mov         r0, r5              // EEType pointer

LOCAL_LABEL(Array8OutOfMemory1):
        // Shared OOM tail: entered with r0 == EEType, either by falling through from
        // Array8OutOfMemory or directly from Array8SizeOverflow.

        mov         r1, #0              // Indicate that we should throw OOM.

        POP_COOP_PINVOKE_FRAME
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

NESTED_END RhpNewArrayAlign8, _TEXT
