#
# ATL_UDOT -- double-precision dot product kernel.
# Target: 32-bit x86, GNU as (AT&T syntax), arguments passed on the stack.
# The file must be run through the C preprocessor so that the register
# macros below are expanded.
#
# C prototype:
#    double ATL_UDOT(const int N, const double *X, const int incX,
#                    const double *Y, const int incY)
#
# Stack arguments on entry (return address sits at 0(%esp)):
#    N     at  4(%esp)   element count
#    X     at  8(%esp)   first vector
#    incX  at 12(%esp)   never read; X is walked with a fixed 8-byte step
#    Y     at 16(%esp)   second vector
#    incY  at 20(%esp)   never read; Y is walked with a fixed 8-byte step
#
# Result:   sum of X[i]*Y[i] for i in [0,N), left in %st(0);
#           0.0 when N <= 0.
# Clobbers: %eax, %edx, %ecx, flags; uses at most three x87 stack slots.
# Stack:    no frame is built and %esp is never moved.
#
# These macros show integer register usage
#
#define N %eax
#define X %edx
#define Y %ecx

        .text
        .global ATL_UDOT
        .type   ATL_UDOT,@function
ATL_UDOT:
#
#       Load parameters
#
        movl    4(%esp), N
        movl    8(%esp), X
        movl    16(%esp), Y
#
#       Dot product starts at 0
#
        fldz
#
#       The loop below tests its counter only after the first pass, so a
#       count of zero or less has to bypass it; otherwise the counter would
#       wrap around and the loads would run far past both vectors.
#
        testl   N, N
        jle     .Ldone
.Lloop:
        fldl    (X)                     # st0 = *X, st1 = running sum
        fldl    (Y)                     # st0 = *Y, st1 = *X, st2 = running sum
        fmulp   %st, %st(1)             # st0 = *X * *Y, st1 = running sum
        addl    $8, X                   # step both pointers by one double
        addl    $8, Y
        faddp   %st, %st(1)             # st0 = running sum + product
        decl    N
        jnz     .Lloop                  # repeat until the count reaches zero
.Ldone:
        ret                             # result is returned in st0
        .size   ATL_UDOT, .-ATL_UDOT
Notice that because we are able to confine ourselves to the three scratch registers (%eax, %ecx, and %edx), we have an empty function prologue and epilogue: we neither save any registers nor move the stack pointer.