www.main.lv
Don't think just code it

2011-02-25 Linux Assembler SSE add

SSE programming is very interesting in that four numbers are processed in parallel. SSE has registers that are 128 bits wide, so each one can hold 4 floats. GCC C has no default type for 128 bits, so we define our own structure for that.
/* Four packed single-precision floats: the contents of one 128-bit SSE
   register. The typedef is 16-byte aligned. */
typedef struct xmm
{
    float a, b, c, d;
} xmm __attribute__ ((aligned (16)));
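The structure is 16-byte aligned for performance (and because movaps requires aligned memory operands). To add one set of four 4-byte values to another set of four 4-byte values, we first need to load the values: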
movaps xmm0, [eax]
movaps xmm1, [ebx]
and add them
addps xmm0,xmm1
after that store somewhere
movaps [eax], xmm0
Final test program in C looks like:
#include <stdio.h>  /* printf, used by main below */

/* Four packed single-precision floats, laid out like one 128-bit SSE
   register. The typedef is 16-byte aligned because sse_add accesses the
   data with movaps. */
typedef struct xmm
{
    float a;
    float b;
    float c;
    float d;
} xmm __attribute__ ((aligned (16)));

/* Implemented in add.asm: adds the second block to the first, lane by
   lane, and stores the sums in the first block. */
extern void sse_add( xmm *, xmm * );

/* Print the four lanes of *v on one line. */
static void show_xmm( const xmm *v )
{
    printf("%10f %10f %10f %10f\n",v->a,v->b,v->c,v->d);
}

/* Fill two xmm blocks, print them, call sse_add on them, and print both
   blocks again. */
int main( int argc, char **argv)
{
    xmm x0 = { 1.0, 2.0, 3.0, 4.0 };
    xmm x1 = { 5.0, 5.0, 5.0, 5.0 };

    /* values before the call */
    show_xmm( &x0 );
    show_xmm( &x1 );

    sse_add( &x0 , &x1 );

    /* values after the call */
    show_xmm( &x0 );
    show_xmm( &x1 );

    return 0;
}
Build it with: gcc main.c add.o -o main
And here is the asm example:
; FASM syntax (Intel operand order); output is a 32-bit ELF object file
format ELF

; the section holding code must carry the 'executable' flag, otherwise
; fasm emits it as a non-executable section
section '.text' executable

; exported so the C program can link against it
public sse_add

;-----------------------------------------------------------------------
; void sse_add( xmm *x0, xmm *x1 )
; Adds the four packed floats at *x1 to the four packed floats at *x0
; and stores the sums back into *x0. *x1 is only read.
; ABI:   32-bit cdecl, arguments passed on the stack
; In:    [ebp+8] = x0, [ebp+12] = x1 (after the prologue);
;        both pointers must be 16-byte aligned because movaps is used
; Uses:  eax, ecx, xmm0, xmm1 (all caller-saved, nothing to restore)
;-----------------------------------------------------------------------
align 4
sse_add:
    ;arguments that are pointers for 2 xmm data blocks
    x0 equ [ebp+8]
    x1 equ [ebp+12]

    push ebp
    mov ebp, esp

    mov eax, x0
    ;ecx instead of ebx: ebx is callee-saved in the i386 ABI and would
    ;have to be pushed/popped, ecx is free scratch
    mov ecx, x1

    ;load in xmm0 and xmm1 values
    ;if the values were not aligned we would have to use movups instead
    movaps xmm0, [eax]
    movaps xmm1, [ecx]

    ;sum up the four lanes and keep the result in xmm0
    addps xmm0,xmm1

    ;save value in first argument
    movaps [eax], xmm0

    pop ebp
    ret
fasm add.asm add.o