The Beautiful Future

sse add mul 본문

스킬

sse add mul

Small Octopus 2016. 9. 4. 22:21

#include <stdio.h>
#include <string.h>

#include <malloc.h>
#include <mmintrin.h>
#include <xmmintrin.h>



/*
 * SSE demo: element-wise multiply of two float arrays followed by a
 * reduction of the products (i.e. a dot product), four lanes at a time.
 *
 * a[i] = i and b[i] = i + 1 for i in [0, nData). The buffers are rounded up
 * to a whole number of 16-byte vectors and the padding lanes of a and b are
 * zeroed, so the padding contributes 0 to the sum.
 *
 * After the computation:
 *   result[0]      holds the full dot product,
 *   result[1..3]   hold the per-lane partial sums of lanes 1..3,
 *   result[4..]    hold the individual products a[i] * b[i].
 * The first nData entries of result are printed, one per line.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    (void)argc;
    (void)argv;

    const int nData = 10;
    const int nDataSize = nData * (int)sizeof(float);
    /* Round the byte size up to a multiple of 16 (one __m128 vector). */
    const int nDataSizeAlign = ((nDataSize + 15) / 16) * 16;
    /* Number of 4-float vectors; nSize4 * 16 == nDataSizeAlign. */
    const int nSize4 = (nData + 3) / 4;

    float* a = (float*)_aligned_malloc(nDataSizeAlign, 16);
    float* b = (float*)_aligned_malloc(nDataSizeAlign, 16);
    float* result = (float*)_aligned_malloc(nDataSizeAlign, 16);

    if (a == NULL || b == NULL || result == NULL)
    {
        fprintf(stderr, "_aligned_malloc failed\n");
        /* _aligned_free accepts NULL, so release whatever was obtained. */
        _aligned_free(a);
        _aligned_free(b);
        _aligned_free(result);
        return 1;
    }

    /* Zero the inputs so the padding lanes past nData multiply to 0. */
    memset(a, 0, nDataSizeAlign);
    memset(b, 0, nDataSizeAlign);

    for (int i = 0; i < nData; ++i)
    {
        a[i] = (float)i;
        b[i] = (float)(i + 1);
    }

    /* mul: result = a * b, one vector at a time. This writes every float in
     * result (including padding), so result needs no prior initialisation. */
    for (int i = 0; i < nSize4; ++i)
    {
        const __m128 va = _mm_load_ps(a + 4 * i);
        const __m128 vb = _mm_load_ps(b + 4 * i);
        _mm_store_ps(result + 4 * i, _mm_mul_ps(va, vb));
    }

    /* add: fold every product vector into the first one, lane by lane. */
    __m128 acc = _mm_load_ps(result);
    for (int i = 1; i < nSize4; ++i)
    {
        acc = _mm_add_ps(acc, _mm_load_ps(result + 4 * i));
    }
    _mm_store_ps(result, acc);

    /* Horizontal sum of the four lane totals gives the dot product. */
    result[0] = result[0] + result[1] + result[2] + result[3];

    for (int i = 0; i < nData; ++i)
    {
        printf("%f \n", result[i]);
    }

    _aligned_free(a);
    _aligned_free(b);
    _aligned_free(result);

    /* Keep the console window open until a key is pressed. */
    getchar();

    return 0;
}


'스킬' 카테고리의 다른 글

우분투 명령어  (0) 2017.08.09
caffe locally connected layer  (0) 2016.09.17
matlab 그래프 포인트 움직이기  (0) 2016.09.01
vtk 설치 win8.1  (0) 2016.08.06
install python on windows  (0) 2016.07.26
Comments