The Beautiful Future
sse add mul 본문
#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <tchar.h>

#include <mmintrin.h>
#include <xmmintrin.h>
/*
 * SSE demo: multiplies two float arrays element-wise, four lanes at a time,
 * then reduces the products into a dot product.
 *
 * Printed layout of result[] (one value per line, nData lines):
 *   result[0]     dot product of a and b
 *   result[1..3]  per-lane partial sums left over from the vector reduction
 *   result[4..]   plain element-wise products a[i] * b[i]
 *
 * argc/argv are unused. Returns 0 on success, 1 if an aligned allocation
 * fails. Waits for a key press (getchar) before returning in both cases.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    const int nData = 10;
    const int nDataSize = (int)(nData * sizeof(float));
    /* Round the byte size up to a multiple of 16 so the last, partially
       filled group of floats can still be processed as a whole vector. */
    const int nDataSizeAlign = ((nDataSize + 15) / 16) * 16;
    /* Number of 4-float vectors needed to cover nData elements. */
    const int nSize4 = (nData + 3) / 4;
    int exitCode = 1;

    (void)argc;
    (void)argv;

    /* Casts are kept so the snippet also builds when compiled as C++. */
    float* a = (float*)_aligned_malloc(nDataSizeAlign, 16);
    float* b = (float*)_aligned_malloc(nDataSizeAlign, 16);
    float* result = (float*)_aligned_malloc(nDataSizeAlign, 16);

    if (a != NULL && b != NULL && result != NULL)
    {
        /* Zero the inputs so the padding lanes past nData multiply to 0 and
           do not disturb the sum. result needs no initialisation: the
           multiply loop below overwrites every vector of it. */
        memset(a, 0, nDataSizeAlign);
        memset(b, 0, nDataSizeAlign);
        for (int i = 0; i < nData; ++i)
        {
            a[i] = (float)i;
            b[i] = (float)(i + 1);
        }

        /* mul: result[k] = a[k] * b[k], four lanes per iteration. */
        for (int i = 0; i < nSize4; ++i)
        {
            const __m128 va = _mm_load_ps(a + 4 * i);
            const __m128 vb = _mm_load_ps(b + 4 * i);
            _mm_store_ps(result + 4 * i, _mm_mul_ps(va, vb));
        }

        /* add: fold every following vector into the first one, lane by lane. */
        __m128 laneSum = _mm_load_ps(result);
        for (int i = 1; i < nSize4; ++i)
        {
            laneSum = _mm_add_ps(laneSum, _mm_load_ps(result + 4 * i));
        }
        _mm_store_ps(result, laneSum);

        /* Horizontal add of the four lane sums gives the dot product. */
        result[0] = result[0] + result[1] + result[2] + result[3];

        for (int i = 0; i < nData; ++i)
        {
            printf("%f \n", result[i]);
        }
        exitCode = 0;
    }
    else
    {
        fprintf(stderr, "_aligned_malloc failed\n");
    }

    /* _aligned_free accepts NULL, so this is safe on the failure path too. */
    _aligned_free(a);
    _aligned_free(b);
    _aligned_free(result);

    getchar();
    return exitCode;
}
'스킬' 카테고리의 다른 글
우분투 명령어 (0) | 2017.08.09 |
---|---|
caffe locally connected layer (0) | 2016.09.17 |
matlab 그래프 포인트 움직이기 (0) | 2016.09.01 |
vtk 설치 win8.1 (0) | 2016.08.06 |
install python on windows (0) | 2016.07.26 |