-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVirtMatrixThreads.cpp
111 lines (102 loc) · 3.3 KB
/
VirtMatrixThreads.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#include "defines.h"
// class header
#include "VirtMatrixThreads.h"
// intrinsics
#include <xmmintrin.h>
#include <emmintrin.h>
#include <assert.h>
using namespace std;
// thread function
void ThreadProc4(ThreadData *data)
{
float *upperrow = (float *) data->upperrow;
assert(upperrow != nullptr);
// this element should be 1.0
assert(std::abs(*upperrow - 1.0) < 0.000001);
// nothing to do
if (data->numstepsdown == 0)
return;
float *b = (float *) data->b;
// go down below lead row excluding one unknown
float *row = upperrow + data->shiftdown * data->rowoffset;
float *b1 = b + data->rowoffset;
for (size_t i = 0; i < data->numstepsdown; i++)
{
float *x = upperrow;
float *r = row;
// RHS
*b1 -= (*b) * (*row);
// first row element
register __m128 xfirst = _mm_set1_ps(*row);
for (size_t j = 0; j < data->stepsright; j++)
{
_mm_storeu_ps(r,_mm_sub_ps(_mm_loadu_ps(r),_mm_mul_ps(_mm_loadu_ps(x),xfirst)));
x += 4;
r += 4;
}
// this element should be zero
// assert(std::abs(*row) < 0.000001);
// to the start of next row
row += data->shiftdown;
// ..and next RHS
b1++;
}
return;
}
// thread function
void ThreadProc8(ThreadData *data)
{
double *upperrow = (double *) data->upperrow;
assert(upperrow != nullptr);
// this element should be 1.0
assert(std::abs(*upperrow - 1.0) < 0.000001);
// nothing to do
if (data->numstepsdown == 0)
return;
double *b = (double *) data->b;
// go down below lead row excluding one unknown
double *row = upperrow + data->shiftdown * data->rowoffset; //!!! rowoffset should be in doubles
double *b1 = b + data->rowoffset;
for (size_t i = 0; i < data->numstepsdown; i++)
{
double *x = upperrow;
double *r = row;
// RHS
*b1 -= (*b) * (*row);
// first row element
register __m128d xfirst = _mm_set1_pd(*row);
for (size_t j = 0; j < data->stepsright; j++)
{
_mm_storeu_pd(r,_mm_sub_pd(_mm_loadu_pd(r),_mm_mul_pd(_mm_loadu_pd(x),xfirst)));
x += 2;
r += 2;
}
// this element should be zero
// assert(std::abs(*row) < 0.000001);
// to the start of next row
row += data->shiftdown; //!!! shiftdown should be in doubles
// ..and next RHS
b1++;
}
return;
}
// thread function
void ThreadProcFloat(ThreadData *data)
{
if (data->upperrow != nullptr)
{
// do the job
ThreadProc4(data);
}
return;
}
// thread function
void ThreadProcDouble(ThreadData *data)
{
if (data->upperrow != nullptr)
{
// do the job
ThreadProc8(data);
}
return;
}