-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
152 lines (130 loc) · 5.3 KB
/
main.cpp
File metadata and controls
152 lines (130 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#define __MAIN_CPP
#ifdef __MAIN_CPP
#define NDEBUG
#include <assert.h>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <chrono>
#include "Matx.h"
#include "OpenCLQueue.h"
#include "OpenCLTask.h"
/*
 * Parses the leading decimal integer of str (leading whitespace is skipped).
 * Returns 0 when str contains no parsable integer, including when it is empty
 * or consists only of whitespace.
 */
int StrToInt(std::string str) {
    // Must be initialized: when the stream is empty (or only whitespace) the
    // extraction aborts before the number parser runs and the target variable
    // is left untouched, so an uninitialized value would be returned.
    int ret_val = 0;
    std::istringstream(str) >> ret_val;
    return ret_val;
}
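/*
* Computes the product of two matrices with sizes 2NxN and Nx3N on an OpenCL device.
* Command-line parameters (positional, all optional):
* param 1 - platform index (default 0)
* param 2 - type of device: gpu, cpu or all (default gpu)
* param 3 - device index (default 0)
* param 4 - matrices size N (default 16 * (128 * 3 / 2) = 3072)
*/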
int main(int argc, char* argv[]) {
// data
const size_t common_size = argc > 4 ? StrToInt(argv[4]) : 16 * (128 * 3 / 2); // matrices size
const size_t r1_size = common_size * 2; // first matrix rows count
const size_t c1_r2_size = common_size; // first matrix columns count and second matrix rows count
const size_t c2_size = common_size * 3; // second matrix columns count
std::cout << "data init. in1: " << r1_size << " x " << c1_r2_size <<
", in2: " << c1_r2_size << " x " << c2_size << std::endl;
CMatx<false> in1_matx(r1_size, c1_r2_size);
CMatx<false> in2_matx(in1_matx.Cols(), c2_size);
CMatx<false> out_matx(in1_matx.Rows(), in2_matx.Cols());
auto InputMatx = [](auto& matx) { // some init of matx 1
for(size_t row = 0; row < matx.Rows(); row++)
for(size_t col = 0; col < matx.Cols(); col++)
matx.At(row, col) = abs(row - col) % 7 - 3;
};
auto InputMatx2 = [](auto& matx) { // some init of matx 2
for(size_t row = 0; row < matx.Rows(); row++)
for(size_t col = 0; col < matx.Cols(); col++)
matx.At(row, col) = abs(row - col) % 3;
};
InputMatx(in1_matx);
InputMatx2(in2_matx);
try {
using OpenCL::COpenCLQueue;
using OpenCL::COpenCLTask;
using OpenCL::Utility::CLLoadSource;
// choice of device
const size_t platform_ind = argc > 1 ? StrToInt(argv[1]) : 0;
std::string device_type_str = argc > 2 ? argv[2] : "gpu";
int device_type = -1;
if(device_type_str == "gpu")
device_type = CL_DEVICE_TYPE_GPU;
else if(device_type_str == "cpu")
device_type = CL_DEVICE_TYPE_CPU;
else if(device_type_str == "all")
device_type = CL_DEVICE_TYPE_ALL;
const size_t device_ind = argc > 3 ? StrToInt(argv[3]) : 0;
std::cout << "opencl init" << std::endl;
COpenCLQueue queue(platform_ind, device_type, device_ind);
std::cout << "build kernel" << std::endl;
const char* kernal_filename = "mat_mul_kernel.cl";
COpenCLTask task;
// calculation of optimal tile size for task
const size_t tile_size = COpenCLTask::CalcTileSize(queue, out_matx.Rows(), out_matx.Cols());
task.Build(queue, CLLoadSource(kernal_filename), "main", tile_size); // building task
std::cout << "calculating, local group size: " << tile_size << " x " << tile_size << std::endl;
auto start_time = std::chrono::high_resolution_clock::now();
// calculation of matrices product
task.MatMul(queue,
&in1_matx.At(0, 0), &in2_matx.At(0, 0), &out_matx.At(0, 0),
in1_matx.Rows(), in1_matx.Cols(), in2_matx.Cols());
std::chrono::duration<double> seconds(std::chrono::high_resolution_clock::now() - start_time);
std::cout << "time: " << seconds.count() << " sec" << std::endl << std::endl;
} catch(cl::Error error) {
using OpenCL::Utility::CLErrName;
std::cout << error.what() << " : " << CLErrName(error.err()) << std::endl << std::endl;
return error.err();
}
auto OutputMatx = [](auto& matx) {
std::ostream& out = std::cout;
const int weight = 3;
size_t rows = matx.Rows();
const int max_count = 10;
if(rows > max_count) rows = max_count;
size_t cols = matx.Cols();
if(cols > max_count) cols = max_count;
for(size_t row = 0; row < rows; row++) {
for(size_t col = 0; col < cols; col++)
out << std::setw(weight) << matx.At(row, col) << ' ';
out << std::endl;
}
out << std::endl;
};
auto OutputOf2Matx = [common_size](auto& matx1, auto& matx2) {
std::ostream& out = std::cout;
const size_t max_size = 10;
size_t size = common_size;
if(size > max_size) size = max_size;
const int weight = 2;
for(size_t row = 0; row < size; row++) {
for(size_t col = 0; col < size; col++)
out << std::setw(weight) << matx1.At(row, col) << ' ';
out << '\t';
for(size_t col = 0; col < size; col++)
out << std::setw(weight) << matx2.At(row, col) << ' ';
out << std::endl;
}
out << std::endl;
};
if(in1_matx.Rows() > 10 || in1_matx.Cols() > 10 ||
in2_matx.Rows() > 10 || in2_matx.Cols() > 10)
{
std::cout << "in1, in2:" << std::endl;
OutputOf2Matx(in1_matx, in2_matx);
} else {
std::cout << "in1:" << std::endl;
OutputMatx(in1_matx);
std::cout << "in2:" << std::endl;
OutputMatx(in2_matx);
}
std::cout << "out:" << std::endl;
OutputMatx(out_matx);
}
#endif // __MAIN_CPP