<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
MatrixEntry.hpp
Go to the documentation of this file.
1
3//
4// Copyright (c) 2025, University of Colorado Denver. All rights reserved.
5//
6// This file is part of <T>LAPACK.
7// <T>LAPACK is free software: you can redistribute it and/or modify it under
8// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
9
10#ifndef TLAPACK_STARPU_MATRIXENTRY_HH
11#define TLAPACK_STARPU_MATRIXENTRY_HH
12
13#include <starpu.h>
14
15#include <tuple>
16
19
20namespace tlapack {
21namespace starpu {
22
23 namespace internal {
24
25 enum class Operation : int {
26 Assign = 0,
27 Add = 1,
28 Subtract = 2,
29 Multiply = 3,
30 Divide = 4
31 };
32
35 {
38 return {};
39 }
40
48 template <class T, class U, internal::Operation op>
49 constexpr void data_op_data(void** buffers, void* args) noexcept
50 {
51 T& x = *((T*)STARPU_VARIABLE_GET_PTR(buffers[0]));
52 const U& y = *((U*)STARPU_VARIABLE_GET_PTR(buffers[1]));
53
54 if constexpr (op == internal::Operation::Assign)
55 x = y;
56 else if constexpr (op == internal::Operation::Add)
57 x += y;
58 else if constexpr (op == internal::Operation::Subtract)
59 x -= y;
60 else if constexpr (op == internal::Operation::Multiply)
61 x *= y;
62 else if constexpr (op == internal::Operation::Divide)
63 x /= y;
64 }
65
73 template <class T, class U, internal::Operation op>
74 constexpr void data_op_value(void** buffers, void* args) noexcept
75 {
76 T& x = *((T*)STARPU_VARIABLE_GET_PTR(buffers[0]));
77 const U& y = *((U*)args);
78
79 if constexpr (op == internal::Operation::Assign)
80 x = y;
81 else if constexpr (op == internal::Operation::Add)
82 x += y;
83 else if constexpr (op == internal::Operation::Subtract)
84 x -= y;
85 else if constexpr (op == internal::Operation::Multiply)
86 x *= y;
87 else if constexpr (op == internal::Operation::Divide)
88 x /= y;
89 }
90
98 template <class T, internal::Operation op>
99 constexpr void matrixentry_op_matrixentry(void** buffers,
100 void* args) noexcept
101 {
102 using args_t = std::tuple<idx_t, idx_t, idx_t, idx_t>;
103
104 // get dimensions
105 const idx_t& ld = STARPU_MATRIX_GET_LD(buffers[0]);
106
107 // get arguments
108 const args_t& cl_args = *(args_t*)args;
109 const idx_t& i = std::get<0>(cl_args);
110 const idx_t& j = std::get<1>(cl_args);
111 const idx_t& k = std::get<2>(cl_args);
112 const idx_t& l = std::get<3>(cl_args);
113
114 // get entries
116 T& x = ((T*)A)[i + ld * j];
117 const T& y = ((T*)A)[k + ld * l];
118
119 // perform operation
120 if constexpr (op == internal::Operation::Assign)
121 x = y;
122 else if constexpr (op == internal::Operation::Add)
123 x += y;
124 else if constexpr (op == internal::Operation::Subtract)
125 x -= y;
126 else if constexpr (op == internal::Operation::Multiply)
127 x *= y;
128 else if constexpr (op == internal::Operation::Divide)
129 x /= y;
130 }
131
132 } // namespace internal
133
144 template <typename T>
145 struct MatrixEntry {
148 const idx_t pos[2];
149
152 const idx_t pos[2]) noexcept
153 : root_handle(root_handle), pos{pos[0], pos[1]}
154 {
155 struct starpu_data_filter f_var = {
157 .nchildren = 1,
159 .filter_arg_ptr = (void*)pos};
161 }
162
163 // Disable copy and move constructors, and copy assignment
164 MatrixEntry(MatrixEntry&&) = delete;
165 MatrixEntry(const MatrixEntry&) = delete;
166 MatrixEntry& operator=(const MatrixEntry&) = delete;
167
173
175 constexpr operator T() const noexcept
176 {
178 const T x = *((T*)starpu_variable_get_local_ptr(handle));
180
181 return x;
182 }
183
185 template <class U,
186 std::enable_if_t<std::is_same_v<real_type<U>, T>, int> = 0>
187 constexpr operator MatrixEntry<U>() const noexcept
188 {
190 const T x = *((T*)starpu_variable_get_local_ptr(handle));
192
193 return x;
194 }
195
196 // Arithmetic operators with assignment
197
198 template <class U>
199 constexpr MatrixEntry& operator=(MatrixEntry<U>&& x) noexcept
200 {
202 }
203 constexpr MatrixEntry& operator=(const T& x) noexcept
204 {
206 }
207
208 template <class U>
209 constexpr MatrixEntry& operator+=(const MatrixEntry<U>& x) noexcept
210 {
211 return operate_and_assign<internal::Operation::Add, U>(x);
212 }
213 constexpr MatrixEntry& operator+=(const T& x) noexcept
214 {
215 return operate_and_assign<internal::Operation::Add, T>(x);
216 }
217
218 template <class U>
219 constexpr MatrixEntry& operator-=(const MatrixEntry<U>& x) noexcept
220 {
221 return operate_and_assign<internal::Operation::Subtract, U>(x);
222 }
223 constexpr MatrixEntry& operator-=(const T& x) noexcept
224 {
225 return operate_and_assign<internal::Operation::Subtract, T>(x);
226 }
227
228 template <class U>
229 constexpr MatrixEntry& operator*=(const MatrixEntry<U>& x) noexcept
230 {
231 return operate_and_assign<internal::Operation::Multiply, U>(x);
232 }
233 constexpr MatrixEntry& operator*=(const T& x) noexcept
234 {
235 return operate_and_assign<internal::Operation::Multiply, T>(x);
236 }
237
238 template <class U>
239 constexpr MatrixEntry& operator/=(const MatrixEntry<U>& x) noexcept
240 {
241 return operate_and_assign<internal::Operation::Divide, U>(x);
242 }
243 constexpr MatrixEntry& operator/=(const T& x) noexcept
244 {
245 return operate_and_assign<internal::Operation::Divide, T>(x);
246 }
247
248 // Math functions to satisfy the concept Scalar
249
250 constexpr friend real_type<T> abs(const MatrixEntry& x) noexcept
251 {
252 return abs(T(x));
253 }
254
255 // Display value in ostream
256
257 constexpr friend std::ostream& operator<<(std::ostream& out,
258 const MatrixEntry& x)
259 {
260 return out << T(x);
261 }
262
263 private:
267 template <internal::Operation op, class U>
268 static constexpr struct starpu_codelet gen_cl_op_value() noexcept
269 {
270 using internal::Operation;
271 struct starpu_codelet cl = internal::codelet_init();
272
273 cl.cpu_funcs[0] = internal::data_op_value<T, U, op>;
274 cl.nbuffers = 1;
275 cl.modes[0] = (op == Operation::Assign) ? STARPU_W : STARPU_RW;
276 cl.name = (op == Operation::Assign) ? "assign_value"
277 : (op == Operation::Add) ? "add_value"
278 : (op == Operation::Subtract) ? "subtract_value"
279 : (op == Operation::Multiply) ? "multiply_value"
280 : (op == Operation::Divide) ? "divide_value"
281 : "unknown";
282
283 // The following lines are needed to make the codelet const
284 // See _starpu_codelet_check_deprecated_fields() in StarPU:
285 cl.where |= STARPU_CPU;
286 cl.checked = 1;
287
288 return cl;
289 }
290
291 static constexpr const struct starpu_codelet cl_op_value[] = {
292 gen_cl_op_value<internal::Operation::Assign, T>(),
293 gen_cl_op_value<internal::Operation::Add, T>(),
294 gen_cl_op_value<internal::Operation::Subtract, T>(),
295 gen_cl_op_value<internal::Operation::Multiply, T>(),
296 gen_cl_op_value<internal::Operation::Divide, T>()};
297 static constexpr const struct starpu_codelet cl_op_rvalue[] = {
298 gen_cl_op_value<internal::Operation::Assign, real_type<T>>(),
299 gen_cl_op_value<internal::Operation::Add, real_type<T>>(),
300 gen_cl_op_value<internal::Operation::Subtract, real_type<T>>(),
301 gen_cl_op_value<internal::Operation::Multiply, real_type<T>>(),
302 gen_cl_op_value<internal::Operation::Divide, real_type<T>>()};
303
307 template <internal::Operation op, class U>
308 static constexpr struct starpu_codelet gen_cl_op_data() noexcept
309 {
310 using internal::Operation;
311 struct starpu_codelet cl = internal::codelet_init();
312
313 cl.cpu_funcs[0] = internal::data_op_data<T, U, op>;
314 cl.nbuffers = 2;
315 cl.modes[0] = (op == Operation::Assign) ? STARPU_W : STARPU_RW;
316 cl.modes[1] = STARPU_R;
317 cl.name = (op == Operation::Assign) ? "assign_data"
318 : (op == Operation::Add) ? "add_data"
319 : (op == Operation::Subtract) ? "subtract_data"
320 : (op == Operation::Multiply) ? "multiply_data"
321 : (op == Operation::Divide) ? "divide_data"
322 : "unknown";
323
324 // The following lines are needed to make the codelet const
325 // See _starpu_codelet_check_deprecated_fields() in StarPU:
326 cl.where |= STARPU_CPU;
327 cl.checked = 1;
328
329 return cl;
330 }
331
332 static constexpr const struct starpu_codelet cl_op_data[] = {
333 gen_cl_op_data<internal::Operation::Assign, T>(),
334 gen_cl_op_data<internal::Operation::Add, T>(),
335 gen_cl_op_data<internal::Operation::Subtract, T>(),
336 gen_cl_op_data<internal::Operation::Multiply, T>(),
337 gen_cl_op_data<internal::Operation::Divide, T>()};
338 static constexpr const struct starpu_codelet cl_op_rdata[] = {
339 gen_cl_op_data<internal::Operation::Assign, real_type<T>>(),
340 gen_cl_op_data<internal::Operation::Add, real_type<T>>(),
341 gen_cl_op_data<internal::Operation::Subtract, real_type<T>>(),
342 gen_cl_op_data<internal::Operation::Multiply, real_type<T>>(),
343 gen_cl_op_data<internal::Operation::Divide, real_type<T>>()};
344
348 template <internal::Operation op>
349 static constexpr struct starpu_codelet gen_cl_op_entries() noexcept
350 {
351 using internal::Operation;
352 struct starpu_codelet cl = internal::codelet_init();
353
354 cl.cpu_funcs[0] = internal::matrixentry_op_matrixentry<T, op>;
355 cl.nbuffers = 1;
356 cl.modes[0] = STARPU_RW;
357 cl.name = (op == Operation::Assign) ? "copy_entry"
358 : (op == Operation::Add) ? "add_entry"
359 : (op == Operation::Subtract) ? "subtract_entry"
360 : (op == Operation::Multiply) ? "multiply_entry"
361 : (op == Operation::Divide) ? "divide_entry"
362 : "unknown";
363
364 // The following lines are needed to make the codelet const
365 // See _starpu_codelet_check_deprecated_fields() in StarPU:
366 cl.where |= STARPU_CPU;
367 cl.checked = 1;
368
369 return cl;
370 }
371
372 static constexpr const struct starpu_codelet cl_op_entries[] = {
373 gen_cl_op_entries<internal::Operation::Assign>(),
374 gen_cl_op_entries<internal::Operation::Add>(),
375 gen_cl_op_entries<internal::Operation::Subtract>(),
376 gen_cl_op_entries<internal::Operation::Multiply>(),
377 gen_cl_op_entries<internal::Operation::Divide>()};
378
388 template <internal::Operation op,
389 class U,
390 std::enable_if_t<(std::is_same_v<U, T> ||
391 std::is_same_v<U, real_type<T>>),
392 int> = 0>
393 MatrixEntry& operate_and_assign(const MatrixEntry<U>& x)
394 {
395 // Allocate space for the task
396 struct starpu_task* task = starpu_task_create();
397
398 if (this->root_handle == x.root_handle) {
399 // If both variables are in the same matrix handle, we must
400 // directly perform the operation on the entries of the
401 // handle
402
403 using args_t = std::tuple<idx_t, idx_t, idx_t, idx_t>;
404
405 // Allocate space for the arguments
406 args_t* args_ptr = new args_t;
407
408 // Initialize arguments
409 std::get<0>(*args_ptr) = pos[0];
410 std::get<1>(*args_ptr) = pos[1];
411 std::get<2>(*args_ptr) = x.pos[0];
412 std::get<3>(*args_ptr) = x.pos[1];
413
414 // Initialize task
415 task->cl = (struct starpu_codelet*)&(cl_op_entries[int(op)]);
416 task->handles[0] = this->root_handle;
417 task->cl_arg = (void*)args_ptr;
418 task->cl_arg_size = sizeof(args_t);
419 task->callback_func = [](void* args) noexcept {
420 delete (args_t*)args;
421 };
422 task->callback_arg = (void*)args_ptr;
423 }
424 else {
425 // Initialize task
426 task->cl = (struct starpu_codelet*)&(
427 std::is_same_v<U, T> ? cl_op_data[int(op)]
428 : cl_op_rdata[int(op)]);
429 task->handles[0] = this->handle;
430 task->handles[1] = x.handle;
431 }
432
433 // Submit task and check for errors
434 const int ret = starpu_task_submit(task);
435 STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
436
437 return *this;
438 }
439
449 template <internal::Operation op,
450 class U,
451 std::enable_if_t<(std::is_same_v<U, T> ||
452 std::is_same_v<U, real_type<T>>),
453 int> = 0>
454 MatrixEntry& operate_and_assign(const T& x)
455 {
456 // Allocate space for the value and initialize it
457 T* x_ptr = new T(x);
458
459 // Create and initialize task
460 struct starpu_task* task = starpu_task_create();
461 task->cl = (struct starpu_codelet*)&(std::is_same_v<U, T>
462 ? cl_op_value[int(op)]
463 : cl_op_rvalue[int(op)]);
464 task->handles[0] = this->handle;
465 task->cl_arg = (void*)x_ptr;
466 task->cl_arg_size = sizeof(T);
467 task->callback_func = [](void* arg) noexcept { delete (T*)arg; };
468 task->callback_arg = (void*)x_ptr;
469
470 // Submit task and check for errors
471 const int ret = starpu_task_submit(task);
472 STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
473
474 return *this;
475 }
476 };
477
478 // Arithmetic operators with MatrixEntry
479
480 template <class lhs_t,
481 class rhs_t,
482 std::enable_if_t<
483 (std::is_same_v<lhs_t, rhs_t>) ||
484 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
485 int> = 0>
486 constexpr auto operator+(const MatrixEntry<lhs_t>& x,
487 const rhs_t& y) noexcept
488 {
489 using T = scalar_type<lhs_t, rhs_t>;
490 return T(x) + T(y);
491 }
492
493 template <class lhs_t,
494 class rhs_t,
495 std::enable_if_t<
496 (std::is_same_v<lhs_t, rhs_t>) ||
497 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
498 int> = 0>
499 constexpr auto operator+(const lhs_t& x,
500 const MatrixEntry<rhs_t>& y) noexcept
501 {
502 using T = scalar_type<lhs_t, rhs_t>;
503 return T(x) + T(y);
504 }
505
506 template <class lhs_t,
507 class rhs_t,
508 std::enable_if_t<
509 (std::is_same_v<lhs_t, rhs_t>) ||
510 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
511 int> = 0>
512 constexpr auto operator+(const MatrixEntry<lhs_t>& x,
513 const MatrixEntry<rhs_t>& y) noexcept
514 {
515 using T = scalar_type<lhs_t, rhs_t>;
516 return T(x) + T(y);
517 }
518
519 template <class lhs_t,
520 class rhs_t,
521 std::enable_if_t<
522 (std::is_same_v<lhs_t, rhs_t>) ||
523 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
524 int> = 0>
525 constexpr auto operator-(const MatrixEntry<lhs_t>& x,
526 const rhs_t& y) noexcept
527 {
528 using T = scalar_type<lhs_t, rhs_t>;
529 return T(x) - T(y);
530 }
531
532 template <class lhs_t,
533 class rhs_t,
534 std::enable_if_t<
535 (std::is_same_v<lhs_t, rhs_t>) ||
536 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
537 int> = 0>
538 constexpr auto operator-(const lhs_t& x,
539 const MatrixEntry<rhs_t>& y) noexcept
540 {
541 using T = scalar_type<lhs_t, rhs_t>;
542 return T(x) - T(y);
543 }
544
545 template <class lhs_t,
546 class rhs_t,
547 std::enable_if_t<
548 (std::is_same_v<lhs_t, rhs_t>) ||
549 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
550 int> = 0>
551 constexpr auto operator-(const MatrixEntry<lhs_t>& x,
552 const MatrixEntry<rhs_t>& y) noexcept
553 {
554 using T = scalar_type<lhs_t, rhs_t>;
555 return T(x) - T(y);
556 }
557
558 template <class lhs_t,
559 class rhs_t,
560 std::enable_if_t<
561 (std::is_same_v<lhs_t, rhs_t>) ||
562 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
563 int> = 0>
564 constexpr auto operator*(const MatrixEntry<lhs_t>& x,
565 const rhs_t& y) noexcept
566 {
567 using T = scalar_type<lhs_t, rhs_t>;
568 return T(x) * T(y);
569 }
570
571 template <class lhs_t,
572 class rhs_t,
573 std::enable_if_t<
574 (std::is_same_v<lhs_t, rhs_t>) ||
575 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
576 int> = 0>
577 constexpr auto operator*(const lhs_t& x,
578 const MatrixEntry<rhs_t>& y) noexcept
579 {
580 using T = scalar_type<lhs_t, rhs_t>;
581 return T(x) * T(y);
582 }
583
584 template <class lhs_t,
585 class rhs_t,
586 std::enable_if_t<
587 (std::is_same_v<lhs_t, rhs_t>) ||
588 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
589 int> = 0>
590 constexpr auto operator*(const MatrixEntry<lhs_t>& x,
591 const MatrixEntry<rhs_t>& y) noexcept
592 {
593 using T = scalar_type<lhs_t, rhs_t>;
594 return T(x) * T(y);
595 }
596
597 template <class lhs_t,
598 class rhs_t,
599 std::enable_if_t<
600 (std::is_same_v<lhs_t, rhs_t>) ||
601 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
602 int> = 0>
603 constexpr auto operator/(const MatrixEntry<lhs_t>& x,
604 const rhs_t& y) noexcept
605 {
606 using T = scalar_type<lhs_t, rhs_t>;
607 return T(x) / T(y);
608 }
609
610 template <class lhs_t,
611 class rhs_t,
612 std::enable_if_t<
613 (std::is_same_v<lhs_t, rhs_t>) ||
614 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
615 int> = 0>
616 constexpr auto operator/(const lhs_t& x,
617 const MatrixEntry<rhs_t>& y) noexcept
618 {
619 using T = scalar_type<lhs_t, rhs_t>;
620 return T(x) / T(y);
621 }
622
623 template <class lhs_t,
624 class rhs_t,
625 std::enable_if_t<
626 (std::is_same_v<lhs_t, rhs_t>) ||
627 (std::is_same_v<real_type<lhs_t>, real_type<rhs_t>>),
628 int> = 0>
629
630 constexpr auto operator/(const MatrixEntry<lhs_t>& x,
631 const MatrixEntry<rhs_t>& y) noexcept
632 {
633 using T = scalar_type<lhs_t, rhs_t>;
634 return T(x) / T(y);
635 }
636
637} // namespace starpu
638} // namespace tlapack
639
640#endif // TLAPACK_STARPU_MATRIXENTRY_HH
constexpr void data_op_data(void **buffers, void *args) noexcept
MatrixEntry operation with assignment using two StarPU variable buffers.
Definition MatrixEntry.hpp:49
constexpr struct starpu_codelet codelet_init() noexcept
Return an empty starpu_codelet struct.
Definition MatrixEntry.hpp:34
constexpr void matrixentry_op_matrixentry(void **buffers, void *args) noexcept
MatrixEntry operation with assignment using a two matrix entries.
Definition MatrixEntry.hpp:99
constexpr void data_op_value(void **buffers, void *args) noexcept
MatrixEntry operation with assignment using a StarPU variable buffer and a value.
Definition MatrixEntry.hpp:74
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Types for StarPU.
Arithmetic data type used by Matrix.
Definition MatrixEntry.hpp:145
const idx_t pos[2]
Position of the entry in the matrix.
Definition MatrixEntry.hpp:148
const starpu_data_handle_t root_handle
Matrix handle.
Definition MatrixEntry.hpp:146
~MatrixEntry() noexcept
Destructor cleans StarPU partition plan.
Definition MatrixEntry.hpp:169
starpu_data_handle_t handle
Entry handle.
Definition MatrixEntry.hpp:147
constexpr MatrixEntry(starpu_data_handle_t root_handle, const idx_t pos[2]) noexcept
MatrixEntry constructor from a variable handle.
Definition MatrixEntry.hpp:151