34 const idx_t m =
B.nrows();
35 const idx_t n =
B.ncols();
36 const idx_t nx =
B.get_nx();
37 const idx_t ny =
B.get_ny();
40 if (m == 0 || n == 0)
return;
46 trans != Op::ConjTrans);
56 if (
side == Side::Left) {
57 if (
trans == Op::NoTrans) {
58 if (
uplo == Uplo::Upper) {
59 for (idx_t ix = 0; ix < nx; ++ix) {
60 for (idx_t iy = 0; iy < ny; ++iy) {
61 starpu::insert_task_trsm<TA, TB>(
63 A_.tile(nx - ix - 1, nx - ix - 1),
64 B.tile(nx - ix - 1, iy));
68 A.get_const_tiles(0, nx - ix - 1, nx - ix - 1, 1),
69 B.get_const_tiles(nx - ix - 1, 0, 1, ny),
74 for (idx_t ix = 0; ix < nx; ++ix) {
75 for (idx_t iy = 0; iy < ny; ++iy) {
76 starpu::insert_task_trsm<TA, TB>(
78 A_.tile(ix, ix),
B.tile(ix, iy));
80 auto C =
B.get_tiles(ix + 1, 0, nx - ix - 1, ny);
82 A.get_const_tiles(ix + 1, ix, nx - ix - 1, 1),
83 B.get_const_tiles(ix, 0, 1, ny),
89 if (
uplo == Uplo::Upper) {
90 for (idx_t ix = 0; ix < nx; ++ix) {
92 for (idx_t iy = 0; iy < ny; ++iy) {
93 starpu::insert_task_trsm<TA, TB>(
95 A_.tile(ix, ix),
B.tile(ix, iy));
101 auto C =
B.get_tiles(ix + 1, 0, nx - ix - 1, ny);
103 A.get_const_tiles(ix, ix + 1, 1, nx - ix - 1),
104 B.get_const_tiles(ix, 0, 1, ny),
109 for (idx_t ix = 0; ix < nx; ++ix) {
110 for (idx_t iy = 0; iy < ny; ++iy) {
111 starpu::insert_task_trsm<TA, TB>(
113 A_.tile(nx - ix - 1, nx - ix - 1),
114 B.tile(nx - ix - 1, iy));
116 auto C =
B.get_tiles(0, 0, nx - ix - 1, ny);
118 A.get_const_tiles(nx - ix - 1, 0, 1, nx - ix - 1),
119 B.get_const_tiles(nx - ix - 1, 0, 1, ny),
126 if (
trans == Op::NoTrans) {
127 if (
uplo == Uplo::Upper) {
128 for (idx_t iy = 0; iy < ny; ++iy) {
129 for (idx_t ix = 0; ix < nx; ++ix) {
130 starpu::insert_task_trsm<TA, TB>(
132 A_.tile(iy, iy),
B.tile(ix, iy));
134 auto C =
B.get_tiles(0, iy + 1, nx, ny - iy - 1);
136 B.get_const_tiles(0, iy, nx, 1),
137 A.get_const_tiles(iy, iy + 1, 1, ny - iy - 1),
142 for (idx_t iy = 0; iy < ny; ++iy) {
143 for (idx_t ix = 0; ix < nx; ++ix) {
144 starpu::insert_task_trsm<TA, TB>(
146 A_.tile(ny - iy - 1, ny - iy - 1),
147 B.tile(ix, ny - iy - 1));
149 auto C =
B.get_tiles(0, 0, nx, ny - iy - 1);
151 B.get_const_tiles(0, ny - iy - 1, nx, 1),
152 A.get_const_tiles(ny - iy - 1, 0, 1, ny - iy - 1),
158 if (
uplo == Uplo::Upper) {
159 for (idx_t iy = 0; iy < ny; ++iy) {
160 for (idx_t ix = 0; ix < nx; ++ix) {
161 starpu::insert_task_trsm<TA, TB>(
163 A_.tile(ny - iy - 1, ny - iy - 1),
164 B.tile(ix, ny - iy - 1));
166 auto C =
B.get_tiles(0, 0, nx, ny - iy - 1);
168 B.get_const_tiles(0, ny - iy - 1, nx, 1),
169 A.get_const_tiles(0, ny - iy - 1, ny - iy - 1, 1),
174 for (idx_t iy = 0; iy < ny; ++iy) {
175 for (idx_t ix = 0; ix < nx; ++ix) {
176 starpu::insert_task_trsm<TA, TB>(
178 A_.tile(iy, iy),
B.tile(ix, iy));
180 auto C =
B.get_tiles(0, iy + 1, nx, ny - iy - 1);
182 B.get_const_tiles(0, iy, nx, 1),
183 A.get_const_tiles(iy + 1, iy, ny - iy - 1, 1),