13 #ifndef STXXL_CONTAINERS_MATRIX_LOW_LEVEL_HEADER
14 #define STXXL_CONTAINERS_MATRIX_LOW_LEVEL_HEADER
30 namespace matrix_local {
33 template <
typename ValueType,
unsigned BlockS
ideLength>
34 struct matrix_operations;
37 template <
unsigned BlockS
ideLength,
bool transposed>
41 template <
unsigned BlockS
ideLength>
52 template <
unsigned BlockS
ideLength>
63 template <
typename ValueType,
unsigned BlockS
ideLength,
bool a_transposed,
bool b_transposed,
class Op>
71 #pragma omp parallel for
74 for (int_type col = 0; col <
int_type(BlockSideLength); ++col)
80 #pragma omp parallel for
82 for (int_type row = 0; row <
int_type(BlockSideLength); ++row)
83 for (int_type col = 0; col <
int_type(BlockSideLength); ++col)
90 #pragma omp parallel for
92 for (int_type row = 0; row <
int_type(BlockSideLength); ++row)
93 for (int_type col = 0; col <
int_type(BlockSideLength); ++col)
101 template <
typename ValueType,
unsigned BlockS
ideLength,
bool a_transposed,
class Op>
108 #pragma omp parallel for
111 for (int_type col = 0; col <
int_type(BlockSideLength); ++col)
118 template <
typename ValueType,
unsigned BlockS
ideLength,
bool a_transposed,
class Op>
125 #pragma omp parallel for
128 for (int_type col = 0; col <
int_type(BlockSideLength); ++col)
141 template <
typename ValueType,
unsigned BlockS
ideLength>
145 const ValueType* b,
bool b_in_col_major,
146 ValueType* c,
const bool c_in_col_major)
151 bool a_cm = ! b_in_col_major;
152 b_in_col_major = ! a_in_col_major;
153 a_in_col_major = a_cm;
155 if (! a_in_col_major)
157 if (! b_in_col_major)
160 #pragma omp parallel for
162 for (int_type i = 0; i <
int_type(BlockSideLength); ++i)
165 c[i * BlockSideLength + j] += a[i * BlockSideLength + k] * b[k * BlockSideLength + j];
170 #pragma omp parallel for
172 for (int_type i = 0; i <
int_type(BlockSideLength); ++i)
175 c[i * BlockSideLength + j] += a[i * BlockSideLength + k] * b[k + j * BlockSideLength];
180 if (! b_in_col_major)
183 #pragma omp parallel for
185 for (int_type i = 0; i <
int_type(BlockSideLength); ++i)
188 c[i * BlockSideLength + j] += a[i + k * BlockSideLength] * b[k * BlockSideLength + j];
193 #pragma omp parallel for
195 for (int_type i = 0; i <
int_type(BlockSideLength); ++i)
198 c[i * BlockSideLength + j] += a[i + k * BlockSideLength] * b[k + j * BlockSideLength];
205 typedef int_type blas_int;
206 typedef std::complex<double> blas_double_complex;
207 typedef std::complex<float> blas_single_complex;
211 extern "C" void daxpy_(
const blas_int* n,
const double* alpha,
const double* x,
const blas_int* incx,
double* y,
const blas_int* incy);
212 extern "C" void saxpy_(
const blas_int* n,
const float* alpha,
const float* x,
const blas_int* incx,
float* y,
const blas_int* incy);
213 extern "C" void zaxpy_(
const blas_int* n,
const blas_double_complex* alpha,
const blas_double_complex* x,
const blas_int* incx, blas_double_complex* y,
const blas_int* incy);
214 extern "C" void caxpy_(
const blas_int* n,
const blas_single_complex* alpha,
const blas_single_complex* x,
const blas_int* incx, blas_single_complex* y,
const blas_int* incy);
215 extern "C" void dcopy_(
const blas_int* n,
const double* x,
const blas_int* incx,
double* y,
const blas_int* incy);
216 extern "C" void scopy_(
const blas_int* n,
const float* x,
const blas_int* incx,
float* y,
const blas_int* incy);
217 extern "C" void zcopy_(
const blas_int* n,
const blas_double_complex* x,
const blas_int* incx, blas_double_complex* y,
const blas_int* incy);
218 extern "C" void ccopy_(
const blas_int* n,
const blas_single_complex* x,
const blas_int* incx, blas_single_complex* y,
const blas_int* incy);
221 template <
unsigned BlockS
ideLength>
222 struct low_level_matrix_binary_ass_op<double, BlockSideLength, false, false, typename matrix_operations<double, BlockSideLength>::addition>
224 low_level_matrix_binary_ass_op(
double* c,
const double* a,
const double* b,
typename matrix_operations<double, BlockSideLength>::addition =
typename matrix_operations<double, BlockSideLength>::addition())
229 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
231 low_level_matrix_unary_ass_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
235 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
240 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
246 template <
unsigned BlockS
ideLength>
247 struct low_level_matrix_binary_ass_op<double, BlockSideLength, false, false, typename matrix_operations<double, BlockSideLength>::subtraction>
249 low_level_matrix_binary_ass_op(
double* c,
const double* a,
const double* b,
250 typename matrix_operations<double, BlockSideLength>::subtraction =
typename matrix_operations<double, BlockSideLength>::subtraction())
255 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
257 low_level_matrix_unary_ass_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::subtraction>
261 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
266 low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::subtraction>
272 template <
unsigned BlockS
ideLength>
273 struct low_level_matrix_unary_ass_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
275 low_level_matrix_unary_ass_op(
double* c,
const double* a,
276 typename matrix_operations<double, BlockSideLength>::addition =
typename matrix_operations<double, BlockSideLength>::addition())
278 const blas_int size = BlockSideLength * BlockSideLength;
279 const blas_int int_one = 1;
280 const double one = 1.0;
282 daxpy_(&size, &one, a, &int_one, c, &int_one);
286 template <
unsigned BlockS
ideLength>
287 struct low_level_matrix_unary_ass_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::subtraction>
289 low_level_matrix_unary_ass_op(
double* c,
const double* a,
290 typename matrix_operations<double, BlockSideLength>::subtraction =
typename matrix_operations<double, BlockSideLength>::subtraction())
292 const blas_int size = BlockSideLength * BlockSideLength;
293 const blas_int int_one = 1;
294 const double minusone = -1.0;
296 daxpy_(&size, &minusone, a, &int_one, c, &int_one);
300 template <
unsigned BlockS
ideLength>
301 struct low_level_matrix_unary_op<double, BlockSideLength, false, typename matrix_operations<double, BlockSideLength>::addition>
303 low_level_matrix_unary_op(
double* c,
const double* a,
304 typename matrix_operations<double, BlockSideLength>::addition =
typename matrix_operations<double, BlockSideLength>::addition())
306 const blas_int size = BlockSideLength * BlockSideLength;
307 const blas_int int_one = 1;
308 dcopy_(&size, a, &int_one, c, &int_one);
313 template <
unsigned BlockS
ideLength>
314 struct low_level_matrix_binary_ass_op<float, BlockSideLength, false, false, typename matrix_operations<float, BlockSideLength>::addition>
316 low_level_matrix_binary_ass_op(
float* c,
const float* a,
const float* b,
typename matrix_operations<float, BlockSideLength>::addition =
typename matrix_operations<float, BlockSideLength>::addition())
321 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
323 low_level_matrix_unary_ass_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
327 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
332 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
338 template <
unsigned BlockS
ideLength>
339 struct low_level_matrix_binary_ass_op<float, BlockSideLength, false, false, typename matrix_operations<float, BlockSideLength>::subtraction>
341 low_level_matrix_binary_ass_op(
float* c,
const float* a,
const float* b,
342 typename matrix_operations<float, BlockSideLength>::subtraction =
typename matrix_operations<float, BlockSideLength>::subtraction())
347 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
349 low_level_matrix_unary_ass_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::subtraction>
353 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
358 low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::subtraction>
364 template <
unsigned BlockS
ideLength>
365 struct low_level_matrix_unary_ass_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
367 low_level_matrix_unary_ass_op(
float* c,
const float* a,
368 typename matrix_operations<float, BlockSideLength>::addition =
typename matrix_operations<float, BlockSideLength>::addition())
370 const blas_int size = BlockSideLength * BlockSideLength;
371 const blas_int int_one = 1;
372 const float one = 1.0;
374 saxpy_(&size, &one, a, &int_one, c, &int_one);
378 template <
unsigned BlockS
ideLength>
379 struct low_level_matrix_unary_ass_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::subtraction>
381 low_level_matrix_unary_ass_op(
float* c,
const float* a,
382 typename matrix_operations<float, BlockSideLength>::subtraction =
typename matrix_operations<float, BlockSideLength>::subtraction())
384 const blas_int size = BlockSideLength * BlockSideLength;
385 const blas_int int_one = 1;
386 const float minusone = -1.0;
388 saxpy_(&size, &minusone, a, &int_one, c, &int_one);
392 template <
unsigned BlockS
ideLength>
393 struct low_level_matrix_unary_op<float, BlockSideLength, false, typename matrix_operations<float, BlockSideLength>::addition>
395 low_level_matrix_unary_op(
float* c,
const float* a,
396 typename matrix_operations<float, BlockSideLength>::addition =
typename matrix_operations<float, BlockSideLength>::addition())
398 const blas_int size = BlockSideLength * BlockSideLength;
399 const blas_int int_one = 1;
400 scopy_(&size, a, &int_one, c, &int_one);
405 template <
unsigned BlockS
ideLength>
406 struct low_level_matrix_binary_ass_op<blas_double_complex, BlockSideLength, false, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
408 low_level_matrix_binary_ass_op(blas_double_complex* c,
const blas_double_complex* a,
const blas_double_complex* b,
typename matrix_operations<blas_double_complex, BlockSideLength>::addition =
typename matrix_operations<blas_double_complex, BlockSideLength>::addition())
413 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
415 low_level_matrix_unary_ass_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
419 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
424 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
430 template <
unsigned BlockS
ideLength>
431 struct low_level_matrix_binary_ass_op<blas_double_complex, BlockSideLength, false, false, typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction>
433 low_level_matrix_binary_ass_op(blas_double_complex* c,
const blas_double_complex* a,
const blas_double_complex* b,
434 typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction =
typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction())
439 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
441 low_level_matrix_unary_ass_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction>
445 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
450 low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction>
456 template <
unsigned BlockS
ideLength>
457 struct low_level_matrix_unary_ass_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
459 low_level_matrix_unary_ass_op(blas_double_complex* c,
const blas_double_complex* a,
460 typename matrix_operations<blas_double_complex, BlockSideLength>::addition =
typename matrix_operations<blas_double_complex, BlockSideLength>::addition())
462 const blas_int size = BlockSideLength * BlockSideLength;
463 const blas_int int_one = 1;
464 const blas_double_complex one = 1.0;
466 zaxpy_(&size, &one, a, &int_one, c, &int_one);
470 template <
unsigned BlockS
ideLength>
471 struct low_level_matrix_unary_ass_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction>
473 low_level_matrix_unary_ass_op(blas_double_complex* c,
const blas_double_complex* a,
474 typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction =
typename matrix_operations<blas_double_complex, BlockSideLength>::subtraction())
476 const blas_int size = BlockSideLength * BlockSideLength;
477 const blas_int int_one = 1;
478 const blas_double_complex minusone = -1.0;
480 zaxpy_(&size, &minusone, a, &int_one, c, &int_one);
484 template <
unsigned BlockS
ideLength>
485 struct low_level_matrix_unary_op<blas_double_complex, BlockSideLength, false, typename matrix_operations<blas_double_complex, BlockSideLength>::addition>
487 low_level_matrix_unary_op(blas_double_complex* c,
const blas_double_complex* a,
488 typename matrix_operations<blas_double_complex, BlockSideLength>::addition =
typename matrix_operations<blas_double_complex, BlockSideLength>::addition())
490 const blas_int size = BlockSideLength * BlockSideLength;
491 const blas_int int_one = 1;
492 zcopy_(&size, a, &int_one, c, &int_one);
497 template <
unsigned BlockS
ideLength>
498 struct low_level_matrix_binary_ass_op<blas_single_complex, BlockSideLength, false, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
500 low_level_matrix_binary_ass_op(blas_single_complex* c,
const blas_single_complex* a,
const blas_single_complex* b,
typename matrix_operations<blas_single_complex, BlockSideLength>::addition =
typename matrix_operations<blas_single_complex, BlockSideLength>::addition())
505 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
507 low_level_matrix_unary_ass_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
511 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
516 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
522 template <
unsigned BlockS
ideLength>
523 struct low_level_matrix_binary_ass_op<blas_single_complex, BlockSideLength, false, false, typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction>
525 low_level_matrix_binary_ass_op(blas_single_complex* c,
const blas_single_complex* a,
const blas_single_complex* b,
526 typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction =
typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction())
531 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
533 low_level_matrix_unary_ass_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction>
537 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
542 low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction>
548 template <
unsigned BlockS
ideLength>
549 struct low_level_matrix_unary_ass_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
551 low_level_matrix_unary_ass_op(blas_single_complex* c,
const blas_single_complex* a,
552 typename matrix_operations<blas_single_complex, BlockSideLength>::addition =
typename matrix_operations<blas_single_complex, BlockSideLength>::addition())
554 const blas_int size = BlockSideLength * BlockSideLength;
555 const blas_int int_one = 1;
556 const blas_single_complex one = 1.0;
558 caxpy_(&size, &one, a, &int_one, c, &int_one);
562 template <
unsigned BlockS
ideLength>
563 struct low_level_matrix_unary_ass_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction>
565 low_level_matrix_unary_ass_op(blas_single_complex* c,
const blas_single_complex* a,
566 typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction =
typename matrix_operations<blas_single_complex, BlockSideLength>::subtraction())
568 const blas_int size = BlockSideLength * BlockSideLength;
569 const blas_int int_one = 1;
570 const blas_single_complex minusone = -1.0;
572 caxpy_(&size, &minusone, a, &int_one, c, &int_one);
576 template <
unsigned BlockS
ideLength>
577 struct low_level_matrix_unary_op<blas_single_complex, BlockSideLength, false, typename matrix_operations<blas_single_complex, BlockSideLength>::addition>
579 low_level_matrix_unary_op(blas_single_complex* c,
const blas_single_complex* a,
580 typename matrix_operations<blas_single_complex, BlockSideLength>::addition =
typename matrix_operations<blas_single_complex, BlockSideLength>::addition())
582 const blas_int size = BlockSideLength * BlockSideLength;
583 const blas_int int_one = 1;
584 ccopy_(&size, a, &int_one, c, &int_one);
590 extern "C" void dgemm_(
const char* transa,
const char* transb,
591 const blas_int* m,
const blas_int* n,
const blas_int* k,
592 const double* alpha,
const double* a,
const blas_int* lda,
593 const double* b,
const blas_int* ldb,
594 const double* beta,
double* c,
const blas_int* ldc);
596 extern "C" void sgemm_(
const char* transa,
const char* transb,
597 const blas_int* m,
const blas_int* n,
const blas_int* k,
598 const float* alpha,
const float* a,
const blas_int* lda,
599 const float* b,
const blas_int* ldb,
600 const float* beta,
float* c,
const blas_int* ldc);
602 extern "C" void zgemm_(
const char* transa,
const char* transb,
603 const blas_int* m,
const blas_int* n,
const blas_int* k,
604 const blas_double_complex* alpha,
const blas_double_complex* a,
const blas_int* lda,
605 const blas_double_complex* b,
const blas_int* ldb,
606 const blas_double_complex* beta, blas_double_complex* c,
const blas_int* ldc);
608 extern "C" void cgemm_(
const char* transa,
const char* transb,
609 const blas_int* m,
const blas_int* n,
const blas_int* k,
610 const blas_single_complex* alpha,
const blas_single_complex* a,
const blas_int* lda,
611 const blas_single_complex* b,
const blas_int* ldb,
612 const blas_single_complex* beta, blas_single_complex* c,
const blas_int* ldc);
614 template <
typename ValueType>
615 void gemm_(
const char* transa,
const char* transb,
616 const blas_int* m,
const blas_int* n,
const blas_int* k,
617 const ValueType* alpha,
const ValueType* a,
const blas_int* lda,
618 const ValueType* b,
const blas_int* ldb,
619 const ValueType* beta, ValueType* c,
const blas_int* ldc);
629 template <
typename ValueType>
630 void gemm_wrapper(
const blas_int n,
const blas_int l,
const blas_int m,
631 const ValueType alpha,
const bool a_in_col_major,
const ValueType* a,
632 const bool b_in_col_major,
const ValueType* b,
633 const ValueType beta,
const bool c_in_col_major, ValueType* c)
635 const blas_int& stride_in_a = a_in_col_major ? n : l;
636 const blas_int& stride_in_b = b_in_col_major ? l : m;
637 const blas_int& stride_in_c = c_in_col_major ? n : m;
638 const char transa = a_in_col_major xor c_in_col_major ?
'T' :
'N';
639 const char transb = b_in_col_major xor c_in_col_major ?
'T' :
'N';
642 gemm_(&transa, &transb, &n, &m, &l, &alpha, a, &stride_in_a, b, &stride_in_b, &beta, c, &stride_in_c);
645 gemm_(&transb, &transa, &m, &n, &l, &alpha, b, &stride_in_b, a, &stride_in_a, &beta, c, &stride_in_c);
649 void gemm_(
const char* transa,
const char* transb,
650 const blas_int* m,
const blas_int* n,
const blas_int* k,
651 const double* alpha,
const double* a,
const blas_int* lda,
652 const double* b,
const blas_int* ldb,
653 const double* beta,
double* c,
const blas_int* ldc)
655 dgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
659 void gemm_(
const char* transa,
const char* transb,
660 const blas_int* m,
const blas_int* n,
const blas_int* k,
661 const float* alpha,
const float* a,
const blas_int* lda,
662 const float* b,
const blas_int* ldb,
663 const float* beta,
float* c,
const blas_int* ldc)
665 sgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
669 void gemm_(
const char* transa,
const char* transb,
670 const blas_int* m,
const blas_int* n,
const blas_int* k,
671 const blas_double_complex* alpha,
const blas_double_complex* a,
const blas_int* lda,
672 const blas_double_complex* b,
const blas_int* ldb,
673 const blas_double_complex* beta, blas_double_complex* c,
const blas_int* ldc)
675 zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
679 void gemm_(
const char* transa,
const char* transb,
680 const blas_int* m,
const blas_int* n,
const blas_int* k,
681 const blas_single_complex* alpha,
const blas_single_complex* a,
const blas_int* lda,
682 const blas_single_complex* b,
const blas_int* ldb,
683 const blas_single_complex* beta, blas_single_complex* c,
const blas_int* ldc)
685 cgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
689 template <
unsigned BlockS
ideLength>
690 struct low_level_matrix_multiply_and_add<double, BlockSideLength>
692 low_level_matrix_multiply_and_add(
const double* a,
bool a_in_col_major,
693 const double* b,
bool b_in_col_major,
694 double* c,
const bool c_in_col_major)
696 gemm_wrapper<double>(BlockSideLength, BlockSideLength, BlockSideLength,
697 1.0, a_in_col_major, a,
699 1.0, c_in_col_major, c);
704 template <
unsigned BlockS
ideLength>
705 struct low_level_matrix_multiply_and_add<float, BlockSideLength>
707 low_level_matrix_multiply_and_add(
const float* a,
bool a_in_col_major,
708 const float* b,
bool b_in_col_major,
709 float* c,
const bool c_in_col_major)
711 gemm_wrapper<float>(BlockSideLength, BlockSideLength, BlockSideLength,
712 1.0, a_in_col_major, a,
714 1.0, c_in_col_major, c);
719 template <
unsigned BlockS
ideLength>
720 struct low_level_matrix_multiply_and_add<blas_single_complex, BlockSideLength>
722 low_level_matrix_multiply_and_add(
const blas_single_complex* a,
bool a_in_col_major,
723 const blas_single_complex* b,
bool b_in_col_major,
724 blas_single_complex* c,
const bool c_in_col_major)
726 gemm_wrapper<blas_single_complex>(BlockSideLength, BlockSideLength, BlockSideLength,
727 1.0, a_in_col_major, a,
729 1.0, c_in_col_major, c);
734 template <
unsigned BlockS
ideLength>
735 struct low_level_matrix_multiply_and_add<blas_double_complex, BlockSideLength>
737 low_level_matrix_multiply_and_add(
const blas_double_complex* a,
bool a_in_col_major,
738 const blas_double_complex* b,
bool b_in_col_major,
739 blas_double_complex* c,
const bool c_in_col_major)
741 gemm_wrapper<blas_double_complex>(BlockSideLength, BlockSideLength, BlockSideLength,
742 1.0, a_in_col_major, a,
744 1.0, c_in_col_major, c);
755 #endif // !STXXL_CONTAINERS_MATRIX_LOW_LEVEL_HEADER
multiplies matrices A and B, adds result to C, for arbitrary entries param pointer to blocks of A...
c = a [op] b; for arbitrary entries
low_level_matrix_multiply_and_add(const ValueType *a, bool a_in_col_major, const ValueType *b, bool b_in_col_major, ValueType *c, const bool c_in_col_major)
c [op]= a; for arbitrary entries
choose_int_types< my_pointer_size >::int_type int_type
c =[op] a; for arbitrary entries
#define STXXL_BEGIN_NAMESPACE
choose_int_types< my_pointer_size >::unsigned_type unsigned_type
switch_major_index(const int_type row, const int_type col)
#define STXXL_END_NAMESPACE
switch_major_index(const int_type row, const int_type col)