Ginkgo: generated from the develop branch. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
 
Loading...
Searching...
No Matches
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
21template <typename ValueType>
22class Dense;
23
24template <typename ValueType>
25class Diagonal;
26
27template <typename ValueType, typename IndexType>
28class Coo;
29
30template <typename ValueType, typename IndexType>
31class Ell;
32
33template <typename ValueType, typename IndexType>
34class Hybrid;
35
36template <typename ValueType, typename IndexType>
37class Sellp;
38
39template <typename ValueType, typename IndexType>
40class SparsityCsr;
41
42template <typename ValueType, typename IndexType>
43class Csr;
44
45template <typename ValueType, typename IndexType>
46class Fbcsr;
47
48template <typename ValueType, typename IndexType>
50
51template <typename IndexType>
52class Permutation;
53
54
55namespace detail {
56
57
58template <typename ValueType = default_precision, typename IndexType = int32>
59void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60
61
62} // namespace detail
63
64
103template <typename ValueType = default_precision, typename IndexType = int32>
104class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
105 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106#if GINKGO_ENABLE_HALF
107 public ConvertibleTo<
108 Csr<next_precision<next_precision<ValueType>>, IndexType>>,
109#endif
110 public ConvertibleTo<Dense<ValueType>>,
111 public ConvertibleTo<Coo<ValueType, IndexType>>,
112 public ConvertibleTo<Ell<ValueType, IndexType>>,
113 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
114 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
115 public ConvertibleTo<Sellp<ValueType, IndexType>>,
116 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
117 public DiagonalExtractable<ValueType>,
118 public ReadableFromMatrixData<ValueType, IndexType>,
119 public WritableToMatrixData<ValueType, IndexType>,
120 public Transposable,
121 public Permutable<IndexType>,
123 remove_complex<Csr<ValueType, IndexType>>>,
124 public ScaledIdentityAddable {
125 friend class EnablePolymorphicObject<Csr, LinOp>;
126 friend class Coo<ValueType, IndexType>;
127 friend class Dense<ValueType>;
128 friend class Diagonal<ValueType>;
129 friend class Ell<ValueType, IndexType>;
130 friend class Hybrid<ValueType, IndexType>;
131 friend class Sellp<ValueType, IndexType>;
132 friend class SparsityCsr<ValueType, IndexType>;
133 friend class Fbcsr<ValueType, IndexType>;
134 friend class CsrBuilder<ValueType, IndexType>;
135 friend class Csr<to_complex<ValueType>, IndexType>;
136
137public:
138 using EnableLinOp<Csr>::convert_to;
139 using EnableLinOp<Csr>::move_to;
140 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
141 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
142 using ConvertibleTo<Dense<ValueType>>::convert_to;
143 using ConvertibleTo<Dense<ValueType>>::move_to;
144 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
146 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
156 using ReadableFromMatrixData<ValueType, IndexType>::read;
157
158 using value_type = ValueType;
159 using index_type = IndexType;
160 using transposed_type = Csr<ValueType, IndexType>;
161 using mat_data = matrix_data<ValueType, IndexType>;
162 using device_mat_data = device_matrix_data<ValueType, IndexType>;
163 using absolute_type = remove_complex<Csr>;
164
165 class automatical;
166
174 friend class automatical;
175
176 public:
182 strategy_type(std::string name) : name_(name) {}
183
184 virtual ~strategy_type() = default;
185
191 std::string get_name() { return name_; }
192
199 virtual void process(const array<index_type>& mtx_row_ptrs,
200 array<index_type>* mtx_srow) = 0;
201
209 virtual int64_t clac_size(const int64_t nnz) = 0;
210
215 virtual std::shared_ptr<strategy_type> copy() = 0;
216
217 protected:
218 void set_name(std::string name) { name_ = name; }
219
220 private:
221 std::string name_;
222 };
223
230 class classical : public strategy_type {
231 public:
235 classical() : strategy_type("classical"), max_length_per_row_(0) {}
236
237 void process(const array<index_type>& mtx_row_ptrs,
238 array<index_type>* mtx_srow) override
239 {
240 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
241 array<index_type> row_ptrs_host(host_mtx_exec);
242 const bool is_mtx_on_host{host_mtx_exec ==
243 mtx_row_ptrs.get_executor()};
244 const index_type* row_ptrs{};
245 if (is_mtx_on_host) {
246 row_ptrs = mtx_row_ptrs.get_const_data();
247 } else {
248 row_ptrs_host = mtx_row_ptrs;
249 row_ptrs = row_ptrs_host.get_const_data();
250 }
251 auto num_rows = mtx_row_ptrs.get_size() - 1;
252 max_length_per_row_ = 0;
253 for (size_type i = 0; i < num_rows; i++) {
254 max_length_per_row_ = std::max(max_length_per_row_,
255 row_ptrs[i + 1] - row_ptrs[i]);
256 }
257 }
258
259 int64_t clac_size(const int64_t nnz) override { return 0; }
260
261 index_type get_max_length_per_row() const noexcept
262 {
263 return max_length_per_row_;
264 }
265
266 std::shared_ptr<strategy_type> copy() override
267 {
268 return std::make_shared<classical>();
269 }
270
271 private:
272 index_type max_length_per_row_;
273 };
274
280 class merge_path : public strategy_type {
281 public:
285 merge_path() : strategy_type("merge_path") {}
286
287 void process(const array<index_type>& mtx_row_ptrs,
288 array<index_type>* mtx_srow) override
289 {}
290
291 int64_t clac_size(const int64_t nnz) override { return 0; }
292
293 std::shared_ptr<strategy_type> copy() override
294 {
295 return std::make_shared<merge_path>();
296 }
297 };
298
305 class cusparse : public strategy_type {
306 public:
310 cusparse() : strategy_type("cusparse") {}
311
312 void process(const array<index_type>& mtx_row_ptrs,
313 array<index_type>* mtx_srow) override
314 {}
315
316 int64_t clac_size(const int64_t nnz) override { return 0; }
317
318 std::shared_ptr<strategy_type> copy() override
319 {
320 return std::make_shared<cusparse>();
321 }
322 };
323
329 class sparselib : public strategy_type {
330 public:
334 sparselib() : strategy_type("sparselib") {}
335
336 void process(const array<index_type>& mtx_row_ptrs,
337 array<index_type>* mtx_srow) override
338 {}
339
340 int64_t clac_size(const int64_t nnz) override { return 0; }
341
342 std::shared_ptr<strategy_type> copy() override
343 {
344 return std::make_shared<sparselib>();
345 }
346 };
347
352 public:
359 [[deprecated]] load_balance()
360 : load_balance(std::move(
362 {}
363
// Creates a load_balance strategy bound to a CUDA executor, taking the
// warp count and warp size from the executor.
369 load_balance(std::shared_ptr<const CudaExecutor> exec)
370 : load_balance(exec->get_num_warps(), exec->get_warp_size())
371 {}
372
// Creates a load_balance strategy bound to a HIP executor;
// cuda_strategy = false selects the non-CUDA tuning constants on
// HCC builds (see clac_size).
378 load_balance(std::shared_ptr<const HipExecutor> exec)
379 : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
380 {}
381
// Creates a load_balance strategy bound to a DPC++ executor, using the
// subgroup count, a subgroup size of 32, and the "intel" tuning table.
389 load_balance(std::shared_ptr<const DpcppExecutor> exec)
390 : load_balance(exec->get_num_subgroups(), 32, false, "intel")
391 {}
392
// Creates a load_balance strategy with explicit parameters.
//
// @param nwarps  number of warps/subgroups available on the executor
// @param warp_size  warp/subgroup size (default 32)
// @param cuda_strategy  true to use the CUDA tuning constants
// @param strategy_name  vendor tag used to pick tuning constants;
//                       "intel" selects the Intel thresholds
404 load_balance(int64_t nwarps, int warp_size = 32,
405 bool cuda_strategy = true,
406 std::string strategy_name = "none")
407 : strategy_type("load_balance"),
408 nwarps_(nwarps),
409 warp_size_(warp_size),
410 cuda_strategy_(cuda_strategy),
411 strategy_name_(strategy_name)
412 {}
413
// Computes the srow array on the host: a histogram of rows per warp
// bucket followed by a prefix sum, so that after this call srow[i] is
// the starting-row offset for warp i in the load-balanced kernel.
414 void process(const array<index_type>& mtx_row_ptrs,
415 array<index_type>* mtx_srow) override
416 {
417 auto nwarps = mtx_srow->get_size();
418
419 if (nwarps > 0) {
// stage both arrays on the host if they live on a device executor
420 auto host_srow_exec = mtx_srow->get_executor()->get_master();
421 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
422 const bool is_srow_on_host{host_srow_exec ==
423 mtx_srow->get_executor()};
424 const bool is_mtx_on_host{host_mtx_exec ==
425 mtx_row_ptrs.get_executor()};
426 array<index_type> row_ptrs_host(host_mtx_exec);
427 array<index_type> srow_host(host_srow_exec);
428 const index_type* row_ptrs{};
429 index_type* srow{};
430 if (is_srow_on_host) {
431 srow = mtx_srow->get_data();
432 } else {
433 srow_host = *mtx_srow;
434 srow = srow_host.get_data();
435 }
436 if (is_mtx_on_host) {
437 row_ptrs = mtx_row_ptrs.get_const_data();
438 } else {
439 row_ptrs_host = mtx_row_ptrs;
440 row_ptrs = row_ptrs_host.get_const_data();
441 }
// histogram pass: count how many rows fall into each warp bucket
442 for (size_type i = 0; i < nwarps; i++) {
443 srow[i] = 0;
444 }
445 const auto num_rows = mtx_row_ptrs.get_size() - 1;
446 const auto num_elems = row_ptrs[num_rows];
// guard against an empty matrix to avoid a division by zero below
447 const auto bucket_divider =
448 num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
449 for (size_type i = 0; i < num_rows; i++) {
450 auto bucket =
451 ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
452 bucket_divider);
453 if (bucket < nwarps) {
454 srow[bucket]++;
455 }
456 }
457 // find starting row for thread i
458 for (size_type i = 1; i < nwarps; i++) {
459 srow[i] += srow[i - 1];
460 }
// copy the result back to the device if srow lives there
461 if (!is_srow_on_host) {
462 *mtx_srow = srow_host;
463 }
464 }
465 }
466
// Computes the srow size for a matrix with nnz stored elements: an
// oversubscription multiple (chosen from nnz thresholds, with separate
// tables for "intel" and, on HCC builds, non-CUDA targets) times the
// warp count, capped at ceildiv(nnz, warp_size_).
// Note: the misspelled name `clac_size` [sic] is the strategy_type
// interface and cannot be renamed here.
467 int64_t clac_size(const int64_t nnz) override
468 {
469 if (warp_size_ > 0) {
// default (CUDA) oversubscription table
470 int multiple = 8;
471 if (nnz >= static_cast<int64_t>(2e8)) {
472 multiple = 2048;
473 } else if (nnz >= static_cast<int64_t>(2e7)) {
474 multiple = 512;
475 } else if (nnz >= static_cast<int64_t>(2e6)) {
476 multiple = 128;
477 } else if (nnz >= static_cast<int64_t>(2e5)) {
478 multiple = 32;
479 }
// Intel table overrides the default with smaller multiples
480 if (strategy_name_ == "intel") {
481 multiple = 8;
482 if (nnz >= static_cast<int64_t>(2e8)) {
483 multiple = 256;
484 } else if (nnz >= static_cast<int64_t>(2e7)) {
485 multiple = 32;
486 }
487 }
488#if GINKGO_HIP_PLATFORM_HCC
// on HCC builds, non-CUDA strategies use yet another table
489 if (!cuda_strategy_) {
490 multiple = 8;
491 if (nnz >= static_cast<int64_t>(1e7)) {
492 multiple = 64;
493 } else if (nnz >= static_cast<int64_t>(1e6)) {
494 multiple = 16;
495 }
496 }
497#endif // GINKGO_HIP_PLATFORM_HCC
498
499 auto nwarps = nwarps_ * multiple;
500 return min(ceildiv(nnz, warp_size_), nwarps);
501 } else {
502 return 0;
503 }
504 }
505
// Creates a fresh load_balance strategy with the same configuration
// (nwarps, warp size, CUDA flag, vendor tag).
506 std::shared_ptr<strategy_type> copy() override
507 {
508 return std::make_shared<load_balance>(
509 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
510 }
511
512 private:
513 int64_t nwarps_;
514 int warp_size_;
515 bool cuda_strategy_;
516 std::string strategy_name_;
517 };
518
519 class automatical : public strategy_type {
520 public:
521 /* Use imbalance strategy when the maximum number of nonzero per row is
522 * more than 1024 on NVIDIA hardware */
523 const index_type nvidia_row_len_limit = 1024;
524 /* Use imbalance strategy when the matrix has more than 1e6 stored
525 * elements on NVIDIA hardware */
526 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
527 /* Use imbalance strategy when the maximum number of nonzero per row is
528 * more than 768 on AMD hardware */
529 const index_type amd_row_len_limit = 768;
530 /* Use imbalance strategy when the matrix has more than 1e8 stored
531 * elements on AMD hardware */
532 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
533 /* Use imbalance strategy when the maximum number of nonzero per row is
534 * more than 25600 on Intel hardware */
535 const index_type intel_row_len_limit = 25600;
536 /* Use imbalance strategy when the matrix has more than 3e8 stored
537 * elements on Intel hardware */
538 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
539
540 public:
547 [[deprecated]] automatical()
548 : automatical(std::move(
550 {}
551
// Creates an automatical strategy bound to a CUDA executor, taking the
// warp count and warp size from the executor.
557 automatical(std::shared_ptr<const CudaExecutor> exec)
558 : automatical(exec->get_num_warps(), exec->get_warp_size())
559 {}
560
// Creates an automatical strategy bound to a HIP executor;
// cuda_strategy = false selects the AMD limits on HCC builds
// (see process()).
566 automatical(std::shared_ptr<const HipExecutor> exec)
567 : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
568 {}
569
// Creates an automatical strategy bound to a DPC++ executor, using the
// subgroup count, a subgroup size of 32, and the Intel limits.
577 automatical(std::shared_ptr<const DpcppExecutor> exec)
578 : automatical(exec->get_num_subgroups(), 32, false, "intel")
579 {}
580
// Creates an automatical strategy with explicit parameters.
//
// @param nwarps  number of warps/subgroups available on the executor
// @param warp_size  warp/subgroup size (default 32)
// @param cuda_strategy  true to use the NVIDIA limits
// @param strategy_name  vendor tag used to pick the limits; "intel"
//                       selects the Intel thresholds
592 automatical(int64_t nwarps, int warp_size = 32,
593 bool cuda_strategy = true,
594 std::string strategy_name = "none")
595 : strategy_type("automatical"),
596 nwarps_(nwarps),
597 warp_size_(warp_size),
598 cuda_strategy_(cuda_strategy),
599 strategy_name_(strategy_name),
600 max_length_per_row_(0)
601 {}
602
// Picks and runs the actual strategy: load_balance when the matrix has
// more stored elements than <nnz_limit> or a row longer than
// <row_len_limit>, classical otherwise. The chosen strategy's name is
// recorded via set_name(); the classical fallback also records the
// maximum row length.
603 void process(const array<index_type>& mtx_row_ptrs,
604 array<index_type>* mtx_srow) override
605 {
606 // if the number of stored elements is larger than <nnz_limit> or
607 // the maximum number of stored elements per row is larger than
608 // <row_len_limit>, use load_balance otherwise use classical
609 index_type nnz_limit = nvidia_nnz_limit;
610 index_type row_len_limit = nvidia_row_len_limit;
611 if (strategy_name_ == "intel") {
612 nnz_limit = intel_nnz_limit;
613 row_len_limit = intel_row_len_limit;
614 }
615#if GINKGO_HIP_PLATFORM_HCC
616 if (!cuda_strategy_) {
617 nnz_limit = amd_nnz_limit;
618 row_len_limit = amd_row_len_limit;
619 }
620#endif // GINKGO_HIP_PLATFORM_HCC
// stage the row pointers on the host for the scans below
621 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
622 const bool is_mtx_on_host{host_mtx_exec ==
623 mtx_row_ptrs.get_executor()};
624 array<index_type> row_ptrs_host(host_mtx_exec);
625 const index_type* row_ptrs{};
626 if (is_mtx_on_host) {
627 row_ptrs = mtx_row_ptrs.get_const_data();
628 } else {
629 row_ptrs_host = mtx_row_ptrs;
630 row_ptrs = row_ptrs_host.get_const_data();
631 }
632 const auto num_rows = mtx_row_ptrs.get_size() - 1;
// row_ptrs[num_rows] is the total number of stored elements
633 if (row_ptrs[num_rows] > nnz_limit) {
634 load_balance actual_strategy(nwarps_, warp_size_,
635 cuda_strategy_, strategy_name_);
636 if (is_mtx_on_host) {
637 actual_strategy.process(mtx_row_ptrs, mtx_srow);
638 } else {
639 actual_strategy.process(row_ptrs_host, mtx_srow);
640 }
641 this->set_name(actual_strategy.get_name());
642 } else {
// nnz below the limit: check the longest row before deciding
643 index_type maxnum = 0;
644 for (size_type i = 0; i < num_rows; i++) {
645 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
646 }
647 if (maxnum > row_len_limit) {
648 load_balance actual_strategy(
649 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
650 if (is_mtx_on_host) {
651 actual_strategy.process(mtx_row_ptrs, mtx_srow);
652 } else {
653 actual_strategy.process(row_ptrs_host, mtx_srow);
654 }
655 this->set_name(actual_strategy.get_name());
656 } else {
657 classical actual_strategy;
658 if (is_mtx_on_host) {
659 actual_strategy.process(mtx_row_ptrs, mtx_srow);
660 max_length_per_row_ =
661 actual_strategy.get_max_length_per_row();
662 } else {
663 actual_strategy.process(row_ptrs_host, mtx_srow);
664 max_length_per_row_ =
665 actual_strategy.get_max_length_per_row();
666 }
667 this->set_name(actual_strategy.get_name());
668 }
669 }
670 }
671
672 int64_t clac_size(const int64_t nnz) override
673 {
674 return std::make_shared<load_balance>(
675 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
676 ->clac_size(nnz);
677 }
678
// Returns the maximum number of stored elements per row recorded by the
// last process() call that fell back to classical; 0 otherwise.
679 index_type get_max_length_per_row() const noexcept
680 {
681 return max_length_per_row_;
682 }
683
// Creates a fresh automatical strategy with the same configuration.
// Note: max_length_per_row_ is not carried over; the copy starts at 0.
684 std::shared_ptr<strategy_type> copy() override
685 {
686 return std::make_shared<automatical>(
687 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
688 }
689
690 private:
691 int64_t nwarps_;
692 int warp_size_;
693 bool cuda_strategy_;
694 std::string strategy_name_;
695 index_type max_length_per_row_;
696 };
697
698 friend class Csr<previous_precision<ValueType>, IndexType>;
699
700 void convert_to(
701 Csr<next_precision<ValueType>, IndexType>* result) const override;
702
703 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
704
705#if GINKGO_ENABLE_HALF
706 friend class Csr<previous_precision<previous_precision<ValueType>>,
707 IndexType>;
708 using ConvertibleTo<
709 Csr<next_precision<next_precision<ValueType>>, IndexType>>::convert_to;
710 using ConvertibleTo<
712
713 void convert_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
714 result) const override;
715
716 void move_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
717 result) override;
718#endif
719
720 void convert_to(Dense<ValueType>* other) const override;
721
722 void move_to(Dense<ValueType>* other) override;
723
724 void convert_to(Coo<ValueType, IndexType>* result) const override;
725
726 void move_to(Coo<ValueType, IndexType>* result) override;
727
728 void convert_to(Ell<ValueType, IndexType>* result) const override;
729
730 void move_to(Ell<ValueType, IndexType>* result) override;
731
732 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
733
734 void move_to(Fbcsr<ValueType, IndexType>* result) override;
735
736 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
737
738 void move_to(Hybrid<ValueType, IndexType>* result) override;
739
740 void convert_to(Sellp<ValueType, IndexType>* result) const override;
741
742 void move_to(Sellp<ValueType, IndexType>* result) override;
743
744 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
745
746 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
747
748 void read(const mat_data& data) override;
749
750 void read(const device_mat_data& data) override;
751
752 void read(device_mat_data&& data) override;
753
754 void write(mat_data& data) const override;
755
756 std::unique_ptr<LinOp> transpose() const override;
757
758 std::unique_ptr<LinOp> conj_transpose() const override;
759
767
770 std::unique_ptr<Permutation<index_type>> value_permutation);
771
780 ptr_param<Csr> output) const;
781
782 std::unique_ptr<Permutation<IndexType>> value_permutation;
783 };
784
796 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
797 const;
798
813 std::unique_ptr<Csr> permute(
814 ptr_param<const Permutation<index_type>> permutation,
816
830 std::unique_ptr<Csr> permute(
831 ptr_param<const Permutation<index_type>> row_permutation,
832 ptr_param<const Permutation<index_type>> column_permutation,
833 bool invert = false) const;
834
855 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
856 ptr_param<const Permutation<index_type>> permutation,
858
877 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
878 ptr_param<const Permutation<index_type>> row_permutation,
879 ptr_param<const Permutation<index_type>> column_permutation,
880 bool invert = false) const;
881
891 std::unique_ptr<Csr> scale_permute(
894
907 std::unique_ptr<Csr> scale_permute(
909 row_permutation,
911 column_permutation,
912 bool invert = false) const;
913
914 std::unique_ptr<LinOp> permute(
915 const array<IndexType>* permutation_indices) const override;
916
917 std::unique_ptr<LinOp> inverse_permute(
918 const array<IndexType>* inverse_permutation_indices) const override;
919
920 std::unique_ptr<LinOp> row_permute(
921 const array<IndexType>* permutation_indices) const override;
922
923 std::unique_ptr<LinOp> column_permute(
924 const array<IndexType>* permutation_indices) const override;
925
926 std::unique_ptr<LinOp> inverse_row_permute(
927 const array<IndexType>* inverse_permutation_indices) const override;
928
929 std::unique_ptr<LinOp> inverse_column_permute(
930 const array<IndexType>* inverse_permutation_indices) const override;
931
932 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
933
934 std::unique_ptr<absolute_type> compute_absolute() const override;
935
937
942
943 /*
944 * Tests if all row entry pairs (value, col_idx) are sorted by column index
945 *
946 * @returns True if all row entry pairs (value, col_idx) are sorted by
947 * column index
948 */
949 bool is_sorted_by_column_index() const;
950
// Returns a mutable pointer to the stored values.
956 value_type* get_values() noexcept { return values_.get_data(); }
957
// Returns a const pointer to the stored values.
965 const value_type* get_const_values() const noexcept
966 {
967 return values_.get_const_data();
968 }
969
// Creates a Dense view sharing this matrix's value array (mutable).
974 std::unique_ptr<Dense<ValueType>> create_value_view();
975
// Creates a Dense view sharing this matrix's value array (read-only).
980 std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
981
// Returns a mutable pointer to the column index array.
987 index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
988
// Returns a const pointer to the column index array.
996 const index_type* get_const_col_idxs() const noexcept
997 {
998 return col_idxs_.get_const_data();
999 }
1000
// Returns a mutable pointer to the row pointer array.
1006 index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1007
// Returns a const pointer to the row pointer array.
1015 const index_type* get_const_row_ptrs() const noexcept
1016 {
1017 return row_ptrs_.get_const_data();
1018 }
1019
// Returns a mutable pointer to the srow (starting-row) array.
1025 index_type* get_srow() noexcept { return srow_.get_data(); }
1026
// Returns a const pointer to the srow (starting-row) array.
1034 const index_type* get_const_srow() const noexcept
1035 {
1036 return srow_.get_const_data();
1037 }
1038
// NOTE(review): the signature line for this body was lost in the
// extract; it returns the number of srow elements — presumably
// get_num_srow_elements(); confirm against the original header.
1045 {
1046 return srow_.get_size();
1047 }
1048
// NOTE(review): the signature line for this body was lost in the
// extract; it returns the number of stored elements — presumably
// get_num_stored_elements(); confirm against the original header.
1055 {
1056 return values_.get_size();
1057 }
1058
// Returns the strategy currently associated with this matrix.
1063 std::shared_ptr<strategy_type> get_strategy() const noexcept
1064 {
1065 return strategy_;
1066 }
1067
1073 void set_strategy(std::shared_ptr<strategy_type> strategy)
1074 {
1075 strategy_ = std::move(strategy->copy());
1076 this->make_srow();
1077 }
1078
1086 {
1087 auto exec = this->get_executor();
1088 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1089 this->scale_impl(make_temporary_clone(exec, alpha).get());
1090 }
1091
1099 {
1100 auto exec = this->get_executor();
1101 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1102 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1103 }
1104
1113 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1114 std::shared_ptr<strategy_type> strategy);
1115
1127 static std::unique_ptr<Csr> create(
1128 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1129 size_type num_nonzeros = {},
1130 std::shared_ptr<strategy_type> strategy = nullptr);
1131
1151 static std::unique_ptr<Csr> create(
1152 std::shared_ptr<const Executor> exec, const dim<2>& size,
1153 array<value_type> values, array<index_type> col_idxs,
1154 array<index_type> row_ptrs,
1155 std::shared_ptr<strategy_type> strategy = nullptr);
1156
1161 template <typename InputValueType, typename InputColumnIndexType,
1162 typename InputRowPtrType>
1163 GKO_DEPRECATED(
1164 "explicitly construct the gko::array argument instead of passing "
1165 "initializer lists")
1166 static std::unique_ptr<Csr> create(
1167 std::shared_ptr<const Executor> exec, const dim<2>& size,
1168 std::initializer_list<InputValueType> values,
1169 std::initializer_list<InputColumnIndexType> col_idxs,
1170 std::initializer_list<InputRowPtrType> row_ptrs)
1171 {
1172 return create(exec, size, array<value_type>{exec, std::move(values)},
1173 array<index_type>{exec, std::move(col_idxs)},
1174 array<index_type>{exec, std::move(row_ptrs)});
1175 }
1176
1192 static std::unique_ptr<const Csr> create_const(
1193 std::shared_ptr<const Executor> exec, const dim<2>& size,
1194 gko::detail::const_array_view<ValueType>&& values,
1195 gko::detail::const_array_view<IndexType>&& col_idxs,
1196 gko::detail::const_array_view<IndexType>&& row_ptrs,
1197 std::shared_ptr<strategy_type> strategy = nullptr);
1198
1211 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1212 const index_set<IndexType>& row_index_set,
1213 const index_set<IndexType>& column_index_set) const;
1214
1226 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1227 const span& row_span, const span& column_span) const;
1228
1233
1240
1244 Csr(const Csr&);
1245
1252
1253protected:
1254 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1255 size_type num_nonzeros = {},
1256 std::shared_ptr<strategy_type> strategy = nullptr);
1257
1258 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1259 array<value_type> values, array<index_type> col_idxs,
1260 array<index_type> row_ptrs,
1261 std::shared_ptr<strategy_type> strategy = nullptr);
1262
1263 void apply_impl(const LinOp* b, LinOp* x) const override;
1264
1265 void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1266 LinOp* x) const override;
1267
1268 // TODO: This provides some more sane settings. Please fix this!
1269 static std::shared_ptr<strategy_type> make_default_strategy(
1270 std::shared_ptr<const Executor> exec)
1271 {
1272 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1273 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1274 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1275 std::shared_ptr<strategy_type> new_strategy;
1276 if (cuda_exec) {
1277 new_strategy = std::make_shared<automatical>(cuda_exec);
1278 } else if (hip_exec) {
1279 new_strategy = std::make_shared<automatical>(hip_exec);
1280 } else if (dpcpp_exec) {
1281 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1282 } else {
1283 new_strategy = std::make_shared<classical>();
1284 }
1285 return new_strategy;
1286 }
1287
1288 // TODO clean this up as soon as we improve strategy_type
1289 template <typename CsrType>
1290 void convert_strategy_helper(CsrType* result) const
1291 {
1292 auto strat = this->get_strategy().get();
1293 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1294 if (dynamic_cast<classical*>(strat)) {
1295 new_strat = std::make_shared<typename CsrType::classical>();
1296 } else if (dynamic_cast<merge_path*>(strat)) {
1297 new_strat = std::make_shared<typename CsrType::merge_path>();
1298 } else if (dynamic_cast<cusparse*>(strat)) {
1299 new_strat = std::make_shared<typename CsrType::cusparse>();
1300 } else if (dynamic_cast<sparselib*>(strat)) {
1301 new_strat = std::make_shared<typename CsrType::sparselib>();
1302 } else {
1303 auto rexec = result->get_executor();
1304 auto cuda_exec =
1305 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1306 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1307 auto dpcpp_exec =
1308 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1309 auto lb = dynamic_cast<load_balance*>(strat);
1310 if (cuda_exec) {
1311 if (lb) {
1312 new_strat =
1313 std::make_shared<typename CsrType::load_balance>(
1314 cuda_exec);
1315 } else {
1316 new_strat = std::make_shared<typename CsrType::automatical>(
1317 cuda_exec);
1318 }
1319 } else if (hip_exec) {
1320 if (lb) {
1321 new_strat =
1322 std::make_shared<typename CsrType::load_balance>(
1323 hip_exec);
1324 } else {
1325 new_strat = std::make_shared<typename CsrType::automatical>(
1326 hip_exec);
1327 }
1328 } else if (dpcpp_exec) {
1329 if (lb) {
1330 new_strat =
1331 std::make_shared<typename CsrType::load_balance>(
1332 dpcpp_exec);
1333 } else {
1334 new_strat = std::make_shared<typename CsrType::automatical>(
1335 dpcpp_exec);
1336 }
1337 } else {
1338 // Try to preserve this executor's configuration
1339 auto this_cuda_exec =
1340 std::dynamic_pointer_cast<const CudaExecutor>(
1341 this->get_executor());
1342 auto this_hip_exec =
1343 std::dynamic_pointer_cast<const HipExecutor>(
1344 this->get_executor());
1345 auto this_dpcpp_exec =
1346 std::dynamic_pointer_cast<const DpcppExecutor>(
1347 this->get_executor());
1348 if (this_cuda_exec) {
1349 if (lb) {
1350 new_strat =
1351 std::make_shared<typename CsrType::load_balance>(
1352 this_cuda_exec);
1353 } else {
1354 new_strat =
1355 std::make_shared<typename CsrType::automatical>(
1356 this_cuda_exec);
1357 }
1358 } else if (this_hip_exec) {
1359 if (lb) {
1360 new_strat =
1361 std::make_shared<typename CsrType::load_balance>(
1362 this_hip_exec);
1363 } else {
1364 new_strat =
1365 std::make_shared<typename CsrType::automatical>(
1366 this_hip_exec);
1367 }
1368 } else if (this_dpcpp_exec) {
1369 if (lb) {
1370 new_strat =
1371 std::make_shared<typename CsrType::load_balance>(
1372 this_dpcpp_exec);
1373 } else {
1374 new_strat =
1375 std::make_shared<typename CsrType::automatical>(
1376 this_dpcpp_exec);
1377 }
1378 } else {
1379 // FIXME: this changes strategies.
1380 // We had a load balance or automatical strategy from a non
1381 // HIP or Cuda executor and are moving to a non HIP or Cuda
1382 // executor.
1383 new_strat = std::make_shared<typename CsrType::classical>();
1384 }
1385 }
1386 }
1387 result->set_strategy(new_strat);
1388 }
1389
// Resizes the srow array to the size required by the current strategy
// (clac_size [sic] takes the number of stored elements) and fills it
// via the strategy's process().
1393 void make_srow()
1394 {
1395 srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1396 strategy_->process(row_ptrs_, &srow_);
1397 }
1398
1405 virtual void scale_impl(const LinOp* alpha);
1406
1413 virtual void inv_scale_impl(const LinOp* alpha);
1414
1415private:
1416 std::shared_ptr<strategy_type> strategy_;
1417 array<value_type> values_;
1418 array<index_type> col_idxs_;
1419 array<index_type> row_ptrs_;
1420 array<index_type> srow_;
1421
1422 void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1423};
1424
1425
1426namespace detail {
1427
1428
// Rebuilds a device-bound strategy (load_balance or automatical) from
// the result's executor so its warp count/size match the executor the
// matrix now lives on. Other strategies, and executors that are neither
// HIP nor CUDA, leave the strategy untouched.
1435template <typename ValueType, typename IndexType>
1436void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1437{
1438 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1439 using automatical = typename Csr<ValueType, IndexType>::automatical;
1440 auto strategy = result->get_strategy();
1441 auto executor = result->get_executor();
1442 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1443 if (auto exec =
1444 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1445 result->set_strategy(std::make_shared<load_balance>(exec));
1446 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1447 executor)) {
1448 result->set_strategy(std::make_shared<load_balance>(exec));
1449 }
1450 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1451 if (auto exec =
1452 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1453 result->set_strategy(std::make_shared<automatical>(exec));
1454 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1455 executor)) {
1456 result->set_strategy(std::make_shared<automatical>(exec));
1457 }
1458 }
1459}
1460
1461
1462} // namespace detail
1463} // namespace matrix
1464} // namespace gko
1465
1466
1467#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition polymorphic_object.hpp:479
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The diagonal of a LinOp implementing this interface can be extracted.
Definition lin_op.hpp:743
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:668
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:243
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition array.hpp:166
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:673
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:689
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:682
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:656
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition coo.hpp:63
Definition csr.hpp:49
Definition csr.hpp:519
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:684
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:592
automatical()
Creates an automatical strategy.
Definition csr.hpp:547
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:672
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:557
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:603
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with DPCPP executor.
Definition csr.hpp:577
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:566
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:230
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:237
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:266
classical()
Creates a classical strategy.
Definition csr.hpp:235
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:259
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:305
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:316
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:318
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:310
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:312
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:351
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:414
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:506
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:378
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:359
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:467
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:404
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:369
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:389
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:280
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:291
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:293
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:285
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:287
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:329
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:340
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:336
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:334
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:342
strategy_type is used to decide which csr algorithm to apply.
Definition csr.hpp:173
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:191
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:182
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:124
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1015
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:1034
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:1073
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:1098
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:1025
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps)
Definition csr.hpp:1044
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1006
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:965
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:1054
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:1063
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:996
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:1085
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:956
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:987
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:117
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:53
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:64
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:113
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:55
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:55
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
The matrix namespace.
Definition dense_cache.hpp:15
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
Definition permutation.hpp:53
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:260
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:279
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:295
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:590
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:719
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159
next_precision_base< T > next_precision
Obtains the next type in the singly-linked precision list with half.
Definition math.hpp:445
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
STL namespace.
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
permuting_reuse_info()
Creates an empty reuse info.
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
permuting_reuse_info(std::unique_ptr< Permutation< index_type > > value_permutation)
Creates a reuse info structure from its value permutation.
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46