MOOCHO (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AbstractLinAlgPack_COOMatrixTmplOpDef.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
5 // Copyright (2003) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef COO_MATRIX_TMPL_OP_DEF_H
43 #define COO_MATRIX_TMPL_OP_DEF_H
44 
46 
50 
51 namespace AbstractLinAlgPack {
52 
54 
58 
59 // gms_lhs += alpha * coom_rhs (time = O(coom_rhs.nz()), space = O(1))
60 template<class T_COOM>
61 void Mp_StCOOM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs
62  , BLAS_Cpp::Transp trans_rhs)
63 {
64  Mp_M_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
65  , coom_rhs.rows(), coom_rhs.cols(), trans_rhs );
66  typename T_COOM::difference_type
67  i_o = coom_rhs.row_offset(),
68  j_o = coom_rhs.col_offset();
69  if(trans_rhs == BLAS_Cpp::no_trans)
70  for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr)
71  (*gms_lhs)(itr->row_i()+i_o,itr->col_j()+j_o) += alpha * itr->value();
72  else
73  for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr)
74  (*gms_lhs)(itr->col_j()+j_o,itr->row_i()+i_o) += alpha * itr->value();
75 }
76 
77 // vs_lhs += alpha * coom_rhs1 * vs_rhs2 (BLAS xGEMV) (time = O(coom_rhs.nz()), space = O(1))
78 template<class T_COOM>
79 void Vp_StCOOMtV(DVectorSlice* vs_lhs, value_type alpha, const T_COOM& coom_rhs1
80  , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2)
81 {
82  Vp_MtV_assert_sizes( vs_lhs->dim(), coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1, vs_rhs2.dim() );
83  typename T_COOM::difference_type
84  i_o = coom_rhs1.row_offset(),
85  j_o = coom_rhs1.col_offset();
86  if(trans_rhs1 == BLAS_Cpp::no_trans)
87  for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr)
88  (*vs_lhs)(itr->row_i()+i_o) += alpha * itr->value() * vs_rhs2(itr->col_j()+j_o);
89  else
90  for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr)
91  (*vs_lhs)(itr->col_j()+j_o) += alpha * itr->value() * vs_rhs2(itr->row_i()+i_o);
92 }
93 
94 namespace UtilityPack {
95 
96 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM)
97 template<class T_COOM>
98 void imp_Mp_StMtCOOM(DMatrixSlice& gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha
99  , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1
100  , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2 );
101 
102 } // end namespace UtilityPack
103 
104 
105 // gms_lhs += alpha * op(coom_rhs1) * op(gms_rhs2) (right) (BLAS xGEMM)
106 template<class T_COOM>
107 void Mp_StCOOMtM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
108  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2, BLAS_Cpp::Transp trans_rhs2)
109 {
110  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
111  , coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1
112  , gms_rhs2.rows() ,gms_rhs2.cols(), trans_rhs2 );
113  UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::trans, alpha, gms_rhs2
114  , trans_not(trans_rhs2), coom_rhs1, trans_not(trans_rhs1) );
115 }
116 
117 // gms_lhs += alpha * op(gms_rhs1) * op(coom_rhs2) (left) (BLAS xGEMM)
118 template<class T_COOM>
119 void Mp_StMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
120  , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
121 {
122  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
123  , gms_rhs1.rows() ,gms_rhs1.cols(), trans_rhs1
124  , coom_rhs2.rows(), coom_rhs2.cols(), trans_rhs2 );
125  UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::no_trans, alpha, gms_rhs1
126  , trans_rhs1, coom_rhs2, trans_rhs2 );
127 }
128 
129 // ToDo: implement the level-3 BLAS operations below
130 
131 // gms_lhs = alpha * op(coom_rhs1) * op(sym_rhs2) (right) (BLAS xSYMM)
132 //template<class T_COOM>
133 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
134 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2, BLAS_Cpp::Transp trans_rhs2);
135 
136 // gms_lhs = alpha * op(sym_rhs1) * op(coom_rhs2) (left) (BLAS xSYMM)
137 //template<class T_COOM>
138 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1
139 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2);
140 
141 // gms_lhs = alpha * op(coom_rhs1) * op(tri_rhs2) (right) (BLAS xTRMM)
142 //template<class T_COOM>
143 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
144 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2, BLAS_Cpp::Transp trans_rhs2);
145 
146 // gms_lhs = alpha * op(tri_rhs1) * op(coom_rhs2) (left) (BLAS xTRMM)
147 //template<class T_COOM>
148 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
149 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2);
150 
151 namespace UtilityPack {
152 
153 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM)
154 //
155 // This function perform this operation by looping through the nonzero elements of
156 // coom_rhs2 and for each nonzero element (val,i,j) it performs the following operation.
157 //
158 // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i)
159 //
160 //
161 // jth ith jth
162 // [ # ] [ # ] [ ]
163 // [ # ] [ # ] [ ]
164 // m [ # ] += [ # ] * [ ] n
165 // [ # ] [ # ] ith [ val ]
166 // n p [ ]
167 // p
168 // op(gms_lhs) op(gms_rhs1) op(coom_rhs2)
169 //
170 // The number of arithmetic operations performed are:
171 // floats = (2*m + 1) * nz
172 //
173 // Strictly speeking the number of memory references is:
174 // mem_refs = (2*m + 1) * nz
175 // but this does not take into account that elements are accessed by columns
176 // and this has some ramifications on cache effects and paging. If op(gms_lhs)
177 // == gms_lhs' or op(gms_rhs1) == gms_rhs1' then elements in a column are
178 // not adjacent to each other and if m is large enough each element may
179 // even reside on a seperate page of memory. On Win32 0x86 systems a page is
180 // 4 K so 4,000 (bytes/page) / 8 (bytes/double) = 500 doubles / page. If
181 // the working set of pages is small this could cause some serious page thrashing
182 // for large m.
183 //
184 // Another concideration is to sorting order of the elements in the COO matrix.
185 // If op(coom_rhs2) is sorted by row then columns of op(gms_lhs) will be accessed
186 // consecutivly and will result in better performance. The same goes for op(gms_rhs1)
187 // if op(coom_rhs2) is sorted by column.
188 //
189 // There is opertunity for some vectorization and it is handled by calling
190 // DenseLinAlgPack::Vp_StV(...).
191 //
192 template<class T_COOM>
194  , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1
195  , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
196 {
197  using BLAS_Cpp::rows;
198  using BLAS_Cpp::cols;
199  using DenseLinAlgPack::col;
200 
201  typename T_COOM::difference_type
202  i_o = coom_rhs2.row_offset(),
203  j_o = coom_rhs2.col_offset();
204  for(typename T_COOM::const_iterator itr = coom_rhs2.begin(); itr != coom_rhs2.end(); ++itr) {
205  size_type i = rows( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ),
206  j = cols( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 );
207  // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i)
208  DenseLinAlgPack::Vp_StV( &col(*gms_lhs,trans_lhs,j), alpha * itr->value()
209  , col(gms_rhs1,trans_rhs1,i) );
210  }
211 }
212 
213 } // end namespace UtilityPack
214 
215 } // end namespace AbstractLinAlgPack
216 
217 #endif // COO_MATRIX_TMPL_OP_DEF_H
void Vp_StV(DVectorSlice *vs_lhs, value_type alpha, const DVectorSlice &vs_rhs)
vs_lhs += alpha * vs_rhs (BLAS xAXPY)
size_type dim() const
Returns the number of elements of the VectorSliceTmpl.
void imp_Mp_StMtCOOM(DMatrixSlice &gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha, const DMatrixSlice &gms_rhs1, BLAS_Cpp::Transp trans_rhs1, const T_COOM &coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
size_type cols() const
Return the number of columns.
size_type rows(size_type rows, size_type cols, BLAS_Cpp::Transp _trans)
Return rows of a possible transposed matrix.
void Mp_StMtCOOM(DMatrixSlice *gms_lhs, value_type alpha, const DMatrixSlice &gms_rhs1, BLAS_Cpp::Transp trans_rhs1, const T_COOM &coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
gms_lhs += alpha * op(gms_rhs1) * op(coom_rhs2) (left) (BLAS xGEMM)
void Mp_MtM_assert_sizes(size_type m_lhs_rows, size_type m_lhs_cols, BLAS_Cpp::Transp trans_lhs, size_type m_rhs1_rows, size_type m_rhs1_cols, BLAS_Cpp::Transp trans_rhs1, size_type m_rhs2_rows, size_type m_rhs2_cols, BLAS_Cpp::Transp trans_rhs2)
op(m_lhs) += op(m_rhs1) * op(m_rhs2)
void Mp_M_assert_sizes(size_type m_lhs_rows, size_type m_lhs_cols, BLAS_Cpp::Transp trans_lhs, size_type m_rhs_rows, size_type m_rhs_cols, BLAS_Cpp::Transp trans_rhs)
op(m_lhs) += op op(m_rhs)
Transposed.
Not transposed.
void Mp_StCOOMtM(DMatrixSlice *gms_lhs, value_type alpha, const T_COOM &coom_rhs1, BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice &gms_rhs2, BLAS_Cpp::Transp trans_rhs2)
gms_lhs += alpha * op(coom_rhs1) * op(gms_rhs2) (right) (BLAS xGEMM)
DVectorSlice col(DMatrixSlice &gms, BLAS_Cpp::Transp trans, size_type j)
void Mp_StCOOM(DMatrixSlice *gms_lhs, value_type alpha, const T_COOM &coom_rhs, BLAS_Cpp::Transp trans_rhs)
gms_lhs += alpha * op(coom_rhs) (time = O(coom_rhs.nz(), space = O(1))
void Vp_StCOOMtV(DVectorSlice *vs_lhs, value_type alpha, const T_COOM &coom_rhs1, BLAS_Cpp::Transp trans_rhs1, const DVectorSlice &vs_rhs2)
vs_lhs += alpha * op(coom_rhs1) * vs_rhs2 (BLAS xGEMV) (time = O(coom_rhs.nz(), space = O(1)) ...
Transp trans_not(Transp _trans)
Return the opposite of the transpose argument.
size_type rows() const
Return the number of rows.
void Vp_MtV_assert_sizes(size_type v_lhs_size, size_type m_rhs1_rows, size_type m_rhs1_cols, BLAS_Cpp::Transp trans_rhs1, size_type v_rhs2_size)
v_lhs += op(m_rhs1) * v_rhs2
Transp
TRANS.
size_type cols(size_type rows, size_type cols, BLAS_Cpp::Transp _trans)
Return columns of a possible transposed matrix.