DenseLinAlgPack: Concreate C++ Classes for Dense Blas-Compatible Linear Algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
DenseLinAlgPack_DMatrixOpBLAS.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
5 // Copyright (2003) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "DenseLinAlgPack_DMatrixOp.hpp"
43 #include "DenseLinAlgPack_BLAS_Cpp.hpp"
44 
45 namespace {
46 
51 using DenseLinAlgPack::col;
52 using DenseLinAlgPack::value_type;
53 using DenseLinAlgPack::assert_gms_sizes;
57 using DenseLinAlgPack::assign;
58 using BLAS_Cpp::Transp;
59 using BLAS_Cpp::no_trans;
60 using BLAS_Cpp::trans;
61 using BLAS_Cpp::Uplo;
62 using BLAS_Cpp::upper;
63 using BLAS_Cpp::lower;
64 
65 } // end namespace
66 
67 // ///////////////////////////////////////////////////////////////////////////////////
68 // Assignment Fucntions
69 
70 namespace {
71 
72 // implementation: gms_lhs = alpha (elementwise)
73 inline void i_assign(DMatrixSlice* gms_lhs, value_type alpha) {
74  for(DMatrixSlice::size_type i = 1; i <= gms_lhs->cols(); ++i)
75  gms_lhs->col(i) = alpha;
76 }
77 
78 // implementaion: gms_lhs = gms_rhs. Most basic copy function for rectangular matrices.
79 inline void i_assign_basic(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs
80  , BLAS_Cpp::Transp trans_rhs)
81 {
82  for(DMatrixSlice::size_type i = 1; i <= gms_lhs->cols(); ++i)
83  gms_lhs->col(i) = col(gms_rhs,trans_rhs,i);
84 }
85 
86 // implementaion: gms_lhs = op(gms_rhs). Checks for overlap and creates temporaries accordingly.
87 inline void i_assign(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs)
88 {
89  switch(gms_lhs->overlap(gms_rhs)) {
90  case DenseLinAlgPack::NO_OVERLAP: // no overlap so just perform the copy
91  i_assign_basic(gms_lhs,gms_rhs,trans_rhs);
92  return;
93  case DenseLinAlgPack::SAME_MEM:
94  if(trans_rhs == BLAS_Cpp::no_trans) return; // assignment to self, nothing to do.
95  default: // either same memory that needs to be transposed or some overlap so just generate temp.
96  DMatrix temp = gms_rhs;
97  i_assign_basic(gms_lhs,temp(),trans_rhs);
98  return;
99  }
100 }
101 
102 // Copy one triangular region into another. Does not check sizes or aliasing of argument matrices.
103 // A row of a upper triangular region corresponds to a col of a BLAS_Cpp::lower triangular region.
104 inline void i_assign_basic(DMatrixSliceTriEle* tri_lhs, const DMatrixSliceTriEle& tri_rhs)
105 {
106  DMatrixSlice::size_type n = tri_lhs->gms().cols();
107 
108  // Access BLAS_Cpp::lower tri by col and upper tri by row
110  trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans,
111  trans_rhs = (tri_rhs.uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans;
112 
113  for(int i = 1; i <= n; ++i) { // Only copy the part of the vec in tri region.
114  col(tri_lhs->gms(),trans_lhs,i)(i,n) = col(tri_rhs.gms(),trans_rhs,i)(i,n);
115  }
116 }
117 
118 } // end namespace
119 
120 // gm_lhs = alpha (elementwise)
121 void DenseLinAlgPack::assign(DMatrix* gm_lhs, value_type alpha)
122 {
123  if(!gm_lhs->rows()) gm_lhs->resize(1,1,alpha);
124  i_assign(&(*gm_lhs)(),alpha);
125 }
126 
127 // gm_lhs = op(gms_rhs)
128 void DenseLinAlgPack::assign(DMatrix* gm_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs)
129 {
130  if(gm_lhs->overlap(gms_rhs) == SAME_MEM && trans_rhs == BLAS_Cpp::no_trans) return; // assignment to self
131  if(gm_lhs->overlap(gms_rhs) != NO_OVERLAP) {
132  // some overlap so we must create a copy
133  DMatrix tmp(gms_rhs);
134  resize_gm_lhs(gm_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs);
135  i_assign(&(*gm_lhs)(), tmp(), trans_rhs);
136  }
137  else {
138  // no overlap so just assign
139  resize_gm_lhs(gm_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs);
140  i_assign(&(*gm_lhs)(), gms_rhs, trans_rhs);
141  }
142 }
143 
144 // gms_lhs = alpha (elementwise)
145 void DenseLinAlgPack::assign(DMatrixSlice* gms_lhs, value_type alpha)
146 {
148  !gms_lhs->rows(), std::length_error
149  ,"Error, assign(gms_lhs,alpha): You can not assign Scalar to an unsized DMatrixSlice" );
150  i_assign(gms_lhs, alpha);
151 }
152 
153 // gms_lhs = op(gms_rhs)
154 void DenseLinAlgPack::assign(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs)
155 {
156  assert_gms_lhs(*gms_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs);
157  i_assign(gms_lhs, gms_rhs, trans_rhs);
158 }
159 
160 // tri_lhs = alpha (elementwise)
161 void DenseLinAlgPack::assign(DMatrixSliceTriEle* tri_lhs, value_type alpha)
162 {
164  !tri_lhs->gms().rows(), std::length_error
165  ,"Error, assign(tri_lhs,alpha): You can not assign a scalar to an unsized triangular DMatrixSlice" );
166  assert_gms_square(tri_lhs->gms());
167  DMatrixSlice::size_type n = tri_lhs->gms().cols();
168  // access BLAS_Cpp::lower tri by col and upper tri by row
170  trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans;
171  for(int i = 1; i <= n; ++i)
172  col( tri_lhs->gms(), trans_lhs , i )(i,n) = alpha;
173 }
174 
175 // tri_lhs = tri_rhs
176 void DenseLinAlgPack::assign(DMatrixSliceTriEle* tri_lhs, const DMatrixSliceTriEle& tri_rhs)
177 {
178  assert_gms_lhs(tri_lhs->gms(),tri_rhs.gms().rows(),tri_rhs.gms().cols(),BLAS_Cpp::no_trans);
179 
180  switch(tri_lhs->gms().overlap(tri_rhs.gms())) {
181  case SAME_MEM:
182  if(tri_lhs->uplo() == tri_rhs.uplo()) return; // Assignment to self to exit
183 
184  case SOME_OVERLAP:
185  // Test for the special case where the upper tri region (above diagonal) of a
186  // matrix is being copied into the BLAS_Cpp::lower tri region (below the diagonal) of
187  // the same matrix or visa-versa. No temp is need in this case
188  if(tri_lhs->uplo() != tri_rhs.uplo()) {
189  const DMatrixSlice *up = (tri_lhs->uplo() == upper)
190  ? &tri_lhs->gms() : &tri_rhs.gms();
191  const DMatrixSlice *lo = (tri_rhs.uplo() == BLAS_Cpp::lower)
192  ? &tri_lhs->gms() : &tri_rhs.gms();
193  if(lo->col_ptr(1) + lo->max_rows() - 1 == up->col_ptr(1)) { // false if SAME_MEM
194  // One triangular region is being copied into another so no temp needed.
195  i_assign_basic(tri_lhs, tri_rhs);
196  return;
197  }
198  }
199  // Give up and copy the vs_rhs as a temp.
200  {
201  DMatrix temp(tri_rhs.gms());
202  i_assign_basic(tri_lhs, tri_ele(temp(),tri_rhs.uplo()));
203  return;
204  }
205 
206  case NO_OVERLAP: // no overlap so just perform the copy
207  i_assign_basic(tri_lhs, tri_rhs);
208  return;
209  }
210 }
211 
212 // /////////////////////////////////////////////////////////////////////////////////////////
213 // Element-wise Algebraic Operations
214 
215 void DenseLinAlgPack::Mt_S(DMatrixSlice* gms_lhs, value_type alpha)
216 {
218  !gms_lhs->rows(), std::length_error
219  ,"Error, Mt_S(gms_lhs,alpha): You can not scale an unsized DMatrixSlice" );
220  for(int j = 1; j <= gms_lhs->cols(); ++j)
221  Vt_S(&gms_lhs->col(j), alpha);
222 }
223 
224 void DenseLinAlgPack::M_diagVtM( DMatrixSlice* gms_lhs, const DVectorSlice& vs_rhs
225  , const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs )
226 {
227  Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
228  , gms_rhs.rows(), gms_rhs.cols(), trans_rhs);
229  for(DMatrixSlice::size_type j = 1; j <= gms_lhs->cols(); ++j)
230  prod( &gms_lhs->col(j), vs_rhs, col(gms_rhs,trans_rhs,j) );
231 }
232 
233 void DenseLinAlgPack::Mt_S(DMatrixSliceTriEle* tri_lhs, value_type alpha) {
235  !tri_lhs->gms().rows(), std::length_error
236  ,"Error, Mt_S(tri_lhs,alpha): You can not scale an unsized triangular DMatrixSlice" );
238  trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans;
239 
240  DMatrixSlice::size_type n = tri_lhs->gms().cols();
241  for(DMatrixSlice::size_type j = 1; j <= n; ++j)
242  Vt_S( &col( tri_lhs->gms(), trans_lhs , j )(j,n), alpha );
243 }
244 
245 void DenseLinAlgPack::Mp_StM(DMatrixSliceTriEle* tri_lhs, value_type alpha, const DMatrixSliceTriEle& tri_ele_rhs)
246 {
247  Mp_M_assert_sizes(tri_lhs->gms().rows(), tri_lhs->gms().cols(), BLAS_Cpp::no_trans
248  , tri_ele_rhs.gms().rows(), tri_ele_rhs.gms().cols(), BLAS_Cpp::no_trans);
249 
251  trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans,
252  trans_rhs = (tri_ele_rhs.uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans;
253 
254  DMatrixSlice::size_type n = tri_lhs->gms().cols();
255  for(DMatrixSlice::size_type j = 1; j <= n; ++j)
256  Vp_StV( &col(tri_lhs->gms(),trans_lhs,j)(j,n), alpha, col(tri_ele_rhs.gms(),trans_rhs,j)(j,n) );
257 }
258 
259 // LinAlgOpPack Compatable (compile time polymorphism)
260 
261 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs
262  , BLAS_Cpp::Transp trans_rhs)
263 {
264  Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
265  , gms_rhs.rows(), gms_rhs.cols(), trans_rhs);
266  for(DMatrixSlice::size_type j = 1; j <= gms_lhs->cols(); ++j)
267  Vp_StV( &gms_lhs->col(j), alpha, col(gms_rhs,trans_rhs,j) );
268 }
269 
270 namespace {
271 // Implement upper and lower copies for a symmetric matrix
272 // Does not check sizes.
273 // inline
274 void i_Mp_StSM( DenseLinAlgPack::DMatrixSlice* gms_lhs, DenseLinAlgPack::value_type alpha
275  , const DenseLinAlgPack::DMatrixSliceTriEle& tri_ele_rhs)
276 {
277  using DenseLinAlgPack::Mp_StM;
278  using DenseLinAlgPack::tri_ele;
279  const DenseLinAlgPack::size_type n = gms_lhs->rows(); // same as cols
280  Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>(
281  &tri_ele((*gms_lhs)(2,n,1,n-1), BLAS_Cpp::lower ) )
282  , alpha, tri_ele_rhs );
283  Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>(
284  &tri_ele((*gms_lhs)(1,n-1,2,n), BLAS_Cpp::upper ) )
285  , alpha, tri_ele_rhs );
286 }
287 } // end namespace
288 
289 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs
290  , BLAS_Cpp::Transp trans_rhs )
291 {
292  Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
293  , sym_rhs.rows(), sym_rhs.cols(), trans_rhs);
294  Vp_StV( &gms_lhs->diag(), alpha, sym_rhs.gms().diag() );
295  const size_type n = gms_lhs->rows(); // same as cols
296  switch(sym_rhs.uplo()) {
297  case BLAS_Cpp::lower:
298  i_Mp_StSM( gms_lhs, alpha, tri_ele(sym_rhs.gms()(2,n,1,n-1),BLAS_Cpp::lower) );
299  break;
300  case BLAS_Cpp::upper:
301  i_Mp_StSM( gms_lhs, alpha, tri_ele(sym_rhs.gms()(1,n-1,2,n),BLAS_Cpp::upper) );
302  break;
303  }
304 }
305 
306 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs
307  , BLAS_Cpp::Transp trans_rhs)
308 {
309  using BLAS_Cpp::trans;
310  using BLAS_Cpp::no_trans;
311  using BLAS_Cpp::lower;
312  using BLAS_Cpp::upper;
313  Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
314  , tri_rhs.rows(), tri_rhs.cols(), trans_rhs );
315  // diagonal
316  if( tri_rhs.diag() == BLAS_Cpp::nonunit )
317  Vp_StV( &gms_lhs->diag(), alpha, tri_rhs.gms().diag() );
318  else
319  Vp_S( &gms_lhs->diag(), alpha );
320  // upper or lower triangle
321  const size_type n = gms_lhs->rows(); // same as cols
322  if( n == 1 )
323  return; // There is not lower or upper triangular region
324  const DMatrixSliceTriEle
325  tri = ( tri_rhs.uplo() == lower
326  ? tri_ele(tri_rhs.gms()(2,n,1,n-1),lower)
327  : tri_ele(tri_rhs.gms()(1,n-1,2,n),upper) );
328  const BLAS_Cpp::Uplo
329  as_uplo = ( ( tri_rhs.uplo() == lower && trans_rhs == no_trans )
330  || ( tri_rhs.uplo() == upper && trans_rhs == trans )
331  ? lower : upper );
332  switch(as_uplo) {
333  case lower:
334  Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>(
335  &tri_ele((*gms_lhs)(2,n,1,n-1),lower) )
336  , alpha, tri );
337  break;
338  case upper:
339  Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>(
340  &tri_ele((*gms_lhs)(1,n-1,2,n),upper) )
341  , alpha, tri );
342  break;
343  }
344 }
345 
346 // /////////////////////////////////////////////////////////////////////////////////////
347 // Level-2 BLAS (vector-matrtix) Liner Algebra Operations
348 
349 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
350  , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta)
351 {
352  Vp_MtV_assert_sizes(vs_lhs->dim(), gms_rhs1.rows() , gms_rhs1.cols(), trans_rhs1
353  , vs_rhs2.dim());
354  BLAS_Cpp::gemv(trans_rhs1,gms_rhs1.rows(),gms_rhs1.cols(),alpha,gms_rhs1.col_ptr(1)
355  ,gms_rhs1.max_rows(), vs_rhs2.raw_ptr(),vs_rhs2.stride(),beta,vs_lhs->raw_ptr()
356  ,vs_lhs->stride());
357 }
358 
359 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1
360  , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta)
361 {
362  assert_gms_square(sym_rhs1.gms());
363  Vp_MtV_assert_sizes(vs_lhs->dim(), sym_rhs1.gms().rows(), sym_rhs1.gms().cols(), trans_rhs1
364  , vs_rhs2.dim());
365  BLAS_Cpp::symv(sym_rhs1.uplo(),sym_rhs1.gms().rows(),alpha,sym_rhs1.gms().col_ptr(1)
366  ,sym_rhs1.gms().max_rows(),vs_rhs2.raw_ptr(),vs_rhs2.stride(),beta
367  ,vs_lhs->raw_ptr(),vs_lhs->stride());
368 }
369 
370 void DenseLinAlgPack::V_MtV(DVector* v_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1
371  , const DVectorSlice& vs_rhs2)
372 {
373  assert_gms_square(tri_rhs1.gms());
374  MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim());
375  v_lhs->resize(tri_rhs1.gms().rows());
376  (*v_lhs) = vs_rhs2;
377  BLAS_Cpp::trmv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows()
378  ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), v_lhs->raw_ptr(),1);
379 }
380 
381 void DenseLinAlgPack::V_MtV(DVectorSlice* vs_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1
382  , const DVectorSlice& vs_rhs2)
383 {
384  assert_gms_square(tri_rhs1.gms());
385  MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim());
386  Vp_V_assert_sizes( vs_lhs->dim(), tri_rhs1.gms().rows() );
387  (*vs_lhs) = vs_rhs2;
388  BLAS_Cpp::trmv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows()
389  ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), vs_lhs->raw_ptr(),vs_lhs->stride());
390 }
391 
392 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
393  , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta)
394 {
395  Vp_MtV_assert_sizes(vs_lhs->dim(),tri_rhs1.gms().rows(),tri_rhs1.gms().cols()
396  ,trans_rhs1,vs_rhs2.dim() );
397 
398  // If op(gms_rhs2) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS.
399  if( vs_lhs->overlap(vs_rhs2) == SAME_MEM && beta == 0.0 )
400  {
401  V_MtV(vs_lhs, tri_rhs1, trans_rhs1, vs_rhs2);
402  if(alpha != 1.0) Vt_S(vs_lhs,alpha);
403  }
404  else {
405  // This is something else so the alias problem is not handled.
406  if(beta != 1.0) Vt_S(vs_lhs,beta);
407  DVector tmp;
408  V_MtV(&tmp, tri_rhs1, trans_rhs1, vs_rhs2);
409  Vp_StV(vs_lhs,alpha,tmp());
410  }
411 }
412 
413 void DenseLinAlgPack::V_InvMtV(DVector* v_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1
414  , const DVectorSlice& vs_rhs2)
415 {
416  assert_gms_square(tri_rhs1.gms());
417  MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim());
418  v_lhs->resize(tri_rhs1.gms().rows());
419  (*v_lhs) = vs_rhs2;
420  BLAS_Cpp::trsv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows()
421  ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), v_lhs->raw_ptr(),1);
422 }
423 
424 void DenseLinAlgPack::V_InvMtV(DVectorSlice* vs_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1
425  , const DVectorSlice& vs_rhs2)
426 {
427  assert_gms_square(tri_rhs1.gms());
428  MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim());
429  Vp_V_assert_sizes( vs_lhs->dim(), tri_rhs1.gms().rows() );
430  (*vs_lhs) = vs_rhs2;
431  BLAS_Cpp::trsv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows()
432  ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), vs_lhs->raw_ptr(),vs_lhs->stride());
433 }
434 
435 
436 void DenseLinAlgPack::ger(
437  value_type alpha, const DVectorSlice& vs_rhs1, const DVectorSlice& vs_rhs2
438  , DMatrixSlice* gms_lhs )
439 {
440  Vp_MtV_assert_sizes( vs_rhs2.dim(), gms_lhs->rows(), gms_lhs->cols()
441  , BLAS_Cpp::no_trans, vs_rhs1.dim() );
442  BLAS_Cpp::ger(
443  gms_lhs->rows(), gms_lhs->cols(), alpha
444  ,vs_rhs1.raw_ptr(), vs_rhs1.stride()
445  ,vs_rhs2.raw_ptr(), vs_rhs2.stride()
446  ,gms_lhs->col_ptr(1), gms_lhs->max_rows() );
447 }
448 
449 void DenseLinAlgPack::syr(value_type alpha, const DVectorSlice& vs_rhs, DMatrixSliceSym* sym_lhs)
450 {
451  assert_gms_square(sym_lhs->gms());
452  MtV_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols()
453  , BLAS_Cpp::no_trans, vs_rhs.dim() );
454  BLAS_Cpp::syr( sym_lhs->uplo(), vs_rhs.dim(), alpha, vs_rhs.raw_ptr()
455  , vs_rhs.stride(), sym_lhs->gms().col_ptr(1), sym_lhs->gms().max_rows() );
456 }
457 
458 void DenseLinAlgPack::syr2(value_type alpha, const DVectorSlice& vs_rhs1, const DVectorSlice& vs_rhs2
459  , DMatrixSliceSym* sym_lhs)
460 {
461  assert_gms_square(sym_lhs->gms());
462  VopV_assert_sizes( vs_rhs1.dim(), vs_rhs2.dim() );
463  MtV_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols()
464  , BLAS_Cpp::no_trans, vs_rhs1.dim() );
465  BLAS_Cpp::syr2( sym_lhs->uplo(), vs_rhs1.dim(), alpha, vs_rhs1.raw_ptr()
466  , vs_rhs1.stride(), vs_rhs2.raw_ptr(), vs_rhs2.stride()
467  , sym_lhs->gms().col_ptr(1), sym_lhs->gms().max_rows() );
468 }
469 
470 // //////////////////////////////////////////////////////////////////////////////////////////
471 // Level-3 BLAS (matrix-matrix) Linear Algebra Operations
472 
473 // ////////////////////////////
474 // Rectangular Matrices
475 
476 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
477  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
478  , BLAS_Cpp::Transp trans_rhs2, value_type beta)
479 {
480  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
481  , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
482  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2);
483  BLAS_Cpp::gemm(trans_rhs1,trans_rhs2,gms_lhs->rows(),gms_lhs->cols()
484  ,cols(gms_rhs1.rows(),gms_rhs1.cols(),trans_rhs1)
485  ,alpha,gms_rhs1.col_ptr(1),gms_rhs1.max_rows()
486  ,gms_rhs2.col_ptr(1),gms_rhs2.max_rows()
487  ,beta,gms_lhs->col_ptr(1),gms_lhs->max_rows() );
488 }
489 
490 // ////////////////////////////
491 // Symmetric Matrices
492 
493 namespace {
494 
495 // implementation: gms_lhs = alpha * sym_rhs * gms_rhs + beta * gms_lhs (left) (BLAS xSYMM).
496 // or gms_lhs = alpha * gms_rhs * sym_rhs + beta * gms_lhs (right).
497 // does not check sizes.
498 void i_symm(BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceSym& sym_rhs
499  , const DMatrixSlice& gms_rhs, value_type beta, DMatrixSlice* gms_lhs)
500 {
501  BLAS_Cpp::symm(side,sym_rhs.uplo(),gms_lhs->rows(),gms_lhs->cols()
502  ,alpha,sym_rhs.gms().col_ptr(1),sym_rhs.gms().max_rows()
503  ,gms_rhs.col_ptr(1),gms_rhs.max_rows()
504  ,beta,gms_lhs->col_ptr(1),gms_lhs->max_rows() );
505 }
506 
507 } // end namespace
508 
509 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1
510  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
511  , BLAS_Cpp::Transp trans_rhs2, value_type beta)
512 {
513  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
514  , sym_rhs1.gms().rows(), sym_rhs1.gms().cols(), trans_rhs1
515  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2);
516  if(trans_rhs2 == BLAS_Cpp::no_trans) {
517  i_symm(BLAS_Cpp::left,alpha,sym_rhs1,gms_rhs2,beta,gms_lhs);
518  }
519  else {
520  // must make a temporary copy to call the BLAS
521  DMatrix tmp;
522  assign(&tmp,gms_rhs2,trans_rhs2);
523  i_symm(BLAS_Cpp::left,alpha,sym_rhs1,tmp(),beta,gms_lhs);
524  }
525 }
526 
527 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
528  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2
529  , BLAS_Cpp::Transp trans_rhs2, value_type beta)
530 {
531  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
532  , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
533  , sym_rhs2.gms().rows(), sym_rhs2.gms().cols(), trans_rhs2 );
534  if(trans_rhs1 == BLAS_Cpp::no_trans) {
535  i_symm(BLAS_Cpp::right,alpha,sym_rhs2,gms_rhs1,beta,gms_lhs);
536  }
537  else {
538  // must make a temporary copy to call the BLAS
539  DMatrix tmp;
540  assign(&tmp,gms_rhs1,trans_rhs1);
541  i_symm(BLAS_Cpp::right,alpha,sym_rhs2,tmp(),beta,gms_lhs);
542  }
543 }
544 
545 void DenseLinAlgPack::syrk(BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSlice& gms_rhs
546  , value_type beta, DMatrixSliceSym* sym_lhs)
547 {
548  Mp_MtM_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols(), BLAS_Cpp::no_trans
549  , gms_rhs.rows(), gms_rhs.cols(), trans
550  , gms_rhs.rows(), gms_rhs.cols(), trans_not(trans) );
551  BLAS_Cpp::syrk(sym_lhs->uplo(),trans,sym_lhs->gms().rows()
552  ,cols(gms_rhs.rows(), gms_rhs.cols(), trans),alpha
553  ,gms_rhs.col_ptr(1),gms_rhs.max_rows(),beta
554  ,sym_lhs->gms().col_ptr(1),sym_lhs->gms().max_rows() );
555 }
556 
557 void DenseLinAlgPack::syr2k(BLAS_Cpp::Transp trans,value_type alpha, const DMatrixSlice& gms_rhs1
558  , const DMatrixSlice& gms_rhs2, value_type beta, DMatrixSliceSym* sym_lhs)
559 {
560  Mp_MtM_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols(), BLAS_Cpp::no_trans
561  , gms_rhs1.rows(), gms_rhs1.cols(), trans
562  , gms_rhs2.rows(), gms_rhs2.cols(), trans_not(trans) );
563  BLAS_Cpp::syr2k(sym_lhs->uplo(),trans,sym_lhs->gms().rows()
564  ,cols(gms_rhs1.rows(), gms_rhs1.cols(), trans),alpha
565  ,gms_rhs1.col_ptr(1),gms_rhs1.max_rows()
566  ,gms_rhs2.col_ptr(1),gms_rhs2.max_rows(),beta
567  ,sym_lhs->gms().col_ptr(1),sym_lhs->gms().max_rows() );
568 }
569 
570 // ////////////////////////////
571 // Triangular Matrices
572 
573 // ToDo: Finish the definitions below.
574 
575 namespace {
576 
577 // implementation: gms_lhs = alpha * op(tri_rhs) * gms_lhs (left) (BLAS xTRMM).
578 // or gms_lhs = alpha * gms_rhs * op(tri_rhs) (right).
579 // does not check sizes.
580 inline
581 void i_trmm(BLAS_Cpp::Side side, BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSliceTri& tri_rhs
582  , DMatrixSlice* gms_lhs)
583 {
584  BLAS_Cpp::trmm(side,tri_rhs.uplo(),trans,tri_rhs.diag(),gms_lhs->rows(),gms_lhs->cols()
585  ,alpha,tri_rhs.gms().col_ptr(1),tri_rhs.gms().max_rows()
586  ,gms_lhs->col_ptr(1),gms_lhs->max_rows() );
587 }
588 
589 // implementation: gms_lhs = alpha * op(tri_rhs) * op(gms_rhs) (left)
590 // or gms_lhs = alpha * op(gms_rhs) * op(tri_rhs) (right)
591 // Takes care of temporaries but does not check sizes.
592 inline
593 void i_trmm_alt( BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceTri& tri_rhs
594  , BLAS_Cpp::Transp trans_tri_rhs, const DMatrixSlice& gms_rhs
595  , BLAS_Cpp::Transp trans_gms_rhs, DMatrixSlice* gms_lhs )
596 {
597  assign(gms_lhs,gms_rhs,trans_gms_rhs);
598  i_trmm( side, trans_tri_rhs, alpha, tri_rhs, gms_lhs );
599 }
600 
601 // implementation: gms_lhs = alpha * inv(op(tri_rhs)) * gms_lhs (left) (BLAS xTRSM).
602 // or gms_lhs = alpha * gms_rhs * inv(op(tri_rhs)) (right).
603 // does not check sizes.
604 inline
605 void i_trsm(BLAS_Cpp::Side side, BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSliceTri& tri_rhs
606  , DMatrixSlice* gms_lhs)
607 {
608  BLAS_Cpp::trsm(side,tri_rhs.uplo(),trans,tri_rhs.diag(),gms_lhs->rows(),gms_lhs->cols()
609  ,alpha,tri_rhs.gms().col_ptr(1),tri_rhs.gms().max_rows()
610  ,gms_lhs->col_ptr(1),gms_lhs->max_rows() );
611 }
612 
613 // implementation: gms_lhs = alpha * inv(op(tri_rhs)) * op(gms_rhs) (left)
614 // or gms_lhs = alpha * op(gms_rhs) * inv(op(tri_rhs)) (right)
615 // Takes care of temporaries but does not check sizes.
616 inline
617 void i_trsm_alt(BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceTri& tri_rhs
618  , BLAS_Cpp::Transp trans_tri_rhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_gms_rhs
619  , DMatrixSlice* gms_lhs)
620 {
621  assign(gms_lhs,gms_rhs,trans_gms_rhs);
622  i_trsm( side, trans_tri_rhs, alpha, tri_rhs, gms_lhs );
623 }
624 
625 } // end namespace
626 
627 void DenseLinAlgPack::M_StMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
628  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
629  , BLAS_Cpp::Transp trans_rhs2)
630 {
631  MtM_assert_sizes( tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1
632  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 );
633  gm_lhs->resize( rows(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1)
634  , cols(gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2) );
635  i_trmm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,&(*gm_lhs)());
636 }
637 
638 void DenseLinAlgPack::M_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
639  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
640  , BLAS_Cpp::Transp trans_rhs2)
641 {
642  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
643  , tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1
644  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 );
645  i_trmm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,gms_lhs);
646 }
647 
648 void DenseLinAlgPack::M_StMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
649  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2
650  , BLAS_Cpp::Transp trans_rhs2)
651 {
652  MtM_assert_sizes( gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
653  , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 );
654  gm_lhs->resize( rows(gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1)
655  , cols(tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2) );
656  i_trmm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,&(*gm_lhs)());
657 }
658 
659 void DenseLinAlgPack::M_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
660  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2
661  , BLAS_Cpp::Transp trans_rhs2)
662 {
663  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
664  , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
665  , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 );
666  i_trmm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,gms_lhs);
667 }
668 
669 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
670  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
671  , BLAS_Cpp::Transp trans_rhs2, value_type beta)
672 {
673  // If op(gms_rhs2) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS.
674  if( gms_lhs->overlap(gms_rhs2) == SAME_MEM && trans_rhs2 == BLAS_Cpp::no_trans
675  && beta == 0.0 )
676  {
677  i_trmm(BLAS_Cpp::left,trans_rhs1,alpha,tri_rhs1,gms_lhs);
678  }
679  else {
680  // This is something else so the alias problem is not handled.
681  if(beta != 1.0) Mt_S(gms_lhs,beta);
682  DMatrix tmp;
683  M_StMtM(&tmp,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2);
684  Mp_StM(gms_lhs,1.0,tmp(),BLAS_Cpp::no_trans);
685  }
686 }
687 
688 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
689  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2
690  , BLAS_Cpp::Transp trans_rhs2, value_type beta)
691 {
692  // If op(gms_rhs1) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS.
693  if( gms_lhs->overlap(gms_rhs1) == SAME_MEM && trans_rhs1 == BLAS_Cpp::no_trans
694  && beta == 0.0 )
695  {
696  i_trmm(BLAS_Cpp::right,trans_rhs2,alpha,tri_rhs2,gms_lhs);
697  }
698  else {
699  // This is something else so the alias problem is not handled.
700  if(beta != 1.0) Mt_S(gms_lhs,beta);
701  DMatrix tmp;
702  M_StMtM(&tmp,alpha,gms_rhs1,trans_rhs1,tri_rhs2,trans_rhs2);
703  Mp_StM(gms_lhs,1.0,tmp(),BLAS_Cpp::no_trans);
704  }
705 }
706 
707 void DenseLinAlgPack::M_StInvMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
708  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
709  , BLAS_Cpp::Transp trans_rhs2)
710 {
711  MtM_assert_sizes( tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1
712  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 );
713  gm_lhs->resize( rows(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1)
714  , cols(gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2) );
715  i_trsm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,&(*gm_lhs)());
716 }
717 
718 void DenseLinAlgPack::M_StInvMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
719  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2
720  , BLAS_Cpp::Transp trans_rhs2)
721 {
722  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
723  , tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1
724  , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 );
725  i_trsm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,gms_lhs);
726 }
727 
728 void DenseLinAlgPack::M_StMtInvM(DMatrix* gm_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
729  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2
730  , BLAS_Cpp::Transp trans_rhs2)
731 {
732  MtM_assert_sizes( gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
733  , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 );
734  gm_lhs->resize( rows(gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1)
735  , cols(tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2) );
736  i_trsm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,&(*gm_lhs)());
737 }
738 
739 void DenseLinAlgPack::M_StMtInvM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
740  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2
741  , BLAS_Cpp::Transp trans_rhs2)
742 {
743  Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
744  , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1
745  , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 );
746  i_trsm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,gms_lhs);
747 }
Uplo
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type rows(size_type rows, size_type cols, BLAS_Cpp::Transp _trans)
Side
size_t size_type
Transp trans_not(Transp _trans)
size_type rows() const
Return the number of rows.
Transp
size_type cols(size_type rows, size_type cols, BLAS_Cpp::Transp _trans)