46 #include "Epetra_config.h"
55 #include "EpetraExt_BlockUtility.h"
62 template<
typename IntType >
69 return k + N * ( j + N * i );
72 template <
typename ordinal >
81 for (
int i = 0 ; i < (
int) N ; ++i ) {
82 for (
int j = 0 ;
j < (
int) N ; ++
j ) {
83 for (
int k = 0 ; k < (
int) N ; ++k ) {
87 graph[row].reserve(27);
89 for (
int ii = -1 ; ii < 2 ; ++ii ) {
90 for (
int jj = -1 ; jj < 2 ; ++jj ) {
91 for (
int kk = -1 ; kk < 2 ; ++kk ) {
92 if ( 0 <= i + ii && i + ii < (
int) N &&
93 0 <=
j + jj &&
j + jj < (
int) N &&
94 0 <= k + kk && k + kk < (
int) N ) {
97 graph[row].push_back(col);
100 total += graph[row].size();
110 const int iterCount ,
111 const bool print_flag ,
112 const bool test_block ,
127 RCP<const Epetra_Comm> globalComm;
129 RCP<const Epetra_MpiComm> globalMpiComm =
132 globalComm = globalMpiComm;
134 RCP<const Epetra_SerialComm> globalSerialComm =
137 globalComm = globalSerialComm;
143 const size_t num_KL = var_degree.
size();
144 Array< RCP<const abstract_basis_type> > bases(num_KL);
145 for (
size_t i=0; i<num_KL; i++)
147 RCP<const product_basis_type> basis =
148 rcp(
new product_basis_type(bases, 1e-12));
149 const size_t stoch_length = basis->size();
150 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
153 ParameterList parallelParams;
154 RCP<Stokhos::ParallelData> sg_parallel_data =
156 RCP<const EpetraExt::MultiComm> sg_comm =
157 sg_parallel_data->getMultiComm();
158 RCP<const Epetra_Comm> app_comm =
159 sg_parallel_data->getSpatialComm();
160 RCP<const Stokhos::EpetraSparse3Tensor> epetraCijk =
161 sg_parallel_data->getEpetraCijk();
162 RCP<const Epetra_BlockMap> stoch_row_map =
163 epetraCijk->getStochasticRowMap();
170 const size_t fem_length = nGrid * nGrid * nGrid ;
176 RCP<const Epetra_Map> x_map =
rcp(
new Epetra_Map(static_cast<int>(fem_length), 0, *app_comm));
177 RCP<const Epetra_Map> sg_x_map =
178 rcp(EpetraExt::BlockUtility::GenerateBlockMap(
179 *x_map, *stoch_row_map, *sg_comm));
182 int *my_GIDs = x_map->MyGlobalElements();
183 int num_my_GIDs = x_map->NumMyElements();
184 for (
int i=0; i<num_my_GIDs; ++i) {
185 int row = my_GIDs[i];
186 int num_indices = fem_graph[row].
size();
187 int *indices = &(fem_graph[row][0]);
188 graph->InsertGlobalIndices(row, num_indices, indices);
190 graph->FillComplete();
191 int nnz = graph->NumGlobalNonzeros();
193 RCP<ParameterList> sg_op_params =
rcp(
new ParameterList);
194 RCP<Stokhos::MatrixFreeOperator> sg_A =
196 x_map, x_map, sg_x_map, sg_x_map,
198 RCP<Epetra_BlockMap> sg_A_overlap_map =
200 static_cast<int>(stoch_length), 0, *(sg_parallel_data->getStochasticComm())));
201 RCP< Stokhos::EpetraOperatorOrthogPoly > A_sg_blocks =
203 basis, sg_A_overlap_map, x_map, x_map, sg_x_map, sg_comm));
204 for (
unsigned int i=0; i<stoch_length; i++) {
208 A_sg_blocks->setCoeffPtr(i, A);
210 sg_A->setupOperator(A_sg_blocks);
212 RCP<Stokhos::EpetraVectorOrthogPoly> sg_x =
214 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
215 RCP<Stokhos::EpetraVectorOrthogPoly> sg_y =
217 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
225 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
226 sg_A->Apply( *(sg_x->getBlockVector()), *(sg_y->getBlockVector()) );
229 double seconds_per_iter =
233 double average_seconds_per_iter;
234 globalComm->SumAll(&seconds_per_iter, &average_seconds_per_iter, 1);
235 average_seconds_per_iter /= globalComm->NumProc();
240 for (Cijk_type::k_iterator k_it=Cijk->k_begin();
241 k_it!=Cijk->k_end(); ++k_it) {
242 for (Cijk_type::kj_iterator j_it = Cijk->j_begin(k_it);
243 j_it != Cijk->j_end(k_it); ++j_it) {
245 for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
246 i_it != Cijk->i_end(j_it); ++i_it) {
252 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*nnz +
253 static_cast<double>(n_add)*fem_length);
258 perf[0] = stoch_length;
259 perf[1] = fem_length;
260 perf[2] = stoch_length * fem_length;
261 perf[3] = Cijk->num_entries();
263 perf[5] = average_seconds_per_iter ;
264 perf[6] = flops/average_seconds_per_iter;
276 const bool test_block ,
285 <<
"\"#nGrid\" , \"NNZ\" "
286 <<
"\"#Variable\" , \"PolyDegree\" , \"#Bases\" , "
287 <<
"\"#TensorEntry\" , "
288 <<
"\"VectorSize\" , "
289 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
290 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
293 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
299 out << nGrid <<
" , "
300 << perf_original_mat_free_epetra[4] <<
" , "
301 << nvar <<
" , " << pdeg <<
" , "
302 << perf_original_mat_free_epetra[0] <<
" , "
303 << perf_original_mat_free_epetra[3] <<
" , "
304 << perf_original_mat_free_epetra[2] <<
" , "
305 << perf_original_mat_free_epetra[5] <<
" , "
306 << perf_original_mat_free_epetra[6] <<
" , "
321 Teuchos::VerboseObjectBase::getDefaultOStream();
326 CLP.
setOption(
"p", &p,
"Polynomial order");
328 CLP.
setOption(
"dmin", &d_min,
"Starting stochastic dimension");
330 CLP.
setOption(
"dmax", &d_max,
"Ending stochastic dimension");
332 CLP.
setOption(
"n", &nGrid,
"Number of spatial grid points in each dimension");
334 CLP.
setOption(
"niter", &nIter,
"Number of iterations");
335 bool test_block =
true;
336 CLP.
setOption(
"block",
"no-block", &test_block,
"Use block algorithm");
337 CLP.
parse( argc, argv );
342 p , d_min , d_max , nGrid , nIter , print , test_block , check , *out );
An Epetra operator representing the block stochastic Galerkin operator.
A container class storing an orthogonal polynomial whose coefficients are vectors, operators, or in general any type that would have an expensive copy constructor.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
Stokhos::LegendreBasis< int, double > basis_type
void start(bool reset=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
A container class storing an orthogonal polynomial whose coefficients are vectors, operators, or in general any type that would have an expensive copy constructor.
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
Multivariate orthogonal polynomial basis generated from a total-order complete-polynomial tensor prod...
Legendre polynomial basis.
Stokhos::Sparse3Tensor< int, double > Cijk_type
int main(int argc, char **argv)
Teuchos::Array< double > test_original_matrix_free_epetra(const Teuchos::Array< int > &var_degree, const int nGrid, const int iterCount, const bool print_flag, const bool test_block, const bool check)
void performance_test_driver_epetra(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool print, const bool test_block, const bool check, Teuchos::FancyOStream &out)
Abstract base class for 1-D orthogonal polynomials.
double totalElapsedTime(bool readCurrentTime=false) const