14 #include "Epetra_config.h"
23 #include "EpetraExt_BlockUtility.h"
30 template<
typename IntType >
37 return k + N * ( j + N * i );
40 template <
typename ordinal >
49 for (
int i = 0 ; i < (int) N ; ++i ) {
50 for (
int j = 0 ;
j < (int) N ; ++
j ) {
51 for (
int k = 0 ; k < (int) N ; ++k ) {
55 graph[row].reserve(27);
57 for (
int ii = -1 ; ii < 2 ; ++ii ) {
58 for (
int jj = -1 ; jj < 2 ; ++jj ) {
59 for (
int kk = -1 ; kk < 2 ; ++kk ) {
60 if ( 0 <= i + ii && i + ii < (
int) N &&
61 0 <=
j + jj &&
j + jj < (
int) N &&
62 0 <= k + kk && k + kk < (
int) N ) {
65 graph[row].push_back(col);
68 total += graph[row].size();
79 const bool print_flag ,
80 const bool test_block ,
95 RCP<const Epetra_Comm> globalComm;
97 RCP<const Epetra_MpiComm> globalMpiComm =
100 globalComm = globalMpiComm;
102 RCP<const Epetra_SerialComm> globalSerialComm =
105 globalComm = globalSerialComm;
111 const size_t num_KL = var_degree.
size();
112 Array< RCP<const abstract_basis_type> > bases(num_KL);
113 for (
size_t i=0; i<num_KL; i++)
115 RCP<const product_basis_type> basis =
116 rcp(
new product_basis_type(bases, 1e-12));
117 const size_t stoch_length = basis->size();
118 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
121 ParameterList parallelParams;
122 RCP<Stokhos::ParallelData> sg_parallel_data =
124 RCP<const EpetraExt::MultiComm> sg_comm =
125 sg_parallel_data->getMultiComm();
126 RCP<const Epetra_Comm> app_comm =
127 sg_parallel_data->getSpatialComm();
128 RCP<const Stokhos::EpetraSparse3Tensor> epetraCijk =
129 sg_parallel_data->getEpetraCijk();
130 RCP<const Epetra_BlockMap> stoch_row_map =
131 epetraCijk->getStochasticRowMap();
138 const size_t fem_length = nGrid * nGrid * nGrid ;
144 RCP<const Epetra_Map> x_map =
rcp(
new Epetra_Map(static_cast<int>(fem_length), 0, *app_comm));
145 RCP<const Epetra_Map> sg_x_map =
146 rcp(EpetraExt::BlockUtility::GenerateBlockMap(
147 *x_map, *stoch_row_map, *sg_comm));
150 int *my_GIDs = x_map->MyGlobalElements();
151 int num_my_GIDs = x_map->NumMyElements();
152 for (
int i=0; i<num_my_GIDs; ++i) {
153 int row = my_GIDs[i];
154 int num_indices = fem_graph[row].
size();
155 int *indices = &(fem_graph[row][0]);
156 graph->InsertGlobalIndices(row, num_indices, indices);
158 graph->FillComplete();
159 int nnz = graph->NumGlobalNonzeros();
161 RCP<ParameterList> sg_op_params =
rcp(
new ParameterList);
162 RCP<Stokhos::MatrixFreeOperator> sg_A =
164 x_map, x_map, sg_x_map, sg_x_map,
166 RCP<Epetra_BlockMap> sg_A_overlap_map =
168 static_cast<int>(stoch_length), 0, *(sg_parallel_data->getStochasticComm())));
169 RCP< Stokhos::EpetraOperatorOrthogPoly > A_sg_blocks =
171 basis, sg_A_overlap_map, x_map, x_map, sg_x_map, sg_comm));
172 for (
unsigned int i=0; i<stoch_length; i++) {
176 A_sg_blocks->setCoeffPtr(i, A);
178 sg_A->setupOperator(A_sg_blocks);
180 RCP<Stokhos::EpetraVectorOrthogPoly> sg_x =
182 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
183 RCP<Stokhos::EpetraVectorOrthogPoly> sg_y =
185 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
193 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
194 sg_A->Apply( *(sg_x->getBlockVector()), *(sg_y->getBlockVector()) );
197 double seconds_per_iter =
201 double average_seconds_per_iter;
202 globalComm->SumAll(&seconds_per_iter, &average_seconds_per_iter, 1);
203 average_seconds_per_iter /= globalComm->NumProc();
208 for (Cijk_type::k_iterator k_it=Cijk->k_begin();
209 k_it!=Cijk->k_end(); ++k_it) {
210 for (Cijk_type::kj_iterator j_it = Cijk->j_begin(k_it);
211 j_it != Cijk->j_end(k_it); ++j_it) {
213 for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
214 i_it != Cijk->i_end(j_it); ++i_it) {
220 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*nnz +
221 static_cast<double>(n_add)*fem_length);
226 perf[0] = stoch_length;
227 perf[1] = fem_length;
228 perf[2] = stoch_length * fem_length;
229 perf[3] = Cijk->num_entries();
231 perf[5] = average_seconds_per_iter ;
232 perf[6] = flops/average_seconds_per_iter;
244 const bool test_block ,
253 <<
"\"#nGrid\" , \"NNZ\" "
254 <<
"\"#Variable\" , \"PolyDegree\" , \"#Bases\" , "
255 <<
"\"#TensorEntry\" , "
256 <<
"\"VectorSize\" , "
257 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
258 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
261 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
267 out << nGrid <<
" , "
268 << perf_original_mat_free_epetra[4] <<
" , "
269 << nvar <<
" , " << pdeg <<
" , "
270 << perf_original_mat_free_epetra[0] <<
" , "
271 << perf_original_mat_free_epetra[3] <<
" , "
272 << perf_original_mat_free_epetra[2] <<
" , "
273 << perf_original_mat_free_epetra[5] <<
" , "
274 << perf_original_mat_free_epetra[6] <<
" , "
289 Teuchos::VerboseObjectBase::getDefaultOStream();
294 CLP.
setOption(
"p", &p,
"Polynomial order");
296 CLP.
setOption(
"dmin", &d_min,
"Starting stochastic dimension");
298 CLP.
setOption(
"dmax", &d_max,
"Ending stochastic dimension");
300 CLP.
setOption(
"n", &nGrid,
"Number of spatial grid points in each dimension");
302 CLP.
setOption(
"niter", &nIter,
"Number of iterations");
303 bool test_block =
true;
304 CLP.
setOption(
"block",
"no-block", &test_block,
"Use block algorithm");
305 CLP.
parse( argc, argv );
310 p , d_min , d_max , nGrid , nIter , print , test_block , check , *out );
An Epetra operator representing the block stochastic Galerkin operator.
A container class storing an orthogonal polynomial whose coefficients are vectors, operators, or in general any type that would have an expensive copy constructor.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
ordinal generate_fem_graph(ordinal N, std::vector< std::vector< ordinal > > &graph)
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
Stokhos::LegendreBasis< int, double > basis_type
void start(bool reset=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
A container class storing an orthogonal polynomial whose coefficients are vectors, operators, or in general any type that would have an expensive copy constructor.
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
Multivariate orthogonal polynomial basis generated from a total-order complete-polynomial tensor prod...
Legendre polynomial basis.
Stokhos::Sparse3Tensor< int, double > Cijk_type
int main(int argc, char **argv)
Teuchos::Array< double > test_original_matrix_free_epetra(const Teuchos::Array< int > &var_degree, const int nGrid, const int iterCount, const bool print_flag, const bool test_block, const bool check)
void performance_test_driver_epetra(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool print, const bool test_block, const bool check, Teuchos::FancyOStream &out)
Abstract base class for 1-D orthogonal polynomials.
double totalElapsedTime(bool readCurrentTime=false) const