1 #include <petscmat.h> 2 3 static char help[] = "Tests MatMatMult with MAT_REUSE_MATRIX and already allocated dense result.\n\n"; 4 5 static PetscErrorCode CheckLocal(Mat A, Mat B, PetscScalar *a, PetscScalar *b) 6 { 7 PetscErrorCode ierr; 8 PetscBool wA = PETSC_FALSE, wB = PETSC_FALSE; 9 10 PetscFunctionBegin; 11 if (a) { 12 const PetscScalar *Aa; 13 ierr = MatDenseGetArrayRead(A,&Aa);CHKERRQ(ierr); 14 wA = (PetscBool)(a != Aa); 15 ierr = MatDenseRestoreArrayRead(A,&Aa);CHKERRQ(ierr); 16 } 17 if (b) { 18 const PetscScalar *Bb; 19 ierr = MatDenseGetArrayRead(B,&Bb);CHKERRQ(ierr); 20 wB = (PetscBool)(b != Bb); 21 ierr = MatDenseRestoreArrayRead(B,&Bb);CHKERRQ(ierr); 22 } 23 if (wA || wB) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong array in first Mat? %d, Wrong array in second Mat? %d",wA,wB); 24 PetscFunctionReturn(0); 25 } 26 27 int main(int argc,char **args) 28 { 29 Mat X,B,A,T,T2; 30 PetscInt m,n,k,M = 10,N = 10,K = 5, ldx = 3, ldb = 5; 31 const char *deft = MATAIJ; 32 char mattype[256]; 33 PetscBool flg,symm = PETSC_FALSE,testtt = PETSC_TRUE, testnest = PETSC_TRUE, testtranspose = PETSC_TRUE, testcircular = PETSC_FALSE, local = PETSC_TRUE; 34 PetscBool xgpu = PETSC_FALSE,bgpu = PETSC_FALSE; 35 PetscScalar *dataX = NULL,*dataB = NULL; 36 PetscScalar *aX,*aB; 37 PetscReal err; 38 PetscErrorCode ierr; 39 40 ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr; 41 ierr = PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsGetInt(NULL,NULL,"-K",&K,NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsGetBool(NULL,NULL,"-local",&local,NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsGetInt(NULL,NULL,"-ldx",&ldx,NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsGetInt(NULL,NULL,"-ldb",&ldb,NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsGetBool(NULL,NULL,"-testtranspose",&testtranspose,NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsGetBool(NULL,NULL,"-testnest",&testnest,NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsGetBool(NULL,NULL,"-testtt",&testtt,NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsGetBool(NULL,NULL,"-testcircular",&testcircular,NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsGetBool(NULL,NULL,"-xgpu",&xgpu,NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr); 54 #if defined(PETSC_USE_COMPLEX) 55 testtranspose = PETSC_FALSE; 56 testtt = PETSC_FALSE; 57 #endif 58 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 59 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 60 ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 61 ierr = MatSetUp(A);CHKERRQ(ierr); 62 ierr = MatSetRandom(A,NULL);CHKERRQ(ierr); 63 if (M==N && symm) { 64 Mat AT; 65 66 ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 67 ierr = MatAXPY(A,1.0,AT,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 68 ierr = MatDestroy(&AT);CHKERRQ(ierr); 69 ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 70 } 71 ierr = MatViewFromOptions(A,NULL,"-A_init_view");CHKERRQ(ierr); 72 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,"","","");CHKERRQ(ierr); 73 ierr = PetscOptionsFList("-A_mat_type","Matrix type","MatSetType",MatList,deft,mattype,256,&flg);CHKERRQ(ierr); 74 ierr = PetscOptionsEnd();CHKERRQ(ierr); 75 if (flg) { 76 Mat A2; 77 78 /* MATSEQAIJCUSPARSE does not support MAT_INITIAL_MATRIX */ 79 ierr = MatDuplicate(A,MAT_COPY_VALUES,&A2);CHKERRQ(ierr); 80 ierr = MatConvert(A,mattype,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 81 ierr = MatMultEqual(A,A2,10,&flg);CHKERRQ(ierr); 82 if (!flg) { 83 Mat AE,A2E; 84 85 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with convert\n");CHKERRQ(ierr); 86 ierr = MatComputeOperator(A,MATDENSE,&AE);CHKERRQ(ierr); 87 ierr = MatComputeOperator(A2,MATDENSE,&A2E);CHKERRQ(ierr); 88 ierr = MatView(AE,NULL);CHKERRQ(ierr); 89 ierr = MatView(A2E,NULL);CHKERRQ(ierr); 90 ierr = MatAXPY(A2E,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 91 ierr = MatView(A2E,NULL);CHKERRQ(ierr); 92 ierr = MatDestroy(&A2E);CHKERRQ(ierr); 93 ierr = MatDestroy(&AE);CHKERRQ(ierr); 94 } 95 ierr = MatDestroy(&A2);CHKERRQ(ierr); 96 } 97 ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr); 98 99 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 100 if (local) { 101 ierr = PetscMalloc1(PetscMax((m+ldx)*K,1),&dataX);CHKERRQ(ierr); 102 ierr = PetscMalloc1(PetscMax((n+ldb)*K,1),&dataB);CHKERRQ(ierr); 103 } 104 ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,K,dataB,&B);CHKERRQ(ierr); 105 ierr = MatCreateDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,K,dataX,&X);CHKERRQ(ierr); 106 107 /* store pointer to dense data for testing */ 108 ierr = MatDenseGetArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr); 109 ierr = MatDenseGetArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr); 110 aX = dataX; 111 aB = dataB; 112 ierr = MatDenseRestoreArrayRead(B,(const PetscScalar**)&dataB);CHKERRQ(ierr); 113 ierr = MatDenseRestoreArrayRead(X,(const PetscScalar**)&dataX);CHKERRQ(ierr); 114 if (local) { 115 dataX = aX; 116 dataB = aB; 117 } 118 ierr = MatSetRandom(B,NULL);CHKERRQ(ierr); 119 /* convert to CUDA if needed */ 120 if (bgpu) { 121 ierr = MatConvert(B,MATDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 122 } 123 if (xgpu) { 124 ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 126 ierr = MatConvert(X,MATDENSECUDA,MAT_INPLACE_MATRIX,&X);CHKERRQ(ierr); 127 } 128 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 129 130 /* Test reusing a previously allocated dense buffer */ 131 ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 132 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 133 ierr = MatMatMultEqual(A,B,X,10,&flg);CHKERRQ(ierr); 134 if (!flg) { 135 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage\n");CHKERRQ(ierr); 136 ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 137 ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 138 ierr = MatView(T,NULL);CHKERRQ(ierr); 139 ierr = MatDestroy(&T);CHKERRQ(ierr); 140 } 141 142 /* Test MatDenseGetColumnVec and friends */ 143 ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 144 ierr = MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&T2);CHKERRQ(ierr); 145 for (k=0;k<K;k++) { 146 Vec Xv,Tv,T2v; 147 148 ierr = MatDenseGetColumnVecRead(X,k,&Xv);CHKERRQ(ierr); 149 ierr = MatDenseGetColumnVec(T,k,&Tv);CHKERRQ(ierr); 150 ierr = MatDenseGetColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr); 151 ierr = VecCopy(Xv,T2v);CHKERRQ(ierr); 152 ierr = VecAXPY(Tv,-1.,Xv);CHKERRQ(ierr); 153 ierr = MatDenseRestoreColumnVecRead(X,k,&Xv);CHKERRQ(ierr); 154 ierr = MatDenseRestoreColumnVec(T,k,&Tv);CHKERRQ(ierr); 155 ierr = MatDenseRestoreColumnVecWrite(T2,k,&T2v);CHKERRQ(ierr); 156 } 157 ierr = MatNorm(T,NORM_FROBENIUS,&err);CHKERRQ(ierr); 158 if (err > PETSC_SMALL) { 159 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVec\n");CHKERRQ(ierr); 160 ierr = MatView(T,NULL);CHKERRQ(ierr); 161 } 162 ierr = MatAXPY(T2,-1.,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 163 ierr = MatNorm(T2,NORM_FROBENIUS,&err);CHKERRQ(ierr); 164 if (err > PETSC_SMALL) { 165 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with MatDenseGetColumnVecWrite\n");CHKERRQ(ierr); 166 ierr = MatView(T2,NULL);CHKERRQ(ierr); 167 } 168 ierr = MatDestroy(&T);CHKERRQ(ierr); 169 ierr = MatDestroy(&T2);CHKERRQ(ierr); 170 171 /* Test with MatShell */ 172 ierr = MatConvert(A,MATSHELL,MAT_INITIAL_MATRIX,&T2);CHKERRQ(ierr); 173 ierr = MatMatMult(T2,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 175 ierr = MatMatMultEqual(T2,B,X,10,&flg);CHKERRQ(ierr); 176 if (!flg) { 177 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MATSHELL)\n");CHKERRQ(ierr); 178 ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 179 ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 180 ierr = MatView(T,NULL);CHKERRQ(ierr); 181 ierr = MatDestroy(&T);CHKERRQ(ierr); 182 } 183 ierr = MatTransposeMatMult(T2,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 184 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 185 ierr = MatTransposeMatMultEqual(T2,X,B,10,&flg);CHKERRQ(ierr); 186 if (!flg) { 187 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with reusage (MatTranspose, MATSHELL)\n");CHKERRQ(ierr); 188 } 189 ierr = MatDestroy(&T2);CHKERRQ(ierr); 190 191 if (testnest) { /* test with MatNest */ 192 Mat NA; 193 const char *vtype; 194 195 ierr = MatCreateNest(PETSC_COMM_WORLD,1,NULL,1,NULL,&A,&NA);CHKERRQ(ierr); 196 /* needed to test against CUSPARSE matrices */ 197 ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr); 198 ierr = MatSetVecType(NA,vtype);CHKERRQ(ierr); 199 ierr = MatViewFromOptions(NA,NULL,"-NA_view");CHKERRQ(ierr); 200 ierr = MatMatMult(NA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 201 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 202 ierr = MatMatMultEqual(NA,B,X,10,&flg);CHKERRQ(ierr); 203 if (!flg) { 204 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Nest\n");CHKERRQ(ierr); 205 ierr = MatMatMult(NA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 206 ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 207 ierr = MatView(T,NULL);CHKERRQ(ierr); 208 ierr = MatDestroy(&T);CHKERRQ(ierr); 209 } 210 ierr = MatDestroy(&NA);CHKERRQ(ierr); 211 } 212 213 if (testtranspose) { /* test with Transpose */ 214 Mat TA; 215 216 ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr); 217 ierr = MatMatMult(TA,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 218 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 219 ierr = MatMatMultEqual(TA,X,B,10,&flg);CHKERRQ(ierr); 220 if (!flg) { 221 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose\n");CHKERRQ(ierr); 222 ierr = MatMatMult(TA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 223 ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 224 ierr = MatView(T,NULL);CHKERRQ(ierr); 225 ierr = MatDestroy(&T);CHKERRQ(ierr); 226 } 227 ierr = MatDestroy(&TA);CHKERRQ(ierr); 228 } 229 230 if (testtt) { /* test with Transpose(Transpose) */ 231 Mat TA, TTA; 232 233 ierr = MatCreateHermitianTranspose(A,&TA);CHKERRQ(ierr); 234 ierr = MatCreateHermitianTranspose(TA,&TTA);CHKERRQ(ierr); 235 ierr = MatMatMult(TTA,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 236 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 237 ierr = MatMatMultEqual(TTA,B,X,10,&flg);CHKERRQ(ierr); 238 if (!flg) { 239 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error with Transpose(Transpose)\n");CHKERRQ(ierr); 240 ierr = MatMatMult(TTA,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 241 ierr = MatAXPY(T,-1.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 242 ierr = MatView(T,NULL);CHKERRQ(ierr); 243 ierr = MatDestroy(&T);CHKERRQ(ierr); 244 } 245 ierr = MatDestroy(&TA);CHKERRQ(ierr); 246 ierr = MatDestroy(&TTA);CHKERRQ(ierr); 247 } 248 249 if (testcircular) { /* test circular */ 250 Mat AB; 251 252 ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AB);CHKERRQ(ierr); 253 ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 254 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 255 if (M == N && N == K) { 256 ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 257 } else { 258 ierr = MatTransposeMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 259 } 260 ierr = CheckLocal(B,X,aB,aX);CHKERRQ(ierr); 261 ierr = MatDestroy(&AB);CHKERRQ(ierr); 262 } 263 ierr = MatDestroy(&X);CHKERRQ(ierr); 264 ierr = MatDestroy(&B);CHKERRQ(ierr); 265 ierr = MatDestroy(&A);CHKERRQ(ierr); 266 ierr = PetscFree(dataX);CHKERRQ(ierr); 267 ierr = PetscFree(dataB);CHKERRQ(ierr); 268 ierr = PetscFinalize(); 269 return ierr; 270 } 271 272 /*TEST 273 274 test: 275 suffix: 1 276 args: -local {{0 1}} 277 278 test: 279 output_file: output/ex70_1.out 280 requires: cuda 281 suffix: 1_cuda 282 args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{seqaijcusparse seqaij}} -testnest 0 283 284 test: 285 TODO: VecGetSubVector seems broken with CUDA 286 output_file: output/ex70_1.out 287 requires: cuda 288 suffix: 1_cuda_broken 289 args: -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type seqaijcusparse -testnest 290 291 test: 292 output_file: output/ex70_1.out 293 nsize: 2 294 suffix: 1_par 295 args: -testtranspose 0 -local {{0 1}} 296 297 test: 298 output_file: output/ex70_1.out 299 requires: cuda 300 nsize: 2 301 suffix: 1_par_cuda 302 args: -testtranspose 0 -local {{0 1}} -xgpu {{0 1}} -bgpu {{0 1}} -A_mat_type {{mpiaijcusparse mpiaij}} -testnest 0 303 304 test: 305 TODO: MPIAIJ x MPIDENSE broken for MatTransposeMatMult 306 output_file: output/ex70_1.out 307 nsize: 2 308 suffix: 1_par_broken 309 args: -testtranspose -local {{0 1}} 310 311 test: 312 output_file: output/ex70_1.out 313 suffix: 2 314 nsize: 1 315 args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} 316 317 test: 318 requires: cuda 319 output_file: output/ex70_1.out 320 suffix: 2_cuda 321 nsize: 1 322 args: -M 7 -N 9 -K 2 -local {{0 1}} -testnest 0 -A_mat_type {{seqdensecuda seqdense}} -xgpu {{0 1}} -bgpu {{0 1}} 323 324 test: 325 output_file: output/ex70_1.out 326 suffix: 2_par 327 nsize: 2 328 args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular 0 329 330 test: 331 requires: cuda 332 output_file: output/ex70_1.out 333 suffix: 2_par_cuda 334 nsize: 2 335 args: -M 11 -N 9 -K 1 -local {{0 1}} -testcircular 0 -A_mat_type mpiaijcusparse -xgpu -bgpu -testnest 0 336 337 test: 338 TODO: MatTransposeMatMultSymbolic_SeqAIJ_SeqDense plays with the destroy routine 339 output_file: output/ex70_1.out 340 suffix: 2_broken 341 nsize: 2 342 args: -M {{7 11}} -N {{12 9}} -K {{1 3}} -local {{0 1}} -testcircular 1 343 344 test: 345 output_file: output/ex70_1.out 346 suffix: 3 347 nsize: {{1 3}} 348 args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type sbaij -symm -testtranspose 0 349 350 test: 351 output_file: output/ex70_1.out 352 suffix: 4 353 nsize: 1 354 args: -M 3 -N 3 -K 3 -local {{0 1}} -testcircular 355 356 test: 357 output_file: output/ex70_1.out 358 suffix: 5 359 nsize: {{2 4}} 360 args: -M 3 -N 3 -K 3 -local 1 -testcircular -testtranspose 0 361 362 test: 363 TODO: MatCreateDense broken with some processors not having local rows 364 output_file: output/ex70_1.out 365 suffix: 5_broken 366 nsize: {{2 4}} 367 args: -M 3 -N 3 -K 3 -local 0 -testcircular -testtranspose 0 368 369 test: 370 output_file: output/ex70_1.out 371 suffix: 6 372 nsize: 1 373 args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular -testtranspose 0 374 375 test: 376 TODO: MatTransposeMatMultSymbolic_SeqAIJ_SeqDense plays with the destroy routine 377 output_file: output/ex70_1.out 378 suffix: 6_broken 379 nsize: 1 380 args: -M {{1 3}} -N {{2 5}} -K {{1 2}} -local {{0 1}} -testcircular -testtranspose 381 382 test: 383 output_file: output/ex70_1.out 384 suffix: 7 385 nsize: 1 386 args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest 0 -testcircular 387 388 test: 389 TODO: NEST x DENSE with dense nested matrices seems broken in this case 390 output_file: output/ex70_1.out 391 suffix: 7_broken 392 nsize: 1 393 args: -M 13 -N 13 -K {{1 3}} -local {{0 1}} -A_mat_type dense -testnest -testcircular 394 TEST*/ 395