1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 
/* Return (and cache, composed as "DiagonalBlock") the sequential dense matrix that
   aliases this rank's diagonal block of A.  Requires congruent row/column layouts. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases the local storage starting at column rstart; no copy is made.
       NOTE(review): the offset m*rstart presumably assumes lda == m for the local
       block -- confirm against MatDenseGetLDA before trusting with a padded lda. */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing transfers a reference to A, so we can drop ours after setting *a */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add a logically dense block of values; locally owned rows go straight into
   the sequential block, off-process rows are stashed for AssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative rows are silently skipped by convention */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input block is contiguous at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: element (i,j) lives at v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieve values; only locally owned rows are supported (no communication). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        /* output v is row-major: (i,j) goes to v[i*n+j] */
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local dense storage (delegates to the Seq block) */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-write access to the local dense array; forbidden while a column sub-matrix
   is checked out via MatDenseGetSubMatrix */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily substitute user storage for the local array; also forbidden while a
   column vector or sub-matrix is checked out */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract a submatrix with the SAME row partitioning as A (no redistribution);
   the column IS is allgathered so every rank copies full columns. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is advanced contiguously, which presumably assumes the new
     local block has lda == nrows -- confirm for freshly preallocated matrices. */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];  /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Start communication of stashed off-process entries */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* NOTE(review): bare return(0) after PetscFunctionBegin skips PetscFunctionReturn()
     bookkeeping (call-stack pop in debug builds); should be PetscFunctionReturn(0) */
  if (mdn->donotstash || mat->nooffprocentries) return(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Receive stashed entries, insert them locally, then assemble the local block */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly also builds lvec/Mvctx used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows, optionally fixing b so x stays a solution, and
   optionally re-inserting diag on the diagonal. rows[] may reference any rank's rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* yy = A*xx: broadcast the full xx into lvec via the SF, then local dense mult */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A*xx + yy, same communication pattern as MatMult */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transpose-mult into lvec, then sum-reduce into yy */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A^T*xx + yy: copy yy to zz, then reduce the local transpose-mult into zz */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract the (global) diagonal into v; each rank reads its diagonal block */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* NOTE(review): radd is computed with stride m but the loop below strides by lda;
     these disagree whenever lda != m -- looks like radd should be rstart*lda. Confirm. */
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free all data of an MPI dense matrix and clear its composed methods */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove every method composed by MatCreate_MPIDense and friends */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
/* View helper: info formats print per-rank stats; otherwise the whole matrix is
   gathered onto rank 0 and viewed sequentially (expensive, debug-quality path). */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the ASCII/draw/socket helper or the binary writer */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Collect local info from the Seq block; reduce with MAX or SUM for global flavors */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Honor or explicitly ignore matrix options; most are forwarded to the Seq block */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;  /* controls input layout in MatSetValues_MPIDense */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A = diag(ll)*A*diag(rr); rr is broadcast into lvec so each rank scales its columns */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    /* scale row i: entries sit at vv[i + j*lda] for each local column j */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* gather the full right-scaling vector locally via the SF broadcast */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    /* scale column i: contiguous at vv + i*lda */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Parallel norms: Frobenius/1-norm reduce over all ranks; 2-norm unsupported */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks n*m entries contiguously, which assumes the local
         array has lda == m -- TODO confirm, or use MatDenseGetLDA-aware strides */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* max over global columns of the summed absolute column entries */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
/*
   Forms the transpose of the parallel dense matrix.  Each process owns all
   global columns of its local row block, so transposing amounts to scattering
   each local column as a (global) row of the result via MatSetValues().
   With MAT_INPLACE_MATRIX the result replaces A through MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* The transpose has swapped local and global dimensions. */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork holds the global row indices of this process's rows of A,
     which become the target column indices in B. */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* Column j of the local block becomes (part of) global row j of B. */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's guts with B's. */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Lays out the row/column maps and preallocates the local block if needed. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the local dense blocks (layouts must conform). */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugates every entry; purely local, no communication. */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replaces each entry with its real part; purely local. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replaces each entry with its imaginary part; purely local. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts column col into v by dispatching to the local block's op. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/*
   Computes the norm of each column.  Local per-column norms are computed
   sequentially, then combined: sums for 1- and 2-norms (2-norms are squared
   before the reduction and square-rooted after), max for the infinity norm.
*/
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* Square so partial 2-norms add correctly across processes. */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_CUDA)
/*
   Column-vector access for MATMPIDENSECUDA: wraps column col of the local
   dense block in the cached vector a->cvec without copying, by placing the
   device pointer at offset col*lda.  Only one column may be checked out at a
   time (tracked by a->vecinuse, stored as col+1 so 0 means "not in use").
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* Lazily create the reusable wrapper vector (no backing array yet). */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column checked out by MatDenseGetColumnVec_MPIDenseCUDA(). */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same mechanism, plus a read lock on the wrapper vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the read-locked column and the read-only device array. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: device values are not pulled up to date first. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the write-only column checked out above. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* The following thin wrappers forward device-array operations to the local
   sequential block, first checking no column/submatrix is checked out where
   the operation would invalidate it. */

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/*
   Binds the CUDA matrix to the CPU (or releases it back to the GPU).
   Switches the composed column-vector methods between the host and CUDA
   implementations, and drops cached helper objects (cvec/cmat) whose type
   no longer matches the new execution side.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* Cached helpers of the wrong (host) type must be recreated on demand. */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   Preallocates (or adopts) device storage for a MATMPIDENSECUDA matrix.
   d_data, if non-NULL, is an existing device array to use; silently a no-op
   for non-CUDA types.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* Local block: this process's rows by ALL global columns. */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the matrix with random values; purely local. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A dense matrix always stores its diagonal, so it is never "missing".
   NOTE(review): the optional output d is never set here — confirm callers
   only read it when *missing is PETSC_TRUE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIDENSE.  Entries are POSITIONAL: the
   slot index (see the numbered comments) determines which Mat operation a
   function implements, and 0 means "not supported". */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/*
   Preallocates the local dense block (host path).  If the parallel matrix is
   actually MATMPIDENSECUDA the local block is created as MATSEQDENSECUDA so
   the two types stay paired.  data, if non-NULL, is user-provided storage.
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* Local block: this process's rows by ALL global columns. */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts a MATMPIDENSE matrix to MATELEMENTAL.  Each process contributes
   its full local row block (all N columns); Elemental redistributes during
   assembly.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* Global indices of this process's rows and of all columns. */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Exposes a raw pointer to one (local) column of the dense storage. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Releases a pointer obtained with MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Builds a parallel MATMPIDENSE on comm whose per-process row block is the
   given sequential matrix.  n is the desired local column count
   (PETSC_DECIDE lets PETSc split); the local counts must sum to the global
   column count N.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but never used below — looks vestigial; verify before removing. */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_CUDA)
/*
   Converts a MATMPIDENSECUDA matrix back to plain MATMPIDENSE: binds to the
   CPU, restores the host default vector type, un-registers all CUDA-specific
   composed methods, and converts the local block to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place. */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* Composing NULL removes the CUDA-specific method registrations. */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/*
   Converts a MATMPIDENSE matrix to MATMPIDENSECUDA: switches the default
   vector type to CUDA, registers the CUDA-specific composed methods, and
   converts the local block to MATSEQDENSECUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place. */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1689 if (!a->cvec) { 1690 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1691 } 1692 a->vecinuse = col + 1; 1693 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1694 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1695 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1696 *v = a->cvec; 1697 PetscFunctionReturn(0); 1698 } 1699 1700 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1701 { 1702 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1707 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1708 a->vecinuse = 0; 1709 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1710 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1711 *v = NULL; 1712 PetscFunctionReturn(0); 1713 } 1714 1715 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1716 { 1717 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1718 PetscErrorCode ierr; 1719 PetscInt lda; 1720 1721 PetscFunctionBegin; 1722 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1723 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1724 if (!a->cvec) { 1725 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1726 } 1727 a->vecinuse = col + 1; 1728 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1729 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1730 ierr = VecPlaceArray(a->cvec,a->ptrinuse + 
(size_t)col * (size_t)lda);CHKERRQ(ierr); 1731 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1732 *v = a->cvec; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1737 { 1738 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1743 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1744 a->vecinuse = 0; 1745 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1746 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1747 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1748 *v = NULL; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1753 { 1754 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1755 PetscErrorCode ierr; 1756 PetscInt lda; 1757 1758 PetscFunctionBegin; 1759 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1760 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1761 if (!a->cvec) { 1762 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1763 } 1764 a->vecinuse = col + 1; 1765 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1766 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1767 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1768 *v = a->cvec; 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1773 { 1774 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 if (!a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1779 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1780 a->vecinuse = 0; 1781 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1782 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1783 *v = NULL; 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1788 { 1789 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1790 Mat_MPIDense *c; 1791 PetscErrorCode ierr; 1792 MPI_Comm comm; 1793 PetscBool setup = PETSC_FALSE; 1794 1795 PetscFunctionBegin; 1796 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1797 if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1798 if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1799 if (!a->cmat) { 1800 setup = PETSC_TRUE; 1801 ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1802 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1803 ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1804 ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1805 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1806 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1807 } else if (cend-cbegin != a->cmat->cmap->N) { 1808 setup = PETSC_TRUE; 1809 ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1810 ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1811 ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1812 ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1813 } 1814 c = (Mat_MPIDense*)a->cmat->data; 1815 if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1816 ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1817 if 
(setup) { /* do we really need this? */ 1818 ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1819 } 1820 a->cmat->preallocated = PETSC_TRUE; 1821 a->cmat->assembled = PETSC_TRUE; 1822 a->matinuse = cbegin + 1; 1823 *v = a->cmat; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1828 { 1829 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1830 Mat_MPIDense *c; 1831 PetscErrorCode ierr; 1832 1833 PetscFunctionBegin; 1834 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1835 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1836 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1837 a->matinuse = 0; 1838 c = (Mat_MPIDense*)a->cmat->data; 1839 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 1840 *v = NULL; 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1845 { 1846 Mat_MPIDense *a; 1847 PetscErrorCode ierr; 1848 1849 PetscFunctionBegin; 1850 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1851 mat->data = (void*)a; 1852 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1853 1854 mat->insertmode = NOT_SET_VALUES; 1855 1856 /* build cache for off array entries formed */ 1857 a->donotstash = PETSC_FALSE; 1858 1859 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1860 1861 /* stuff used for matrix vector multiply */ 1862 a->lvec = 0; 1863 a->Mvctx = 0; 1864 a->roworiented = PETSC_TRUE; 1865 1866 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1867 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1868 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1869 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1870 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1871 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1872 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1873 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1874 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1875 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1876 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1877 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1878 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1879 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1880 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1881 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1882 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1883 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 1884 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 1885 #if defined(PETSC_HAVE_ELEMENTAL) 1886 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1887 #endif 1888 #if defined(PETSC_HAVE_SCALAPACK) 1889 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr); 1890 #endif 1891 #if defined(PETSC_HAVE_CUDA) 1892 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1893 #endif 1894 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1895 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1896 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1897 #if defined(PETSC_HAVE_CUDA) 1898 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1899 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1900 #endif 1901 1902 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1903 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1904 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1905 
PetscFunctionReturn(0); 1906 } 1907 1908 /*MC 1909 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1910 1911 Options Database Keys: 1912 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1913 1914 Level: beginner 1915 1916 .seealso: 1917 1918 M*/ 1919 #if defined(PETSC_HAVE_CUDA) 1920 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1921 { 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1926 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 #endif 1930 1931 /*MC 1932 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1933 1934 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1935 and MATMPIDENSE otherwise. 1936 1937 Options Database Keys: 1938 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1939 1940 Level: beginner 1941 1942 1943 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1944 M*/ 1945 1946 /*MC 1947 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1948 1949 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1950 and MATMPIDENSECUDA otherwise. 1951 1952 Options Database Keys: 1953 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1954 1955 Level: beginner 1956 1957 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1958 M*/ 1959 1960 /*@C 1961 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1962 1963 Not collective 1964 1965 Input Parameters: 1966 . B - the matrix 1967 - data - optional location of matrix data. Set data=NULL for PETSc 1968 to control all matrix memory allocation. 
1969 1970 Notes: 1971 The dense format is fully compatible with standard Fortran 77 1972 storage by columns. 1973 1974 The data input variable is intended primarily for Fortran programmers 1975 who wish to allocate their own matrix memory space. Most users should 1976 set data=NULL. 1977 1978 Level: intermediate 1979 1980 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1981 @*/ 1982 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1983 { 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1988 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1989 PetscFunctionReturn(0); 1990 } 1991 1992 /*@ 1993 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1994 array provided by the user. This is useful to avoid copying an array 1995 into a matrix 1996 1997 Not Collective 1998 1999 Input Parameters: 2000 + mat - the matrix 2001 - array - the array in column major order 2002 2003 Notes: 2004 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2005 freed when the matrix is destroyed. 
2006 2007 Level: developer 2008 2009 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2010 2011 @*/ 2012 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2018 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2019 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2020 #if defined(PETSC_HAVE_CUDA) 2021 mat->offloadmask = PETSC_OFFLOAD_CPU; 2022 #endif 2023 PetscFunctionReturn(0); 2024 } 2025 2026 /*@ 2027 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2028 2029 Not Collective 2030 2031 Input Parameters: 2032 . mat - the matrix 2033 2034 Notes: 2035 You can only call this after a call to MatDensePlaceArray() 2036 2037 Level: developer 2038 2039 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2040 2041 @*/ 2042 PetscErrorCode MatDenseResetArray(Mat mat) 2043 { 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2048 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2049 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /*@ 2054 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2055 array provided by the user. This is useful to avoid copying an array 2056 into a matrix 2057 2058 Not Collective 2059 2060 Input Parameters: 2061 + mat - the matrix 2062 - array - the array in column major order 2063 2064 Notes: 2065 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2066 freed by the user. It will be freed when the matrix is destroyed. 
2067 2068 Level: developer 2069 2070 .seealso: MatDenseGetArray(), VecReplaceArray() 2071 @*/ 2072 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2073 { 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2078 ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2079 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2080 #if defined(PETSC_HAVE_CUDA) 2081 mat->offloadmask = PETSC_OFFLOAD_CPU; 2082 #endif 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #if defined(PETSC_HAVE_CUDA) 2087 /*@C 2088 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2089 array provided by the user. This is useful to avoid copying an array 2090 into a matrix 2091 2092 Not Collective 2093 2094 Input Parameters: 2095 + mat - the matrix 2096 - array - the array in column major order 2097 2098 Notes: 2099 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2100 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2101 2102 Level: developer 2103 2104 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2105 @*/ 2106 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2107 { 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2112 ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2113 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2114 mat->offloadmask = PETSC_OFFLOAD_GPU; 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /*@C 2119 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2120 2121 Not Collective 2122 2123 Input Parameters: 2124 . 
mat - the matrix 2125 2126 Notes: 2127 You can only call this after a call to MatDenseCUDAPlaceArray() 2128 2129 Level: developer 2130 2131 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2132 2133 @*/ 2134 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2135 { 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2140 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2141 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2147 array provided by the user. This is useful to avoid copying an array 2148 into a matrix 2149 2150 Not Collective 2151 2152 Input Parameters: 2153 + mat - the matrix 2154 - array - the array in column major order 2155 2156 Notes: 2157 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2158 The memory passed in CANNOT be freed by the user. It will be freed 2159 when the matrix is destroyed. The array should respect the matrix leading dimension. 2160 2161 Level: developer 2162 2163 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2164 @*/ 2165 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2166 { 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2171 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2172 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2173 mat->offloadmask = PETSC_OFFLOAD_GPU; 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /*@C 2178 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2179 2180 Not Collective 2181 2182 Input Parameters: 2183 . A - the matrix 2184 2185 Output Parameters 2186 . 
array - the GPU array in column major order 2187 2188 Notes: 2189 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2190 2191 Level: developer 2192 2193 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2194 @*/ 2195 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2196 { 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2201 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2202 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 /*@C 2207 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2208 2209 Not Collective 2210 2211 Input Parameters: 2212 + A - the matrix 2213 - array - the GPU array in column major order 2214 2215 Notes: 2216 2217 Level: developer 2218 2219 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2220 @*/ 2221 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2222 { 2223 PetscErrorCode ierr; 2224 2225 PetscFunctionBegin; 2226 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2227 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2228 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2229 A->offloadmask = PETSC_OFFLOAD_GPU; 2230 PetscFunctionReturn(0); 2231 } 2232 2233 /*@C 2234 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2235 2236 Not Collective 2237 2238 Input Parameters: 2239 . A - the matrix 2240 2241 Output Parameters 2242 . array - the GPU array in column major order 2243 2244 Notes: 2245 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2246 2247 Level: developer 2248 2249 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2250 @*/ 2251 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2252 { 2253 PetscErrorCode ierr; 2254 2255 PetscFunctionBegin; 2256 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2257 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2258 PetscFunctionReturn(0); 2259 } 2260 2261 /*@C 2262 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2263 2264 Not Collective 2265 2266 Input Parameters: 2267 + A - the matrix 2268 - array - the GPU array in column major order 2269 2270 Notes: 2271 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2272 2273 Level: developer 2274 2275 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2276 @*/ 2277 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2278 { 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 /*@C 2287 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. 
The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2288 2289 Not Collective 2290 2291 Input Parameters: 2292 . A - the matrix 2293 2294 Output Parameters 2295 . array - the GPU array in column major order 2296 2297 Notes: 2298 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2299 2300 Level: developer 2301 2302 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2303 @*/ 2304 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2305 { 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2310 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2311 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 /*@C 2316 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2317 2318 Not Collective 2319 2320 Input Parameters: 2321 + A - the matrix 2322 - array - the GPU array in column major order 2323 2324 Notes: 2325 2326 Level: developer 2327 2328 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2329 @*/ 2330 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2331 { 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2336 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2337 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2338 A->offloadmask = PETSC_OFFLOAD_GPU; 2339 PetscFunctionReturn(0); 2340 } 2341 #endif 2342 2343 /*@C 2344 MatCreateDense - Creates a matrix in dense format. 2345 2346 Collective 2347 2348 Input Parameters: 2349 + comm - MPI communicator 2350 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2351 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2352 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2353 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2354 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2355 to control all matrix memory allocation. 2356 2357 Output Parameter: 2358 . A - the matrix 2359 2360 Notes: 2361 The dense format is fully compatible with standard Fortran 77 2362 storage by columns. 2363 2364 The data input variable is intended primarily for Fortran programmers 2365 who wish to allocate their own matrix memory space. Most users should 2366 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2367 2368 The user MUST specify either the local or global matrix dimensions 2369 (possibly both). 
2370 2371 Level: intermediate 2372 2373 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2374 @*/ 2375 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2376 { 2377 PetscErrorCode ierr; 2378 PetscMPIInt size; 2379 2380 PetscFunctionBegin; 2381 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2382 PetscValidLogicalCollectiveBool(*A,!!data,6); 2383 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2384 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2385 if (size > 1) { 2386 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2387 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2388 if (data) { /* user provided data array, so no need to assemble */ 2389 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2390 (*A)->assembled = PETSC_TRUE; 2391 } 2392 } else { 2393 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2394 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #if defined(PETSC_HAVE_CUDA) 2400 /*@C 2401 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2402 2403 Collective 2404 2405 Input Parameters: 2406 + comm - MPI communicator 2407 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2408 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2409 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2410 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2411 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2412 to control matrix memory allocation. 2413 2414 Output Parameter: 2415 . 
A - the matrix 2416 2417 Notes: 2418 2419 Level: intermediate 2420 2421 .seealso: MatCreate(), MatCreateDense() 2422 @*/ 2423 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2424 { 2425 PetscErrorCode ierr; 2426 PetscMPIInt size; 2427 2428 PetscFunctionBegin; 2429 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2430 PetscValidLogicalCollectiveBool(*A,!!data,6); 2431 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2432 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2433 if (size > 1) { 2434 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2435 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2436 if (data) { /* user provided data array, so no need to assemble */ 2437 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2438 (*A)->assembled = PETSC_TRUE; 2439 } 2440 } else { 2441 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2442 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2443 } 2444 PetscFunctionReturn(0); 2445 } 2446 #endif 2447 2448 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2449 { 2450 Mat mat; 2451 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2452 PetscErrorCode ierr; 2453 2454 PetscFunctionBegin; 2455 *newmat = 0; 2456 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2457 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2458 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2459 a = (Mat_MPIDense*)mat->data; 2460 2461 mat->factortype = A->factortype; 2462 mat->assembled = PETSC_TRUE; 2463 mat->preallocated = PETSC_TRUE; 2464 2465 mat->insertmode = NOT_SET_VALUES; 2466 a->donotstash = oldmat->donotstash; 2467 2468 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2469 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2470 2471 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2472 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2473 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2474 2475 *newmat = mat; 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2480 { 2481 PetscErrorCode ierr; 2482 PetscBool isbinary; 2483 #if defined(PETSC_HAVE_HDF5) 2484 PetscBool ishdf5; 2485 #endif 2486 2487 PetscFunctionBegin; 2488 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2489 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2490 /* force binary viewer to load .info file if it has not yet done so */ 2491 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2492 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2493 #if defined(PETSC_HAVE_HDF5) 2494 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2495 #endif 2496 if (isbinary) { 2497 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2498 #if defined(PETSC_HAVE_HDF5) 2499 } else if (ishdf5) { 2500 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2501 #endif 2502 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2507 { 2508 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2509 Mat a,b; 2510 PetscBool flg; 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 a = matA->A; 2515 b = matB->A; 2516 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2517 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_TransMatMultDense *atb = 
(Mat_TransMatMultDense *)data; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2528 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2529 ierr = PetscFree(atb);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2534 { 2535 PetscErrorCode ierr; 2536 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2540 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2541 ierr = PetscFree(abt);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2546 { 2547 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2548 Mat_TransMatMultDense *atb; 2549 PetscErrorCode ierr; 2550 MPI_Comm comm; 2551 PetscMPIInt size,*recvcounts; 2552 PetscScalar *carray,*sendbuf; 2553 const PetscScalar *atbarray; 2554 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2555 const PetscInt *ranges; 2556 2557 PetscFunctionBegin; 2558 MatCheckProduct(C,3); 2559 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2560 atb = (Mat_TransMatMultDense *)C->product->data; 2561 recvcounts = atb->recvcounts; 2562 sendbuf = atb->sendbuf; 2563 2564 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2566 2567 /* compute atbarray = aseq^T * bseq */ 2568 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2569 2570 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2571 2572 /* arrange atbarray into sendbuf */ 2573 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2574 for (proc=0, k=0; proc<size; proc++) { 2575 for (j=0; j<cN; j++) { 2576 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2577 } 2578 } 2579 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2580 2581 /* sum all atbarray to local values of C */ 2582 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2583 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2584 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2585 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2591 { 2592 PetscErrorCode ierr; 2593 MPI_Comm comm; 2594 PetscMPIInt size; 2595 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2596 Mat_TransMatMultDense *atb; 2597 PetscBool cisdense; 2598 PetscInt i; 2599 const PetscInt *ranges; 2600 2601 PetscFunctionBegin; 2602 MatCheckProduct(C,3); 2603 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2604 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2605 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2606 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2607 } 2608 2609 /* create matrix product C */ 2610 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2611 ierr = 
PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2612 if (!cisdense) { 2613 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2614 } 2615 ierr = MatSetUp(C);CHKERRQ(ierr); 2616 2617 /* create data structure for reuse C */ 2618 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2619 ierr = PetscNew(&atb);CHKERRQ(ierr); 2620 cM = C->rmap->N; 2621 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2622 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2623 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2624 2625 C->product->data = atb; 2626 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2627 PetscFunctionReturn(0); 2628 } 2629 2630 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2631 { 2632 PetscErrorCode ierr; 2633 MPI_Comm comm; 2634 PetscMPIInt i, size; 2635 PetscInt maxRows, bufsiz; 2636 PetscMPIInt tag; 2637 PetscInt alg; 2638 Mat_MatTransMultDense *abt; 2639 Mat_Product *product = C->product; 2640 PetscBool flg; 2641 2642 PetscFunctionBegin; 2643 MatCheckProduct(C,4); 2644 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2645 /* check local size of A and B */ 2646 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2647 2648 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2649 alg = flg ? 
0 : 1; 2650 2651 /* setup matrix product C */ 2652 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2653 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2654 ierr = MatSetUp(C);CHKERRQ(ierr); 2655 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2656 2657 /* create data structure for reuse C */ 2658 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2660 ierr = PetscNew(&abt);CHKERRQ(ierr); 2661 abt->tag = tag; 2662 abt->alg = alg; 2663 switch (alg) { 2664 case 1: /* alg: "cyclic" */ 2665 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2666 bufsiz = A->cmap->N * maxRows; 2667 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2668 break; 2669 default: /* alg: "allgatherv" */ 2670 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2671 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2672 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2673 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2674 break; 2675 } 2676 2677 C->product->data = abt; 2678 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2679 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2680 PetscFunctionReturn(0); 2681 } 2682 2683 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2684 { 2685 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2686 Mat_MatTransMultDense *abt; 2687 PetscErrorCode ierr; 2688 MPI_Comm comm; 2689 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2690 PetscScalar *sendbuf, *recvbuf=0, *cv; 2691 PetscInt i,cK=A->cmap->N,k,j,bn; 2692 PetscScalar 
_DOne=1.0,_DZero=0.0; 2693 const PetscScalar *av,*bv; 2694 PetscBLASInt cm, cn, ck, alda, blda, clda; 2695 MPI_Request reqs[2]; 2696 const PetscInt *ranges; 2697 2698 PetscFunctionBegin; 2699 MatCheckProduct(C,3); 2700 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2701 abt = (Mat_MatTransMultDense*)C->product->data; 2702 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2703 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2704 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2705 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2706 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2707 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2708 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2710 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2712 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2714 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2715 bn = B->rmap->n; 2716 if (blda == bn) { 2717 sendbuf = (PetscScalar*)bv; 2718 } else { 2719 sendbuf = abt->buf[0]; 2720 for (k = 0, i = 0; i < cK; i++) { 2721 for (j = 0; j < bn; j++, k++) { 2722 sendbuf[k] = bv[i * blda + j]; 2723 } 2724 } 2725 } 2726 if (size > 1) { 2727 sendto = (rank + size - 1) % size; 2728 recvfrom = (rank + size + 1) % size; 2729 } else { 2730 sendto = recvfrom = 0; 2731 } 2732 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2734 recvisfrom = rank; 2735 for (i = 0; i < size; i++) { 2736 /* we have finished receiving in sending, bufs can be read/modified */ 2737 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2738 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2739 2740 if (nextrecvisfrom != rank) { 2741 /* start the cyclic 
sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2742 sendsiz = cK * bn; 2743 recvsiz = cK * nextbn; 2744 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2745 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2746 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2747 } 2748 2749 /* local aseq * sendbuf^T */ 2750 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2751 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2752 2753 if (nextrecvisfrom != rank) { 2754 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2755 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2756 } 2757 bn = nextbn; 2758 recvisfrom = nextrecvisfrom; 2759 sendbuf = recvbuf; 2760 } 2761 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2762 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2763 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2764 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2770 { 2771 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2772 Mat_MatTransMultDense *abt; 2773 PetscErrorCode ierr; 2774 MPI_Comm comm; 2775 PetscMPIInt size; 2776 PetscScalar *cv, *sendbuf, *recvbuf; 2777 const PetscScalar *av,*bv; 2778 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2779 PetscScalar _DOne=1.0,_DZero=0.0; 2780 PetscBLASInt cm, cn, ck, alda, clda; 2781 2782 PetscFunctionBegin; 2783 MatCheckProduct(C,3); 2784 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2785 abt = 
(Mat_MatTransMultDense*)C->product->data; 2786 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2787 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2788 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2789 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2790 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2791 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2792 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2793 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2794 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2795 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2796 /* copy transpose of B into buf[0] */ 2797 bn = B->rmap->n; 2798 sendbuf = abt->buf[0]; 2799 recvbuf = abt->buf[1]; 2800 for (k = 0, j = 0; j < bn; j++) { 2801 for (i = 0; i < cK; i++, k++) { 2802 sendbuf[k] = bv[i * blda + j]; 2803 } 2804 } 2805 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2806 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2807 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2808 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2809 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2810 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2811 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2812 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2813 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2814 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2820 { 2821 Mat_MatTransMultDense *abt; 2822 PetscErrorCode ierr; 2823 2824 PetscFunctionBegin; 2825 MatCheckProduct(C,3); 2826 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2827 abt = (Mat_MatTransMultDense*)C->product->data; 2828 switch (abt->alg) { 2829 case 1: 2830 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2831 break; 2832 default: 2833 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2834 break; 2835 } 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2840 { 2841 PetscErrorCode ierr; 2842 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2843 2844 PetscFunctionBegin; 2845 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2846 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2847 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2848 ierr = PetscFree(ab);CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #if defined(PETSC_HAVE_ELEMENTAL) 2853 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2854 { 2855 PetscErrorCode ierr; 2856 Mat_MatMultDense *ab; 2857 2858 PetscFunctionBegin; 2859 MatCheckProduct(C,3); 2860 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2861 ab = (Mat_MatMultDense*)C->product->data; 2862 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2863 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2864 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2865 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2866 PetscFunctionReturn(0); 2867 } 2868 2869 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2870 { 2871 PetscErrorCode ierr; 2872 Mat Ae,Be,Ce; 2873 Mat_MatMultDense *ab; 2874 2875 PetscFunctionBegin; 2876 MatCheckProduct(C,4); 2877 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2878 /* check local size of A 
and B */ 2879 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2880 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2881 } 2882 2883 /* create elemental matrices Ae and Be */ 2884 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2885 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2886 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2887 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2888 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2889 2890 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2891 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2892 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2893 ierr = MatSetUp(Be);CHKERRQ(ierr); 2894 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2895 2896 /* compute symbolic Ce = Ae*Be */ 2897 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2898 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2899 2900 /* setup C */ 2901 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2902 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2903 ierr = MatSetUp(C);CHKERRQ(ierr); 2904 2905 /* create data structure for reuse Cdense */ 2906 ierr = PetscNew(&ab);CHKERRQ(ierr); 2907 ab->Ae = Ae; 2908 ab->Be = Be; 2909 ab->Ce = Ce; 2910 2911 C->product->data = ab; 2912 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2913 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2914 PetscFunctionReturn(0); 2915 } 2916 #endif 2917 /* ----------------------------------------------- */ 2918 #if defined(PETSC_HAVE_ELEMENTAL) 2919 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2920 { 2921 PetscFunctionBegin; 2922 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2923 C->ops->productsymbolic = MatProductSymbolic_AB; 2924 PetscFunctionReturn(0); 2925 } 2926 #endif 2927 2928 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2929 { 2930 Mat_Product *product = C->product; 2931 Mat A = product->A,B=product->B; 2932 2933 PetscFunctionBegin; 2934 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2935 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2936 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2937 C->ops->productsymbolic = MatProductSymbolic_AtB; 2938 PetscFunctionReturn(0); 2939 } 2940 2941 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2942 { 2943 PetscErrorCode ierr; 2944 Mat_Product *product = C->product; 2945 const char *algTypes[2] = {"allgatherv","cyclic"}; 2946 PetscInt alg,nalg = 2; 2947 PetscBool flg = PETSC_FALSE; 2948 2949 PetscFunctionBegin; 2950 /* Set default algorithm */ 2951 alg = 0; /* default is allgatherv */ 2952 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2953 if (flg) { 2954 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2955 } 2956 2957 /* Get runtime option */ 2958 if (product->api_user) { 2959 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2960 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2961 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2962 } else { 2963 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2964 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2965 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2966 } 2967 if (flg) { 2968 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2969 } 2970 2971 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2972 C->ops->productsymbolic = MatProductSymbolic_ABt; 2973 PetscFunctionReturn(0); 2974 } 2975 2976 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2977 { 2978 PetscErrorCode ierr; 2979 Mat_Product *product = C->product; 2980 2981 PetscFunctionBegin; 2982 switch (product->type) { 2983 #if defined(PETSC_HAVE_ELEMENTAL) 2984 case MATPRODUCT_AB: 2985 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2986 break; 2987 #endif 2988 case MATPRODUCT_AtB: 2989 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2990 break; 2991 case MATPRODUCT_ABt: 2992 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2993 break; 2994 default: 2995 break; 2996 } 2997 PetscFunctionReturn(0); 2998 } 2999