1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 66 { 67 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 68 PetscErrorCode ierr; 69 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 70 PetscScalar *array; 71 MPI_Comm comm; 72 PetscBool flg; 73 Mat B; 74 75 PetscFunctionBegin; 76 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 77 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79 if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */ 80 81 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 82 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 100 { 101 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102 PetscErrorCode ierr; 103 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104 PetscBool roworiented = A->roworiented; 105 106 PetscFunctionBegin; 107 for (i=0; i<m; i++) { 108 if (idxm[i] < 0) continue; 109 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 110 if (idxm[i] >= rstart && idxm[i] < rend) { 111 row = idxm[i] - rstart; 112 if (roworiented) { 113 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 114 } else { 115 for (j=0; j<n; j++) { 116 if (idxn[j] < 0) continue; 117 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 118 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 119 } 120 } 121 } else if (!A->donotstash) { 122 mat->assembled = PETSC_FALSE; 123 if (roworiented) { 124 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125 } else { 126 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 127 } 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134 { 135 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136 PetscErrorCode ierr; 137 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 for (j=0; j<n; j++) { 146 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149 } 150 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 156 { 157 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166 { 167 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168 PetscErrorCode ierr; 169 170 PetscFunctionBegin; 171 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174 175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176 { 177 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 196 { 197 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 198 PetscErrorCode ierr; 199 200 PetscFunctionBegin; 201 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 206 { 207 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 208 PetscErrorCode ierr; 209 210 PetscFunctionBegin; 211 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 212 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 217 { 218 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 219 PetscErrorCode ierr; 220 221 PetscFunctionBegin; 222 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 223 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 228 { 229 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 234 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 239 { 240 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 241 PetscErrorCode ierr; 242 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 243 const PetscInt *irow,*icol; 244 const PetscScalar *v; 245 PetscScalar *bv; 246 Mat newmat; 247 IS iscol_local; 248 MPI_Comm comm_is,comm_mat; 249 250 PetscFunctionBegin; 251 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 252 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 253 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 254 255 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 256 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 257 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 258 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 259 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 260 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 261 262 /* No parallel redistribution currently supported! Should really check each index set 263 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 264 original matrix! */ 265 266 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 267 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 268 269 /* Check submatrix call */ 270 if (scall == MAT_REUSE_MATRIX) { 271 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 272 /* Really need to test rows and column sizes! */ 273 newmat = *B; 274 } else { 275 /* Create and fill new matrix */ 276 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 277 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 278 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 279 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 280 } 281 282 /* Now extract the data pointers and do the copy, column at a time */ 283 newmatd = (Mat_MPIDense*)newmat->data; 284 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 285 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 286 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 287 for (i=0; i<Ncols; i++) { 288 const PetscScalar *av = v + lda*icol[i]; 289 for (j=0; j<nrows; j++) { 290 *bv++ = av[irow[j] - rstart]; 291 } 292 } 293 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 294 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 295 296 /* Assemble the matrices so that the correct flags are set */ 297 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 298 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 299 300 /* Free work space */ 301 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 302 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 303 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 304 *B = newmat; 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 309 { 310 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 319 { 320 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 321 PetscErrorCode ierr; 322 323 PetscFunctionBegin; 324 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 329 { 330 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 339 { 340 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 341 PetscErrorCode ierr; 342 PetscInt nstash,reallocs; 343 344 PetscFunctionBegin; 345 if (mdn->donotstash || mat->nooffprocentries) return(0); 346 347 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 348 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 349 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 354 { 355 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 356 PetscErrorCode ierr; 357 PetscInt i,*row,*col,flg,j,rstart,ncols; 358 PetscMPIInt n; 359 PetscScalar *val; 360 361 PetscFunctionBegin; 362 if (!mdn->donotstash && !mat->nooffprocentries) { 363 /* wait on receives */ 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { 371 if (row[j] != rstart) break; 372 } 373 if (j < n) ncols = j-i; 374 else ncols = n-i; 375 /* Now assemble all these values with a single function call */ 376 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 377 i = j; 378 } 379 } 380 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 381 } 382 383 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 384 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 388 } 389 PetscFunctionReturn(0); 390 } 391 392 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 393 { 394 PetscErrorCode ierr; 395 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 396 397 PetscFunctionBegin; 398 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 403 { 404 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 405 PetscErrorCode ierr; 406 PetscInt i,len,*lrows; 407 408 PetscFunctionBegin; 409 /* get locally owned rows */ 410 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 411 /* fix right hand side if needed */ 412 if (x && b) { 413 const PetscScalar *xx; 414 PetscScalar *bb; 415 416 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 417 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 418 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 419 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 420 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 421 } 422 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 423 if (diag != 0.0) { 424 Vec d; 425 426 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 427 ierr = VecSet(d,diag);CHKERRQ(ierr); 428 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecDestroy(&d);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(lrows);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 436 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 437 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 438 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 439 440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 441 { 442 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 443 PetscErrorCode ierr; 444 const PetscScalar *ax; 445 PetscScalar *ay; 446 447 PetscFunctionBegin; 448 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 449 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 450 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 451 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 452 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 453 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 454 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 const PetscScalar *ax; 463 PetscScalar *ay; 464 465 PetscFunctionBegin; 466 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 467 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 468 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 469 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 470 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 471 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 472 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 477 { 478 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 479 PetscErrorCode ierr; 480 const PetscScalar *ax; 481 PetscScalar *ay; 482 483 PetscFunctionBegin; 484 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 485 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 486 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 487 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 488 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 489 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 490 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 491 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 496 { 497 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 498 PetscErrorCode ierr; 499 const PetscScalar *ax; 500 PetscScalar *ay; 501 502 PetscFunctionBegin; 503 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 504 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 505 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 506 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 507 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 508 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 509 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 510 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 PetscErrorCode ierr; 518 PetscInt lda,len,i,n,m = A->rmap->n,radd; 519 PetscScalar *x,zero = 0.0; 520 const PetscScalar *av; 521 522 PetscFunctionBegin; 523 ierr = VecSet(v,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 525 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 527 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 528 radd = A->rmap->rstart*m; 529 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 530 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 531 for (i=0; i<len; i++) { 532 x[i] = av[radd + i*lda + i]; 533 } 534 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatDestroy_MPIDense(Mat mat) 540 { 541 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 542 PetscErrorCode ierr; 543 544 PetscFunctionBegin; 545 #if defined(PETSC_USE_LOG) 546 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 547 #endif 548 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 549 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 550 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 551 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 552 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 553 554 ierr = PetscFree(mat->data);CHKERRQ(ierr); 555 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 556 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 560 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 568 #if defined(PETSC_HAVE_ELEMENTAL) 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 570 #endif 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 592 593 #include <petscdraw.h> 594 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 595 { 596 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 597 PetscErrorCode ierr; 598 PetscMPIInt rank = mdn->rank; 599 PetscViewerType vtype; 600 PetscBool iascii,isdraw; 601 PetscViewer sviewer; 602 PetscViewerFormat format; 603 604 PetscFunctionBegin; 605 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 607 if (iascii) { 608 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 609 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 610 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 611 MatInfo info; 612 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 613 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 614 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 615 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 616 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 617 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } else if (format == PETSC_VIEWER_ASCII_INFO) { 620 PetscFunctionReturn(0); 621 } 622 } else if (isdraw) { 623 PetscDraw draw; 624 PetscBool isnull; 625 626 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 627 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 628 if (isnull) PetscFunctionReturn(0); 629 } 630 631 { 632 /* assemble the entire matrix onto first processor. */ 633 Mat A; 634 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 635 PetscInt *cols; 636 PetscScalar *vals; 637 638 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 639 if (!rank) { 640 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 641 } else { 642 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 643 } 644 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 645 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 646 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 647 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 648 649 /* Copy the matrix ... This isn't the most efficient means, 650 but it's quick for now */ 651 A->insertmode = INSERT_VALUES; 652 653 row = mat->rmap->rstart; 654 m = mdn->A->rmap->n; 655 for (i=0; i<m; i++) { 656 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 657 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 658 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 659 row++; 660 } 661 662 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 663 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 664 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 665 if (!rank) { 666 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 667 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 668 } 669 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 670 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 671 ierr = MatDestroy(&A);CHKERRQ(ierr); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 677 { 678 PetscErrorCode ierr; 679 PetscBool iascii,isbinary,isdraw,issocket; 680 681 PetscFunctionBegin; 682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 683 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 684 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 685 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 686 687 if (iascii || issocket || isdraw) { 688 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 689 } else if (isbinary) { 690 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 691 } 692 PetscFunctionReturn(0); 693 } 694 695 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 696 { 697 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 698 Mat mdn = mat->A; 699 PetscErrorCode ierr; 700 PetscLogDouble isend[5],irecv[5]; 701 702 PetscFunctionBegin; 703 info->block_size = 1.0; 704 705 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 706 707 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 708 isend[3] = info->memory; isend[4] = info->mallocs; 709 if (flag == MAT_LOCAL) { 710 info->nz_used = isend[0]; 711 info->nz_allocated = isend[1]; 712 info->nz_unneeded = isend[2]; 713 info->memory = isend[3]; 714 info->mallocs = isend[4]; 715 } else if (flag == MAT_GLOBAL_MAX) { 716 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 717 718 info->nz_used = irecv[0]; 719 info->nz_allocated = irecv[1]; 720 info->nz_unneeded = irecv[2]; 721 info->memory = irecv[3]; 722 info->mallocs = irecv[4]; 723 } else if (flag == MAT_GLOBAL_SUM) { 724 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 725 726 info->nz_used = irecv[0]; 727 info->nz_allocated = irecv[1]; 728 info->nz_unneeded = irecv[2]; 729 info->memory = irecv[3]; 730 info->mallocs = irecv[4]; 731 } 732 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 733 info->fill_ratio_needed = 0; 734 info->factor_mallocs = 0; 735 PetscFunctionReturn(0); 736 } 737 738 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 739 { 740 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 741 PetscErrorCode ierr; 742 743 PetscFunctionBegin; 744 switch (op) { 745 case MAT_NEW_NONZERO_LOCATIONS: 746 case MAT_NEW_NONZERO_LOCATION_ERR: 747 case MAT_NEW_NONZERO_ALLOCATION_ERR: 748 MatCheckPreallocated(A,1); 749 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 750 break; 751 case MAT_ROW_ORIENTED: 752 MatCheckPreallocated(A,1); 753 a->roworiented = flg; 754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 755 break; 756 case MAT_NEW_DIAGONALS: 757 case MAT_KEEP_NONZERO_PATTERN: 758 case MAT_USE_HASH_TABLE: 759 case MAT_SORTED_FULL: 760 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 761 break; 762 case MAT_IGNORE_OFF_PROC_ENTRIES: 763 a->donotstash = flg; 764 break; 765 case MAT_SYMMETRIC: 766 case MAT_STRUCTURALLY_SYMMETRIC: 767 case MAT_HERMITIAN: 768 case MAT_SYMMETRY_ETERNAL: 769 case MAT_IGNORE_LOWER_TRIANGULAR: 770 case MAT_IGNORE_ZERO_ENTRIES: 771 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 772 break; 773 default: 774 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 775 } 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 780 { 781 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 782 const PetscScalar *l; 783 PetscScalar x,*v,*vv,*r; 784 PetscErrorCode ierr; 785 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 786 787 PetscFunctionBegin; 788 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 789 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 790 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 791 if (ll) { 792 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 793 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 794 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 795 for (i=0; i<m; i++) { 796 x = l[i]; 797 v = vv + i; 798 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 799 } 800 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 801 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 802 } 803 if (rr) { 804 const PetscScalar *ar; 805 806 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 807 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 808 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 809 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 810 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 811 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 812 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 813 for (i=0; i<n; i++) { 814 x = r[i]; 815 v = vv + i*lda; 816 for (j=0; j<m; j++) (*v++) *= x; 817 } 818 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 819 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 820 } 821 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 825 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 826 { 827 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 828 PetscErrorCode ierr; 829 PetscInt i,j; 830 PetscReal sum = 0.0; 831 const PetscScalar *av,*v; 832 833 PetscFunctionBegin; 834 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 835 v = av; 836 if (mdn->size == 1) { 837 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 838 } else { 839 if (type == NORM_FROBENIUS) { 840 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 842 } 843 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 844 *nrm = PetscSqrtReal(*nrm); 845 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 846 } else if (type == NORM_1) { 847 PetscReal *tmp,*tmp2; 848 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 849 *nrm = 0.0; 850 v = av; 851 for (j=0; j<mdn->A->cmap->n; j++) { 852 for (i=0; i<mdn->A->rmap->n; i++) { 853 tmp[j] += PetscAbsScalar(*v); v++; 854 } 855 } 856 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 857 for (j=0; j<A->cmap->N; j++) { 858 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 859 } 860 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 861 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 862 } else if (type == NORM_INFINITY) { /* max row norm */ 863 PetscReal ntemp; 864 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 865 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 866 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 867 } 868 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 873 { 874 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 875 Mat B; 876 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 877 PetscErrorCode ierr; 878 PetscInt j,i,lda; 879 PetscScalar *v; 880 881 PetscFunctionBegin; 882 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 883 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 884 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 885 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 886 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 887 } else B = *matout; 888 889 m = a->A->rmap->n; n = a->A->cmap->n; 890 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 891 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 892 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 893 for (i=0; i<m; i++) rwork[i] = rstart + i; 894 for (j=0; j<n; j++) { 895 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 896 v += lda; 897 } 898 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 899 ierr = PetscFree(rwork);CHKERRQ(ierr); 900 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 901 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 902 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 903 *matout = B; 904 } else { 905 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 906 } 907 PetscFunctionReturn(0); 908 } 909 910 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 911 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 912 913 PetscErrorCode MatSetUp_MPIDense(Mat A) 914 { 915 PetscErrorCode ierr; 916 917 PetscFunctionBegin; 918 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 919 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 920 if (!A->preallocated) { 921 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } 925 926 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 927 { 928 PetscErrorCode ierr; 929 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 930 931 PetscFunctionBegin; 932 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 PetscErrorCode MatConjugate_MPIDense(Mat mat) 937 { 938 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 ierr = MatConjugate(a->A);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatRealPart_MPIDense(Mat A) 947 { 948 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 ierr = MatRealPart(a->A);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 967 { 968 PetscErrorCode ierr; 969 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 970 971 PetscFunctionBegin; 972 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 973 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 974 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 979 980 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 981 { 982 PetscErrorCode ierr; 983 PetscInt i,n; 984 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 985 PetscReal *work; 986 987 PetscFunctionBegin; 988 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 989 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 990 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 991 if (type == NORM_2) { 992 for (i=0; i<n; i++) work[i] *= work[i]; 993 } 994 if (type == NORM_INFINITY) { 995 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 996 } else { 997 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 998 } 999 ierr = PetscFree(work);CHKERRQ(ierr); 1000 if (type == NORM_2) { 1001 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #if defined(PETSC_HAVE_CUDA) 1007 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1008 { 1009 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1010 PetscErrorCode ierr; 1011 PetscInt lda; 1012 1013 PetscFunctionBegin; 1014 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1015 if (!a->cvec) { 1016 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1017 } 1018 a->vecinuse = col + 1; 1019 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1020 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1021 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1022 *v = a->cvec; 1023 PetscFunctionReturn(0); 1024 } 1025 1026 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1027 { 1028 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1029 PetscErrorCode ierr; 1030 1031 PetscFunctionBegin; 1032 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1033 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1034 a->vecinuse = 0; 1035 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1036 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1037 *v = NULL; 1038 PetscFunctionReturn(0); 1039 } 1040 1041 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1042 { 1043 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1044 PetscInt lda; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1049 if (!a->cvec) { 1050 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1051 } 1052 a->vecinuse = col + 1; 1053 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1054 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1055 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1056 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1057 *v = a->cvec; 1058 PetscFunctionReturn(0); 1059 } 1060 1061 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1062 { 1063 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1064 PetscErrorCode ierr; 1065 1066 PetscFunctionBegin; 1067 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1068 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1069 a->vecinuse = 0; 1070 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1071 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1072 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1073 *v = NULL; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1078 { 1079 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1080 PetscErrorCode ierr; 1081 PetscInt lda; 1082 1083 PetscFunctionBegin; 1084 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1085 if (!a->cvec) { 1086 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1087 } 1088 a->vecinuse = col + 1; 1089 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1090 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1091 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1092 *v = a->cvec; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1097 { 1098 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1103 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1104 a->vecinuse = 0; 1105 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1106 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1107 *v = NULL; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1112 { 1113 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1118 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1123 { 1124 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1129 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1134 { 1135 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1140 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1145 { 1146 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1155 { 1156 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1165 { 1166 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1175 { 1176 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1185 { 1186 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1195 { 1196 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1205 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1206 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1207 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1208 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1209 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1210 1211 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1212 { 1213 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1218 if (d->A) { 1219 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1220 } 1221 mat->boundtocpu = bind; 1222 if (!bind) { 1223 PetscBool iscuda; 1224 1225 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1226 if (!iscuda) { 1227 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1228 } 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1247 { 1248 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1249 PetscErrorCode ierr; 1250 PetscBool iscuda; 1251 1252 PetscFunctionBegin; 1253 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1254 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1255 if (!iscuda) PetscFunctionReturn(0); 1256 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1257 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1258 if (!d->A) { 1259 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1260 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1261 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1262 } 1263 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1264 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1265 A->preallocated = PETSC_TRUE; 1266 PetscFunctionReturn(0); 1267 } 1268 #endif 1269 1270 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1271 { 1272 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1281 1282 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1283 { 1284 PetscFunctionBegin; 1285 *missing = PETSC_FALSE; 1286 PetscFunctionReturn(0); 1287 } 1288 1289 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1290 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1291 1292 /* -------------------------------------------------------------------*/ 1293 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1294 MatGetRow_MPIDense, 1295 MatRestoreRow_MPIDense, 1296 MatMult_MPIDense, 1297 /* 4*/ MatMultAdd_MPIDense, 1298 MatMultTranspose_MPIDense, 1299 MatMultTransposeAdd_MPIDense, 1300 0, 1301 0, 1302 0, 1303 /* 10*/ 0, 1304 0, 1305 0, 1306 0, 1307 MatTranspose_MPIDense, 1308 /* 15*/ MatGetInfo_MPIDense, 1309 MatEqual_MPIDense, 1310 MatGetDiagonal_MPIDense, 1311 MatDiagonalScale_MPIDense, 1312 MatNorm_MPIDense, 1313 /* 20*/ MatAssemblyBegin_MPIDense, 1314 MatAssemblyEnd_MPIDense, 1315 MatSetOption_MPIDense, 1316 MatZeroEntries_MPIDense, 1317 /* 24*/ MatZeroRows_MPIDense, 1318 0, 1319 0, 1320 0, 1321 0, 1322 /* 29*/ MatSetUp_MPIDense, 1323 0, 1324 0, 1325 MatGetDiagonalBlock_MPIDense, 1326 0, 1327 /* 34*/ MatDuplicate_MPIDense, 1328 0, 1329 0, 1330 0, 1331 0, 1332 /* 39*/ MatAXPY_MPIDense, 1333 MatCreateSubMatrices_MPIDense, 1334 0, 1335 MatGetValues_MPIDense, 1336 0, 1337 /* 44*/ 0, 1338 MatScale_MPIDense, 1339 MatShift_Basic, 1340 0, 1341 0, 1342 /* 49*/ MatSetRandom_MPIDense, 1343 0, 1344 0, 1345 0, 1346 0, 1347 /* 54*/ 0, 1348 0, 1349 0, 1350 0, 1351 0, 1352 /* 59*/ MatCreateSubMatrix_MPIDense, 1353 MatDestroy_MPIDense, 1354 MatView_MPIDense, 1355 0, 1356 0, 1357 /* 64*/ 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 /* 69*/ 0, 1363 0, 1364 0, 1365 0, 1366 0, 1367 /* 74*/ 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /* 79*/ 0, 1373 0, 1374 0, 1375 0, 1376 /* 83*/ MatLoad_MPIDense, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /* 89*/ 0, 1383 0, 1384 MatMatMultNumeric_MPIDense, 1385 0, 1386 0, 1387 /* 94*/ 0, 1388 0, 1389 0, 1390 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1391 0, 1392 /* 99*/ MatProductSetFromOptions_MPIDense, 1393 0, 1394 0, 1395 MatConjugate_MPIDense, 1396 0, 1397 /*104*/ 0, 1398 MatRealPart_MPIDense, 1399 MatImaginaryPart_MPIDense, 1400 0, 1401 0, 1402 /*109*/ 0, 1403 0, 1404 0, 1405 MatGetColumnVector_MPIDense, 1406 MatMissingDiagonal_MPIDense, 1407 /*114*/ 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*119*/ 0, 1413 0, 1414 0, 1415 0, 1416 0, 1417 /*124*/ 0, 1418 MatGetColumnNorms_MPIDense, 1419 0, 1420 0, 1421 0, 1422 /*129*/ 0, 1423 0, 1424 0, 1425 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1426 0, 1427 /*134*/ 0, 1428 0, 1429 0, 1430 0, 1431 0, 1432 /*139*/ 0, 1433 0, 1434 0, 1435 0, 1436 0, 1437 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1438 /*145*/ 0, 1439 0, 1440 0 1441 }; 1442 1443 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1444 { 1445 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1446 PetscBool iscuda; 1447 PetscErrorCode ierr; 1448 1449 PetscFunctionBegin; 1450 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1451 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1452 if (!a->A) { 1453 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1454 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1455 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1456 } 1457 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1458 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1459 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1460 mat->preallocated = PETSC_TRUE; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #if defined(PETSC_HAVE_ELEMENTAL) 1465 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1466 { 1467 Mat mat_elemental; 1468 PetscErrorCode ierr; 1469 PetscScalar *v; 1470 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1471 1472 PetscFunctionBegin; 1473 if (reuse == MAT_REUSE_MATRIX) { 1474 mat_elemental = *newmat; 1475 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1476 } else { 1477 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1478 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1479 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1480 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1481 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1482 } 1483 1484 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1485 for (i=0; i<N; i++) cols[i] = i; 1486 for (i=0; i<m; i++) rows[i] = rstart + i; 1487 1488 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1489 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1490 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1491 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1492 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1493 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1494 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1495 1496 if (reuse == MAT_INPLACE_MATRIX) { 1497 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1498 } else { 1499 *newmat = mat_elemental; 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 #endif 1504 1505 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1506 { 1507 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1516 { 1517 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1526 { 1527 PetscErrorCode ierr; 1528 Mat_MPIDense *mat; 1529 PetscInt m,nloc,N; 1530 1531 PetscFunctionBegin; 1532 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1533 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1534 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1535 PetscInt sum; 1536 1537 if (n == PETSC_DECIDE) { 1538 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1539 } 1540 /* Check sum(n) = N */ 1541 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1542 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1543 1544 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1545 } 1546 1547 /* numeric phase */ 1548 mat = (Mat_MPIDense*)(*outmat)->data; 1549 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1550 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1551 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #if defined(PETSC_HAVE_CUDA) 1556 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1557 { 1558 Mat B; 1559 Mat_MPIDense *m; 1560 PetscErrorCode ierr; 1561 1562 PetscFunctionBegin; 1563 if (reuse == MAT_INITIAL_MATRIX) { 1564 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1565 } else if (reuse == MAT_REUSE_MATRIX) { 1566 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1567 } 1568 1569 B = *newmat; 1570 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1571 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1572 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1573 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1584 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1586 m = (Mat_MPIDense*)(B)->data; 1587 if (m->A) { 1588 ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1589 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1590 } 1591 B->ops->bindtocpu = NULL; 1592 B->offloadmask = PETSC_OFFLOAD_CPU; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1597 { 1598 Mat B; 1599 Mat_MPIDense *m; 1600 PetscErrorCode ierr; 1601 1602 PetscFunctionBegin; 1603 if (reuse == MAT_INITIAL_MATRIX) { 1604 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1605 } else if (reuse == MAT_REUSE_MATRIX) { 1606 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1607 } 1608 1609 B = *newmat; 1610 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1611 ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1612 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1625 m = (Mat_MPIDense*)(B)->data; 1626 if (m->A) { 1627 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1628 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1629 B->offloadmask = PETSC_OFFLOAD_BOTH; 1630 } else { 1631 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1632 } 1633 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1634 1635 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1636 PetscFunctionReturn(0); 1637 } 1638 #endif 1639 1640 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1641 { 1642 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1643 PetscErrorCode ierr; 1644 PetscInt lda; 1645 1646 PetscFunctionBegin; 1647 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1648 if (!a->cvec) { 1649 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1650 } 1651 a->vecinuse = col + 1; 1652 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1653 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1655 *v = a->cvec; 1656 PetscFunctionReturn(0); 1657 } 1658 1659 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1660 { 1661 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1662 PetscErrorCode ierr; 1663 1664 PetscFunctionBegin; 1665 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1666 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1667 a->vecinuse = 0; 1668 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1669 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1670 *v = NULL; 1671 PetscFunctionReturn(0); 1672 } 1673 1674 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1675 { 1676 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1677 PetscErrorCode ierr; 1678 PetscInt lda; 1679 1680 PetscFunctionBegin; 1681 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1682 if (!a->cvec) { 1683 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1684 } 1685 a->vecinuse = col + 1; 1686 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1687 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1688 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1689 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1690 *v = a->cvec; 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1695 { 1696 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1697 PetscErrorCode ierr; 1698 1699 PetscFunctionBegin; 1700 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1701 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1702 a->vecinuse = 0; 1703 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1704 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1705 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1706 *v = NULL; 1707 PetscFunctionReturn(0); 1708 } 1709 1710 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1711 { 1712 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1713 PetscErrorCode ierr; 1714 PetscInt lda; 1715 1716 PetscFunctionBegin; 1717 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1718 if (!a->cvec) { 1719 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1720 } 1721 a->vecinuse = col + 1; 1722 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1723 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1724 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1725 *v = a->cvec; 1726 PetscFunctionReturn(0); 1727 } 1728 1729 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1730 { 1731 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1736 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1737 a->vecinuse = 0; 1738 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1739 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1740 *v = NULL; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1745 { 1746 Mat_MPIDense *a; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1751 mat->data = (void*)a; 1752 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1753 1754 mat->insertmode = NOT_SET_VALUES; 1755 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1756 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1757 1758 /* build cache for off array entries formed */ 1759 a->donotstash = PETSC_FALSE; 1760 1761 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1762 1763 /* stuff used for matrix vector multiply */ 1764 a->lvec = 0; 1765 a->Mvctx = 0; 1766 a->roworiented = PETSC_TRUE; 1767 1768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1770 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1773 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1774 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1776 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 1779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1783 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1785 #if defined(PETSC_HAVE_ELEMENTAL) 1786 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1787 #endif 1788 #if defined(PETSC_HAVE_CUDA) 1789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1790 #endif 1791 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1796 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 1797 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 1798 1799 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1800 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1801 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1802 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1803 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /*MC 1808 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1809 1810 Options Database Keys: 1811 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1812 1813 Level: beginner 1814 1815 .seealso: 1816 1817 M*/ 1818 #if defined(PETSC_HAVE_CUDA) 1819 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1820 { 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1825 ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 #endif 1829 1830 /*MC 1831 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1832 1833 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1834 and MATMPIDENSE otherwise. 1835 1836 Options Database Keys: 1837 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1838 1839 Level: beginner 1840 1841 1842 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1843 M*/ 1844 1845 /*MC 1846 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1847 1848 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1849 and MATMPIDENSECUDA otherwise. 1850 1851 Options Database Keys: 1852 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1853 1854 Level: beginner 1855 1856 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1857 M*/ 1858 1859 /*@C 1860 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1861 1862 Not collective 1863 1864 Input Parameters: 1865 . B - the matrix 1866 - data - optional location of matrix data. Set data=NULL for PETSc 1867 to control all matrix memory allocation. 1868 1869 Notes: 1870 The dense format is fully compatible with standard Fortran 77 1871 storage by columns. 1872 1873 The data input variable is intended primarily for Fortran programmers 1874 who wish to allocate their own matrix memory space. Most users should 1875 set data=NULL. 1876 1877 Level: intermediate 1878 1879 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1880 @*/ 1881 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1882 { 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1887 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 /*@ 1892 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1893 array provided by the user. This is useful to avoid copying an array 1894 into a matrix 1895 1896 Not Collective 1897 1898 Input Parameters: 1899 + mat - the matrix 1900 - array - the array in column major order 1901 1902 Notes: 1903 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1904 freed when the matrix is destroyed. 1905 1906 Level: developer 1907 1908 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1909 1910 @*/ 1911 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 1912 { 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1917 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1918 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1919 #if defined(PETSC_HAVE_CUDA) 1920 mat->offloadmask = PETSC_OFFLOAD_CPU; 1921 #endif 1922 PetscFunctionReturn(0); 1923 } 1924 1925 /*@ 1926 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1927 1928 Not Collective 1929 1930 Input Parameters: 1931 . mat - the matrix 1932 1933 Notes: 1934 You can only call this after a call to MatDensePlaceArray() 1935 1936 Level: developer 1937 1938 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1939 1940 @*/ 1941 PetscErrorCode MatDenseResetArray(Mat mat) 1942 { 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1947 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1948 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1949 PetscFunctionReturn(0); 1950 } 1951 1952 /*@ 1953 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1954 array provided by the user. This is useful to avoid copying an array 1955 into a matrix 1956 1957 Not Collective 1958 1959 Input Parameters: 1960 + mat - the matrix 1961 - array - the array in column major order 1962 1963 Notes: 1964 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 1965 freed by the user. It will be freed when the matrix is destroyed. 1966 1967 Level: developer 1968 1969 .seealso: MatDenseGetArray(), VecReplaceArray() 1970 @*/ 1971 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 1972 { 1973 PetscErrorCode ierr; 1974 1975 PetscFunctionBegin; 1976 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1977 ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1978 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1979 #if defined(PETSC_HAVE_CUDA) 1980 mat->offloadmask = PETSC_OFFLOAD_CPU; 1981 #endif 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #if defined(PETSC_HAVE_CUDA) 1986 /*@C 1987 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 1988 array provided by the user. This is useful to avoid copying an array 1989 into a matrix 1990 1991 Not Collective 1992 1993 Input Parameters: 1994 + mat - the matrix 1995 - array - the array in column major order 1996 1997 Notes: 1998 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 1999 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2000 2001 Level: developer 2002 2003 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2004 @*/ 2005 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2006 { 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2011 ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2012 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2013 mat->offloadmask = PETSC_OFFLOAD_GPU; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 /*@C 2018 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2019 2020 Not Collective 2021 2022 Input Parameters: 2023 . mat - the matrix 2024 2025 Notes: 2026 You can only call this after a call to MatDenseCUDAPlaceArray() 2027 2028 Level: developer 2029 2030 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2031 2032 @*/ 2033 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2034 { 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2039 ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2040 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 /*@C 2045 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2046 array provided by the user. This is useful to avoid copying an array 2047 into a matrix 2048 2049 Not Collective 2050 2051 Input Parameters: 2052 + mat - the matrix 2053 - array - the array in column major order 2054 2055 Notes: 2056 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2057 The memory passed in CANNOT be freed by the user. It will be freed 2058 when the matrix is destroyed. The array should respect the matrix leading dimension. 2059 2060 Level: developer 2061 2062 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2063 @*/ 2064 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2065 { 2066 PetscErrorCode ierr; 2067 2068 PetscFunctionBegin; 2069 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2070 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2071 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2072 mat->offloadmask = PETSC_OFFLOAD_GPU; 2073 PetscFunctionReturn(0); 2074 } 2075 2076 /*@C 2077 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2078 2079 Not Collective 2080 2081 Input Parameters: 2082 . A - the matrix 2083 2084 Output Parameters 2085 . array - the GPU array in column major order 2086 2087 Notes: 2088 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2089 2090 Level: developer 2091 2092 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2093 @*/ 2094 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2095 { 2096 PetscErrorCode ierr; 2097 2098 PetscFunctionBegin; 2099 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2100 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2101 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /*@C 2106 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2107 2108 Not Collective 2109 2110 Input Parameters: 2111 + A - the matrix 2112 - array - the GPU array in column major order 2113 2114 Notes: 2115 2116 Level: developer 2117 2118 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2119 @*/ 2120 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2121 { 2122 PetscErrorCode ierr; 2123 2124 PetscFunctionBegin; 2125 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2126 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2127 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2128 A->offloadmask = PETSC_OFFLOAD_GPU; 2129 PetscFunctionReturn(0); 2130 } 2131 2132 /*@C 2133 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2134 2135 Not Collective 2136 2137 Input Parameters: 2138 . A - the matrix 2139 2140 Output Parameters 2141 . array - the GPU array in column major order 2142 2143 Notes: 2144 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2145 2146 Level: developer 2147 2148 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2149 @*/ 2150 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2151 { 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2156 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /*@C 2161 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2162 2163 Not Collective 2164 2165 Input Parameters: 2166 + A - the matrix 2167 - array - the GPU array in column major order 2168 2169 Notes: 2170 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2171 2172 Level: developer 2173 2174 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2175 @*/ 2176 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2177 { 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 /*@C 2186 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2187 2188 Not Collective 2189 2190 Input Parameters: 2191 . A - the matrix 2192 2193 Output Parameters 2194 . array - the GPU array in column major order 2195 2196 Notes: 2197 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2198 2199 Level: developer 2200 2201 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2202 @*/ 2203 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2204 { 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2209 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2210 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /*@C 2215 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2216 2217 Not Collective 2218 2219 Input Parameters: 2220 + A - the matrix 2221 - array - the GPU array in column major order 2222 2223 Notes: 2224 2225 Level: developer 2226 2227 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2228 @*/ 2229 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2230 { 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2235 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2236 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2237 A->offloadmask = PETSC_OFFLOAD_GPU; 2238 PetscFunctionReturn(0); 2239 } 2240 #endif 2241 2242 /*@C 2243 MatCreateDense - Creates a matrix in dense format. 2244 2245 Collective 2246 2247 Input Parameters: 2248 + comm - MPI communicator 2249 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2250 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2251 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2252 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2253 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2254 to control all matrix memory allocation. 2255 2256 Output Parameter: 2257 . A - the matrix 2258 2259 Notes: 2260 The dense format is fully compatible with standard Fortran 77 2261 storage by columns. 2262 2263 The data input variable is intended primarily for Fortran programmers 2264 who wish to allocate their own matrix memory space. Most users should 2265 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2266 2267 The user MUST specify either the local or global matrix dimensions 2268 (possibly both). 2269 2270 Level: intermediate 2271 2272 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2273 @*/ 2274 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2275 { 2276 PetscErrorCode ierr; 2277 PetscMPIInt size; 2278 2279 PetscFunctionBegin; 2280 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2281 PetscValidLogicalCollectiveBool(*A,!!data,6); 2282 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2283 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2284 if (size > 1) { 2285 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2286 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2287 if (data) { /* user provided data array, so no need to assemble */ 2288 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2289 (*A)->assembled = PETSC_TRUE; 2290 } 2291 } else { 2292 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2293 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2294 } 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #if defined(PETSC_HAVE_CUDA) 2299 /*@C 2300 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2301 2302 Collective 2303 2304 Input Parameters: 2305 + comm - MPI communicator 2306 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2307 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2308 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2309 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2310 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2311 to control matrix memory allocation. 2312 2313 Output Parameter: 2314 . A - the matrix 2315 2316 Notes: 2317 2318 Level: intermediate 2319 2320 .seealso: MatCreate(), MatCreateDense() 2321 @*/ 2322 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2323 { 2324 PetscErrorCode ierr; 2325 PetscMPIInt size; 2326 2327 PetscFunctionBegin; 2328 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2329 PetscValidLogicalCollectiveBool(*A,!!data,6); 2330 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2332 if (size > 1) { 2333 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2334 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2335 if (data) { /* user provided data array, so no need to assemble */ 2336 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2337 (*A)->assembled = PETSC_TRUE; 2338 } 2339 } else { 2340 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2341 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 #endif 2346 2347 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2348 { 2349 Mat mat; 2350 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2351 PetscErrorCode ierr; 2352 2353 PetscFunctionBegin; 2354 *newmat = 0; 2355 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2356 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2357 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2358 a = (Mat_MPIDense*)mat->data; 2359 2360 mat->factortype = A->factortype; 2361 mat->assembled = PETSC_TRUE; 2362 mat->preallocated = PETSC_TRUE; 2363 2364 a->size = oldmat->size; 2365 a->rank = oldmat->rank; 2366 mat->insertmode = NOT_SET_VALUES; 2367 a->donotstash = oldmat->donotstash; 2368 2369 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2370 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2371 2372 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2373 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2374 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2375 2376 *newmat = mat; 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2381 { 2382 PetscErrorCode ierr; 2383 PetscBool isbinary; 2384 #if defined(PETSC_HAVE_HDF5) 2385 PetscBool ishdf5; 2386 #endif 2387 2388 PetscFunctionBegin; 2389 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2390 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2391 /* force binary viewer to load .info file if it has not yet done so */ 2392 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2394 #if defined(PETSC_HAVE_HDF5) 2395 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2396 #endif 2397 if (isbinary) { 2398 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2399 #if defined(PETSC_HAVE_HDF5) 2400 } else if (ishdf5) { 2401 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2402 #endif 2403 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2408 { 2409 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2410 Mat a,b; 2411 PetscBool flg; 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 a = matA->A; 2416 b = matB->A; 2417 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2418 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 2423 { 2424 PetscErrorCode ierr; 2425 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2426 Mat_TransMatMultDense *atb = a->atbdense; 2427 2428 PetscFunctionBegin; 2429 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2430 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2431 ierr = (*atb->destroy)(A);CHKERRQ(ierr); 2432 ierr = PetscFree(atb);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 2437 { 2438 PetscErrorCode ierr; 2439 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2440 Mat_MatTransMultDense *abt = a->abtdense; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2444 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2445 ierr = (abt->destroy)(A);CHKERRQ(ierr); 2446 ierr = PetscFree(abt);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2451 { 2452 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2453 Mat_TransMatMultDense *atb = c->atbdense; 2454 PetscErrorCode ierr; 2455 MPI_Comm comm; 2456 PetscMPIInt size,*recvcounts=atb->recvcounts; 2457 PetscScalar *carray,*sendbuf=atb->sendbuf; 2458 const PetscScalar *atbarray; 2459 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2460 const PetscInt *ranges; 2461 2462 PetscFunctionBegin; 2463 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2464 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2465 2466 /* compute atbarray = aseq^T * bseq */ 2467 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2468 2469 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2470 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2471 2472 /* arrange atbarray into sendbuf */ 2473 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2474 for (proc=0, k=0; proc<size; proc++) { 2475 for (j=0; j<cN; j++) { 2476 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2477 } 2478 } 2479 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2480 2481 /* sum all atbarray to local values of C */ 2482 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 2483 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2484 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2489 { 2490 PetscErrorCode ierr; 2491 MPI_Comm comm; 2492 PetscMPIInt size; 2493 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2494 Mat_MPIDense *c; 2495 Mat_TransMatMultDense *atb; 2496 2497 PetscFunctionBegin; 2498 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2499 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2500 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2501 } 2502 2503 /* create matrix product C */ 2504 ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2505 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2506 ierr = MatSetUp(C);CHKERRQ(ierr); 2507 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2508 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2509 2510 /* create data structure for reuse C */ 2511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2512 ierr = PetscNew(&atb);CHKERRQ(ierr); 2513 cM = C->rmap->N; 2514 ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2515 2516 c = (Mat_MPIDense*)C->data; 2517 c->atbdense = atb; 2518 atb->destroy = C->ops->destroy; 2519 C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2520 PetscFunctionReturn(0); 2521 } 2522 2523 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2524 { 2525 PetscErrorCode ierr; 2526 MPI_Comm comm; 2527 PetscMPIInt i, size; 2528 PetscInt maxRows, bufsiz; 2529 Mat_MPIDense *c; 2530 PetscMPIInt tag; 2531 PetscInt alg; 2532 Mat_MatTransMultDense *abt; 2533 Mat_Product *product = C->product; 2534 PetscBool flg; 2535 2536 PetscFunctionBegin; 2537 /* check local size of A and B */ 2538 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2539 2540 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2541 alg = flg ? 0 : 1; 2542 2543 /* setup matrix product C */ 2544 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2545 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2546 ierr = MatSetUp(C);CHKERRQ(ierr); 2547 ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr); 2548 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2549 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2550 2551 /* create data structure for reuse C */ 2552 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2553 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2554 ierr = PetscNew(&abt);CHKERRQ(ierr); 2555 abt->tag = tag; 2556 abt->alg = alg; 2557 switch (alg) { 2558 case 1: /* alg: "cyclic" */ 2559 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2560 bufsiz = A->cmap->N * maxRows; 2561 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2562 break; 2563 default: /* alg: "allgatherv" */ 2564 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2565 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2566 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2567 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2568 break; 2569 } 2570 2571 c = (Mat_MPIDense*)C->data; 2572 c->abtdense = abt; 2573 abt->destroy = C->ops->destroy; 2574 C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2575 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2580 { 2581 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2582 Mat_MatTransMultDense *abt = c->abtdense; 2583 PetscErrorCode ierr; 2584 MPI_Comm comm; 2585 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2586 PetscScalar *sendbuf, *recvbuf=0, *cv; 2587 PetscInt i,cK=A->cmap->N,k,j,bn; 2588 PetscScalar _DOne=1.0,_DZero=0.0; 2589 const PetscScalar *av,*bv; 2590 PetscBLASInt cm, cn, ck, alda, blda, clda; 2591 MPI_Request reqs[2]; 2592 const PetscInt *ranges; 2593 2594 PetscFunctionBegin; 2595 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2596 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2597 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2598 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2599 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2600 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2601 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2602 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2603 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2604 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2605 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2606 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2607 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2608 bn = B->rmap->n; 2609 if (blda == bn) { 2610 sendbuf = (PetscScalar*)bv; 2611 } else { 2612 sendbuf = abt->buf[0]; 2613 for (k = 0, i = 0; i < cK; i++) { 2614 for (j = 0; j < bn; j++, k++) { 2615 sendbuf[k] = bv[i * blda + j]; 2616 } 2617 } 2618 } 2619 if (size > 1) { 2620 sendto = (rank + size - 1) % size; 2621 recvfrom = (rank + size + 1) % size; 2622 } else { 2623 sendto = recvfrom = 0; 2624 } 2625 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2626 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2627 recvisfrom = rank; 2628 for (i = 0; i < size; i++) { 2629 /* we have finished receiving in sending, bufs can be read/modified */ 2630 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2631 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2632 2633 if (nextrecvisfrom != rank) { 2634 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2635 sendsiz = cK * bn; 2636 recvsiz = cK * nextbn; 2637 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2638 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2639 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2640 } 2641 2642 /* local aseq * sendbuf^T */ 2643 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2644 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2645 2646 if (nextrecvisfrom != rank) { 2647 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2648 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2649 } 2650 bn = nextbn; 2651 recvisfrom = nextrecvisfrom; 2652 sendbuf = recvbuf; 2653 } 2654 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2655 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2656 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2661 { 2662 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2663 Mat_MatTransMultDense *abt = c->abtdense; 2664 PetscErrorCode ierr; 2665 MPI_Comm comm; 2666 PetscMPIInt size; 2667 PetscScalar *cv, *sendbuf, *recvbuf; 2668 const PetscScalar *av,*bv; 2669 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2670 PetscScalar _DOne=1.0,_DZero=0.0; 2671 PetscBLASInt cm, cn, ck, alda, clda; 2672 2673 PetscFunctionBegin; 2674 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2676 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2677 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2678 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2679 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2680 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2681 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2682 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2683 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2684 /* copy transpose of B into buf[0] */ 2685 bn = B->rmap->n; 2686 sendbuf = abt->buf[0]; 2687 recvbuf = abt->buf[1]; 2688 for (k = 0, j = 0; j < bn; j++) { 2689 for (i = 0; i < cK; i++, k++) { 2690 sendbuf[k] = bv[i * blda + j]; 2691 } 2692 } 2693 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2694 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2698 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2699 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2700 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2701 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2702 PetscFunctionReturn(0); 2703 } 2704 2705 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2706 { 2707 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2708 Mat_MatTransMultDense *abt = c->abtdense; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 switch (abt->alg) { 2713 case 1: 2714 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2715 break; 2716 default: 2717 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2718 break; 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2724 { 2725 PetscErrorCode ierr; 2726 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2727 Mat_MatMultDense *ab = a->abdense; 2728 2729 PetscFunctionBegin; 2730 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2731 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2732 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2733 2734 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2735 ierr = PetscFree(ab);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #if defined(PETSC_HAVE_ELEMENTAL) 2740 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2741 { 2742 PetscErrorCode ierr; 2743 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2744 Mat_MatMultDense *ab=c->abdense; 2745 2746 PetscFunctionBegin; 2747 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2748 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2749 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2750 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2755 { 2756 PetscErrorCode ierr; 2757 Mat Ae,Be,Ce; 2758 Mat_MPIDense *c; 2759 Mat_MatMultDense *ab; 2760 2761 PetscFunctionBegin; 2762 /* check local size of A and B */ 2763 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2764 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2765 } 2766 2767 /* create elemental matrices Ae and Be */ 2768 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2769 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2770 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2771 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2772 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2773 2774 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2775 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2776 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2777 ierr = MatSetUp(Be);CHKERRQ(ierr); 2778 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2779 2780 /* compute symbolic Ce = Ae*Be */ 2781 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2782 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2783 2784 /* setup C */ 2785 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2786 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2787 ierr = MatSetUp(C);CHKERRQ(ierr); 2788 2789 /* create data structure for reuse Cdense */ 2790 ierr = PetscNew(&ab);CHKERRQ(ierr); 2791 c = (Mat_MPIDense*)C->data; 2792 c->abdense = ab; 2793 2794 ab->Ae = Ae; 2795 ab->Be = Be; 2796 ab->Ce = Ce; 2797 ab->destroy = C->ops->destroy; 2798 C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2799 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2800 C->ops->productnumeric = MatProductNumeric_AB; 2801 PetscFunctionReturn(0); 2802 } 2803 #endif 2804 /* ----------------------------------------------- */ 2805 #if defined(PETSC_HAVE_ELEMENTAL) 2806 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2807 { 2808 PetscFunctionBegin; 2809 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2810 C->ops->productsymbolic = MatProductSymbolic_AB; 2811 C->ops->productnumeric = MatProductNumeric_AB; 2812 PetscFunctionReturn(0); 2813 } 2814 #endif 2815 2816 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 2817 { 2818 PetscErrorCode ierr; 2819 Mat_Product *product = C->product; 2820 2821 PetscFunctionBegin; 2822 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 2823 C->ops->productnumeric = MatProductNumeric_AtB; 2824 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 2825 PetscFunctionReturn(0); 2826 } 2827 2828 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2829 { 2830 Mat_Product *product = C->product; 2831 Mat A = product->A,B=product->B; 2832 2833 PetscFunctionBegin; 2834 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2835 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2836 2837 C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 2838 PetscFunctionReturn(0); 2839 } 2840 2841 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2842 { 2843 PetscErrorCode ierr; 2844 Mat_Product *product = C->product; 2845 const char *algTypes[2] = {"allgatherv","cyclic"}; 2846 PetscInt alg,nalg = 2; 2847 PetscBool flg = PETSC_FALSE; 2848 2849 PetscFunctionBegin; 2850 /* Set default algorithm */ 2851 alg = 0; /* default is allgatherv */ 2852 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2853 if (flg) { 2854 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2855 } 2856 2857 /* Get runtime option */ 2858 if (product->api_user) { 2859 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2860 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2861 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2862 } else { 2863 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2864 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2865 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2866 } 2867 if (flg) { 2868 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2869 } 2870 2871 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2872 C->ops->productsymbolic = MatProductSymbolic_ABt; 2873 C->ops->productnumeric = MatProductNumeric_ABt; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2878 { 2879 PetscErrorCode ierr; 2880 Mat_Product *product = C->product; 2881 2882 PetscFunctionBegin; 2883 switch (product->type) { 2884 #if defined(PETSC_HAVE_ELEMENTAL) 2885 case MATPRODUCT_AB: 2886 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2887 break; 2888 #endif 2889 case MATPRODUCT_AtB: 2890 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2891 break; 2892 case MATPRODUCT_ABt: 2893 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2894 break; 2895 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 2896 } 2897 PetscFunctionReturn(0); 2898 } 2899