1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else { 36 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); 37 if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 47 PetscErrorCode ierr; 48 49 PetscFunctionBegin; 50 ierr = MatCopy(Amat->A,Bmat->A,s);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 57 PetscErrorCode ierr; 58 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 59 60 PetscFunctionBegin; 61 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 62 lrow = row - rstart; 63 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 68 { 69 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 72 73 PetscFunctionBegin; 74 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 75 lrow = row - rstart; 76 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 81 { 82 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 83 PetscErrorCode ierr; 84 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 85 PetscScalar *array; 86 MPI_Comm comm; 87 PetscBool flg; 88 Mat B; 89 90 PetscFunctionBegin; 91 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 92 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 93 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 94 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 95 96 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 97 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 98 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 99 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 100 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 101 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 102 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 103 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 104 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 105 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 106 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 107 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 108 *a = B; 109 ierr = MatDestroy(&B);CHKERRQ(ierr); 110 } else *a = B; 111 PetscFunctionReturn(0); 112 } 113 114 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 117 PetscErrorCode ierr; 118 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 119 PetscBool roworiented = A->roworiented; 120 121 PetscFunctionBegin; 122 for (i=0; i<m; i++) { 123 if (idxm[i] < 0) continue; 124 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 125 if (idxm[i] >= rstart && idxm[i] < rend) { 126 row = idxm[i] - rstart; 127 if (roworiented) { 128 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 129 } else { 130 for (j=0; j<n; j++) { 131 if (idxn[j] < 0) continue; 132 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 133 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 134 } 135 } 136 } else if (!A->donotstash) { 137 mat->assembled = PETSC_FALSE; 138 if (roworiented) { 139 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 140 } else { 141 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 142 } 143 } 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 149 { 150 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 153 154 PetscFunctionBegin; 155 for (i=0; i<m; i++) { 156 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 157 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 158 if (idxm[i] >= rstart && idxm[i] < rend) { 159 row = idxm[i] - rstart; 160 for (j=0; j<n; j++) { 161 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 162 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 163 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 164 } 165 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscBool iscuda; 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 if (!a->A) { 188 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 189 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 190 ierr = 
PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 191 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 192 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr); 193 ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 194 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 195 ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 196 } 197 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 219 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 230 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 235 { 236 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (a->vecinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 241 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 242 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 247 { 248 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 249 PetscErrorCode ierr; 250 251 PetscFunctionBegin; 252 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 253 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 254 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 259 { 260 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 265 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 266 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 273 PetscErrorCode ierr; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 284 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 285 if (comm_mat != comm_is) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 288 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 289 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 290 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 291 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 292 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! */ 297 298 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! 
*/ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 309 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 310 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 311 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 318 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* Free work space */ 333 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 334 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 335 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 PetscErrorCode ierr; 344 345 PetscFunctionBegin; 346 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 371 { 372 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 373 PetscErrorCode ierr; 374 PetscInt nstash,reallocs; 375 376 PetscFunctionBegin; 377 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 378 379 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 380 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 381 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 386 { 387 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 388 PetscErrorCode ierr; 389 PetscInt i,*row,*col,flg,j,rstart,ncols; 390 PetscMPIInt n; 391 PetscScalar *val; 392 393 PetscFunctionBegin; 394 if (!mdn->donotstash && !mat->nooffprocentries) { 395 /* wait on receives */ 396 while (1) { 397 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 398 if (!flg) break; 399 400 for (i=0; i<n;) { 401 /* Now identify the consecutive vals belonging to the same row */ 402 for (j=i,rstart=row[j]; j<n; j++) { 403 if (row[j] != rstart) break; 404 } 405 if (j < n) ncols = j-i; 406 else ncols = n-i; 407 /* Now assemble all these values with a single function call */ 408 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 409 i = j; 410 } 411 } 412 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 413 } 414 415 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 417 418 if (!mat->was_assembled && mode == 
MAT_FINAL_ASSEMBLY) { 419 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 425 { 426 PetscErrorCode ierr; 427 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 428 429 PetscFunctionBegin; 430 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 437 PetscErrorCode ierr; 438 PetscInt i,len,*lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 449 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 450 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 451 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 452 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 453 } 454 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 455 if (diag != 0.0) { 456 Vec d; 457 458 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 459 ierr = VecSet(d,diag);CHKERRQ(ierr); 460 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecDestroy(&d);CHKERRQ(ierr); 462 } 463 ierr = PetscFree(lrows);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype,aymtype; 479 
480 PetscFunctionBegin; 481 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 482 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 483 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 484 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 485 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 486 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 487 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 494 PetscErrorCode ierr; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype,aymtype; 498 499 PetscFunctionBegin; 500 ierr = VecGetArrayReadAndMemType(xx,&ax,&axmtype);CHKERRQ(ierr); 501 ierr = VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr); 502 ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);CHKERRQ(ierr); 503 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);CHKERRQ(ierr); 504 ierr = VecRestoreArrayAndMemType(mdn->lvec,&ay);CHKERRQ(ierr); 505 ierr = VecRestoreArrayReadAndMemType(xx,&ax);CHKERRQ(ierr); 506 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype,aymtype; 517 518 PetscFunctionBegin; 519 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 520 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 521 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 522 ierr = VecGetArrayAndMemType(yy,&ay,&aymtype);CHKERRQ(ierr); 523 ierr = 
PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 524 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 525 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 526 ierr = VecRestoreArrayAndMemType(yy,&ay);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 PetscErrorCode ierr; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype,aymtype; 537 538 PetscFunctionBegin; 539 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 540 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 541 ierr = VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);CHKERRQ(ierr); 542 ierr = VecGetArrayAndMemType(zz,&ay,&aymtype);CHKERRQ(ierr); 543 ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr); 544 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 545 ierr = VecRestoreArrayReadAndMemType(a->lvec,&ax);CHKERRQ(ierr); 546 ierr = VecRestoreArrayAndMemType(zz,&ay);CHKERRQ(ierr); 547 PetscFunctionReturn(0); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 553 PetscErrorCode ierr; 554 PetscInt lda,len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 const PetscScalar *av; 557 558 PetscFunctionBegin; 559 ierr = VecSet(v,zero);CHKERRQ(ierr); 560 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 561 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 562 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 563 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 564 radd = A->rmap->rstart*m; 565 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 566 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 567 for (i=0; i<len; i++) { 568 x[i] = av[radd + i*lda + i]; 569 } 570 ierr = 
MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 571 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatDestroy_MPIDense(Mat mat) 576 { 577 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 578 PetscErrorCode ierr; 579 580 PetscFunctionBegin; 581 #if defined(PETSC_USE_LOG) 582 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 583 #endif 584 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 585 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 586 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 587 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 588 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 589 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 590 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 591 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 592 593 ierr = PetscFree(mat->data);CHKERRQ(ierr); 594 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 595 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 604 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 609 #if defined(PETSC_HAVE_ELEMENTAL) 610 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 611 #endif 612 #if defined(PETSC_HAVE_SCALAPACK) 613 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 614 #endif 615 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 616 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 617 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 618 #if defined (PETSC_HAVE_CUDA) 619 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 620 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 621 #endif 622 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 623 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 624 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 625 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 626 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 627 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 628 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 630 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 636 637 #include <petscdraw.h> 638 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 639 { 640 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 641 PetscErrorCode ierr; 642 PetscMPIInt rank; 643 PetscViewerType vtype; 644 PetscBool iascii,isdraw; 645 PetscViewer sviewer; 646 PetscViewerFormat format; 647 648 PetscFunctionBegin; 649 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 650 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 651 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 652 if (iascii) { 653 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 654 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 655 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 656 MatInfo info; 657 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 659 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 660 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 661 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 662 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 663 
PetscFunctionReturn(0); 664 } else if (format == PETSC_VIEWER_ASCII_INFO) { 665 PetscFunctionReturn(0); 666 } 667 } else if (isdraw) { 668 PetscDraw draw; 669 PetscBool isnull; 670 671 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 672 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 673 if (isnull) PetscFunctionReturn(0); 674 } 675 676 { 677 /* assemble the entire matrix onto first processor. */ 678 Mat A; 679 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 680 PetscInt *cols; 681 PetscScalar *vals; 682 683 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 684 if (!rank) { 685 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 686 } else { 687 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 688 } 689 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 690 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 691 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 692 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 693 694 /* Copy the matrix ... 
This isn't the most efficient means, 695 but it's quick for now */ 696 A->insertmode = INSERT_VALUES; 697 698 row = mat->rmap->rstart; 699 m = mdn->A->rmap->n; 700 for (i=0; i<m; i++) { 701 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 702 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 703 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 704 row++; 705 } 706 707 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 709 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 710 if (!rank) { 711 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 712 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 715 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 716 ierr = MatDestroy(&A);CHKERRQ(ierr); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 722 { 723 PetscErrorCode ierr; 724 PetscBool iascii,isbinary,isdraw,issocket; 725 726 PetscFunctionBegin; 727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 731 732 if (iascii || issocket || isdraw) { 733 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 734 } else if (isbinary) { 735 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 741 { 742 Mat_MPIDense *mat = 
(Mat_MPIDense*)A->data; 743 Mat mdn = mat->A; 744 PetscErrorCode ierr; 745 PetscLogDouble isend[5],irecv[5]; 746 747 PetscFunctionBegin; 748 info->block_size = 1.0; 749 750 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 751 752 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 753 isend[3] = info->memory; isend[4] = info->mallocs; 754 if (flag == MAT_LOCAL) { 755 info->nz_used = isend[0]; 756 info->nz_allocated = isend[1]; 757 info->nz_unneeded = isend[2]; 758 info->memory = isend[3]; 759 info->mallocs = isend[4]; 760 } else if (flag == MAT_GLOBAL_MAX) { 761 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 762 763 info->nz_used = irecv[0]; 764 info->nz_allocated = irecv[1]; 765 info->nz_unneeded = irecv[2]; 766 info->memory = irecv[3]; 767 info->mallocs = irecv[4]; 768 } else if (flag == MAT_GLOBAL_SUM) { 769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 770 771 info->nz_used = irecv[0]; 772 info->nz_allocated = irecv[1]; 773 info->nz_unneeded = irecv[2]; 774 info->memory = irecv[3]; 775 info->mallocs = irecv[4]; 776 } 777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 778 info->fill_ratio_needed = 0; 779 info->factor_mallocs = 0; 780 PetscFunctionReturn(0); 781 } 782 783 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 784 { 785 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 786 PetscErrorCode ierr; 787 788 PetscFunctionBegin; 789 switch (op) { 790 case MAT_NEW_NONZERO_LOCATIONS: 791 case MAT_NEW_NONZERO_LOCATION_ERR: 792 case MAT_NEW_NONZERO_ALLOCATION_ERR: 793 MatCheckPreallocated(A,1); 794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 795 break; 796 case MAT_ROW_ORIENTED: 797 MatCheckPreallocated(A,1); 798 a->roworiented = flg; 799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 800 break; 801 case MAT_FORCE_DIAGONAL_ENTRIES: 802 case 
MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Scale rows by ll and columns by rr: A <- diag(ll) * A * diag(rr).
   Row scaling works on the locally owned entries directly; column scaling first
   broadcasts rr into mdn->lvec via the matrix's PetscSF, since every rank stores
   all N columns of its row block. Either vector may be NULL to skip that side. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      /* stride lda walks along row i of the column-major local array */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* gather the full right-scaling vector locally before touching the columns */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;  /* column i of the local array */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Matrix norms for MPIDENSE: Frobenius and 1-norm computed from the raw local array
   plus a reduction; infinity norm delegates to the local block and takes the max.
   The 2-norm is unsupported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* uniprocessor: the local block IS the matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks the local array contiguously, which assumes lda equals the
         local row count (no padding) — confirm for matrices with a larger LDA */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      /* per-column absolute sums locally, then sum across ranks and take the max column */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      /* rows are not split across ranks, so the global max-row-norm is the max of the local ones */
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Explicit transpose: builds B = A^T by scattering each local column of A into
   a (partial) row of B with MatSetValues; assembly routes entries to the owning
   rank. MAT_INPLACE_MATRIX swaps the new matrix into A's header. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* global indices of the locally owned rows of A become column indices in B */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;  /* advance to the next column of the local array */
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode
MatScale_MPIDense(Mat,PetscScalar);

/* Finalize the row/column layouts and preallocate the local block if the user has not. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y <- Y + alpha*X, delegated to the local blocks (no communication needed). */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugate all entries in place (purely local operation). */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replace each entry with its real part. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replace each entry with its imaginary part. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract global column col into v by calling the local block's implementation directly. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode
MatGetColumnReductions_SeqDense(Mat,ReductionType,PetscReal*);

/* Column-wise reductions (sum, mean, norms): compute local per-column partial
   results on the sequential block, combine across ranks (MAX for the infinity
   norm, SUM otherwise), then post-process (sqrt for 2-norm, divide for mean). */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,ReductionType type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  if (type == REDUCTION_MEAN) {
    /* mean = global sum / global row count, so gather sums first */
    ierr = MatGetColumnReductions_SeqDense(a->A,REDUCTION_SUM,work);CHKERRQ(ierr);
  } else {
    ierr = MatGetColumnReductions_SeqDense(a->A,type,work);CHKERRQ(ierr);
  }
  if (type == REDUCTION_NORM_2) {
    /* square local 2-norms so they are summable across ranks */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == REDUCTION_NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == REDUCTION_NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Expose column col of the local device array as the cached MPI CUDA vector a->cvec.
   Exclusive: errors if a column vector or submatrix is already checked out. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr =
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* store col+1 so 0 can mean "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with the read/write MatDenseGetColumnVec. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same wrapper vector, but the device array is acquired
   read-only and the vector is locked against writes. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr
= VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a read-only column vector: restore the device array and drop the read lock. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec (acquires the device array for writing). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a write-only column vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Temporarily substitute a user-provided device array for the local block's storage. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo MatDenseCUDAPlaceArray, returning to the block's own storage. */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replace the local block's device array (forwarded to MatDenseCUDAReplaceArray). */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The remaining device-array accessors are thin forwarders: device-array access on
   the parallel matrix is exactly device-array access on the local sequential block. */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Toggle CPU/GPU execution: bind the local block, then swap the column-vector
   accessor implementations (composed functions) between CUDA and host variants. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached work objects that are not CUDA-typed so they get recreated correctly */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* bound to CPU: restore the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocate device storage for the local block of a MATMPIDENSECUDA matrix.
   d_data may be a user-provided device array, or NULL to let PETSc allocate.
   A no-op for matrices that are not of type MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr =
PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* the local block holds all N columns for the locally owned rows */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill the matrix with random values (delegated to the local block). */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A dense matrix stores every entry, so no diagonal entry is ever "missing";
   the location output d is left untouched since *missing is always PETSC_FALSE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE; the numbered comments track the MatOps slot indices */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

/* Preallocate host storage for the local (sequential) block; data may be a
   user-provided array or NULL to have PETSc allocate it. The local block is
   created lazily as rmap->n x cmap->N (all columns for the locally owned rows). */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* keep the local block's type (host vs CUDA) in sync with the parallel matrix type */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Convert MPIAIJ -> MPIDense: merge each rank's diagonal+off-diagonal AIJ parts
   into one sequential AIJ, densify it, then concatenate the per-rank dense
   pieces into a single parallel dense matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* Convert MPIDense -> MPIAIJ via the analogous sequential conversion plus concatenation. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&C);CHKERRQ(ierr);
  ierr = MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  ierr = MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&C);CHKERRQ(ierr);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert MPIDense -> Elemental by inserting all locally owned rows at once. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat
mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense array is column-oriented, so insert column-major */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Borrow a pointer to column col of the local array (pair with MatDenseRestoreColumn). */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return a column pointer obtained with MatDenseGetColumn. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat =
(Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Stack each rank's sequential dense matrix into one parallel dense matrix.
   MAT_INITIAL_MATRIX runs a symbolic phase that validates the column split and
   creates *outmat; the numeric phase just copies inmat into the local block. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Downgrade MATMPIDENSECUDA to MATMPIDENSE: bind to CPU, restore the host vector
   type, unregister every CUDA-specific composed method, and convert the local block. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr =
PetscFree(B->defaultvectype);CHKERRQ(ierr); 1686 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1687 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1694 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1696 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1697 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1699 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1700 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1701 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1702 m = (Mat_MPIDense*)(B)->data; 1703 if (m->A) { 1704 ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1705 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1706 } 1707 B->ops->bindtocpu = NULL; 1708 
B->offloadmask = PETSC_OFFLOAD_CPU; 1709 PetscFunctionReturn(0); 1710 } 1711 1712 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1713 { 1714 Mat B; 1715 Mat_MPIDense *m; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 if (reuse == MAT_INITIAL_MATRIX) { 1720 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1721 } else if (reuse == MAT_REUSE_MATRIX) { 1722 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1723 } 1724 1725 B = *newmat; 1726 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1727 ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1728 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1729 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1730 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1734 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1735 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1737 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1743 m = (Mat_MPIDense*)(B->data); 1744 if (m->A) { 1745 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1746 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1747 B->offloadmask = PETSC_OFFLOAD_BOTH; 1748 } else { 1749 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1750 } 1751 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1752 1753 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1754 PetscFunctionReturn(0); 1755 } 1756 #endif 1757 1758 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1759 { 1760 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1761 PetscErrorCode ierr; 1762 PetscInt lda; 1763 1764 PetscFunctionBegin; 1765 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1766 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1767 if (!a->cvec) { 1768 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1769 } 1770 a->vecinuse = col + 1; 1771 ierr = 
MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the column-major local array */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); additionally read-locks the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) of A as a matrix sharing the storage of A.
   The cached matrix a->cmat is created lazily and its column layout is rebuilt
   when the requested width changes. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this?
 */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  /* matinuse stores cbegin+1 so that 0 can mean "not in use" */
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Release the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
/* Constructor for MATMPIDENSE: allocates the implementation struct, installs the
   function table, and composes the dense-specific and conversion methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: builds a MATMPIDENSE and converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op when B is not a type that composes this method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
   This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the buffer was written on the device, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2337 2338 Level: developer 2339 2340 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2341 @*/ 2342 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2343 { 2344 PetscErrorCode ierr; 2345 2346 PetscFunctionBegin; 2347 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2348 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 /*@C 2353 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2354 2355 Not Collective 2356 2357 Input Parameters: 2358 + A - the matrix 2359 - array - the GPU array in column major order 2360 2361 Notes: 2362 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2363 2364 Level: developer 2365 2366 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2367 @*/ 2368 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2369 { 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 /*@C 2378 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2379 2380 Not Collective 2381 2382 Input Parameters: 2383 . A - the matrix 2384 2385 Output Parameters 2386 . array - the GPU array in column major order 2387 2388 Notes: 2389 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the buffer may have been written on the device, so mark the GPU copy valid */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2444 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2445 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2446 to control all matrix memory allocation. 2447 2448 Output Parameter: 2449 . A - the matrix 2450 2451 Notes: 2452 The dense format is fully compatible with standard Fortran 77 2453 storage by columns. 2454 Note that, although local portions of the matrix are stored in column-major 2455 order, the matrix is partitioned across MPI ranks by row. 2456 2457 The data input variable is intended primarily for Fortran programmers 2458 who wish to allocate their own matrix memory space. Most users should 2459 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2460 2461 The user MUST specify either the local or global matrix dimensions 2462 (possibly both). 2463 2464 Level: intermediate 2465 2466 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2467 @*/ 2468 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2469 { 2470 PetscErrorCode ierr; 2471 PetscMPIInt size; 2472 2473 PetscFunctionBegin; 2474 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2475 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2476 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2477 if (size > 1) { 2478 PetscBool havedata = (PetscBool)!!data; 2479 2480 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2481 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2482 ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRMPI(ierr); 2483 if (havedata) { /* user provided data array, so no need to assemble */ 2484 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2485 (*A)->assembled = PETSC_TRUE; 2486 } 2487 } else { 2488 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2489 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2490 } 2491 
PetscFunctionReturn(0); 2492 } 2493 2494 #if defined(PETSC_HAVE_CUDA) 2495 /*@C 2496 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2497 2498 Collective 2499 2500 Input Parameters: 2501 + comm - MPI communicator 2502 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2503 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2504 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2505 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2506 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2507 to control matrix memory allocation. 2508 2509 Output Parameter: 2510 . A - the matrix 2511 2512 Notes: 2513 2514 Level: intermediate 2515 2516 .seealso: MatCreate(), MatCreateDense() 2517 @*/ 2518 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2519 { 2520 PetscErrorCode ierr; 2521 PetscMPIInt size; 2522 2523 PetscFunctionBegin; 2524 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2525 PetscValidLogicalCollectiveBool(*A,!!data,6); 2526 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2527 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2528 if (size > 1) { 2529 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2530 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2531 if (data) { /* user provided data array, so no need to assemble */ 2532 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2533 (*A)->assembled = PETSC_TRUE; 2534 } 2535 } else { 2536 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2537 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2538 } 2539 PetscFunctionReturn(0); 2540 } 2541 #endif 2542 2543 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2544 { 2545 Mat mat; 2546 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2547 PetscErrorCode ierr; 2548 2549 
PetscFunctionBegin; 2550 *newmat = NULL; 2551 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2552 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2553 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2554 a = (Mat_MPIDense*)mat->data; 2555 2556 mat->factortype = A->factortype; 2557 mat->assembled = PETSC_TRUE; 2558 mat->preallocated = PETSC_TRUE; 2559 2560 mat->insertmode = NOT_SET_VALUES; 2561 a->donotstash = oldmat->donotstash; 2562 2563 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 2564 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 2565 2566 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2567 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2568 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2569 2570 *newmat = mat; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2575 { 2576 PetscErrorCode ierr; 2577 PetscBool isbinary; 2578 #if defined(PETSC_HAVE_HDF5) 2579 PetscBool ishdf5; 2580 #endif 2581 2582 PetscFunctionBegin; 2583 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2584 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2585 /* force binary viewer to load .info file if it has not yet done so */ 2586 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2587 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2588 #if defined(PETSC_HAVE_HDF5) 2589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2590 #endif 2591 if (isbinary) { 2592 ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2593 #if defined(PETSC_HAVE_HDF5) 2594 } else if (ishdf5) { 2595 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2596 #endif 2597 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s 
matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2602 { 2603 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2604 Mat a,b; 2605 PetscBool flg; 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 a = matA->A; 2610 b = matB->A; 2611 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2612 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2617 { 2618 PetscErrorCode ierr; 2619 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2620 2621 PetscFunctionBegin; 2622 ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2623 ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2624 ierr = PetscFree(atb);CHKERRQ(ierr); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2629 { 2630 PetscErrorCode ierr; 2631 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2632 2633 PetscFunctionBegin; 2634 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2635 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2636 ierr = PetscFree(abt);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2641 { 2642 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2643 Mat_TransMatMultDense *atb; 2644 PetscErrorCode ierr; 2645 MPI_Comm comm; 2646 PetscMPIInt size,*recvcounts; 2647 PetscScalar *carray,*sendbuf; 2648 const PetscScalar *atbarray; 2649 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2650 const PetscInt *ranges; 2651 2652 PetscFunctionBegin; 2653 MatCheckProduct(C,3); 2654 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2655 atb = (Mat_TransMatMultDense *)C->product->data; 2656 recvcounts = atb->recvcounts; 2657 sendbuf = atb->sendbuf; 2658 2659 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2660 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2661 2662 /* compute atbarray = aseq^T * bseq */ 2663 ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2664 2665 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2666 2667 /* arrange atbarray into sendbuf */ 2668 ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2669 for (proc=0, k=0; proc<size; proc++) { 2670 for (j=0; j<cN; j++) { 2671 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2672 } 2673 } 2674 ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2675 2676 /* sum all atbarray to local values of C */ 2677 ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 2678 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRMPI(ierr); 2679 ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 2680 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2686 { 2687 PetscErrorCode ierr; 2688 MPI_Comm comm; 2689 PetscMPIInt size; 2690 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2691 Mat_TransMatMultDense *atb; 2692 PetscBool cisdense; 2693 PetscInt i; 2694 const PetscInt *ranges; 2695 2696 PetscFunctionBegin; 2697 MatCheckProduct(C,3); 2698 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2699 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2700 if (A->rmap->rstart != B->rmap->rstart || 
A->rmap->rend != B->rmap->rend) { 2701 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2702 } 2703 2704 /* create matrix product C */ 2705 ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2706 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2707 if (!cisdense) { 2708 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2709 } 2710 ierr = MatSetUp(C);CHKERRQ(ierr); 2711 2712 /* create data structure for reuse C */ 2713 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2714 ierr = PetscNew(&atb);CHKERRQ(ierr); 2715 cM = C->rmap->N; 2716 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2717 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2718 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2719 2720 C->product->data = atb; 2721 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2722 PetscFunctionReturn(0); 2723 } 2724 2725 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2726 { 2727 PetscErrorCode ierr; 2728 MPI_Comm comm; 2729 PetscMPIInt i, size; 2730 PetscInt maxRows, bufsiz; 2731 PetscMPIInt tag; 2732 PetscInt alg; 2733 Mat_MatTransMultDense *abt; 2734 Mat_Product *product = C->product; 2735 PetscBool flg; 2736 2737 PetscFunctionBegin; 2738 MatCheckProduct(C,4); 2739 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2740 /* check local size of A and B */ 2741 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2742 2743 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2744 alg = flg ? 
0 : 1; 2745 2746 /* setup matrix product C */ 2747 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2748 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2749 ierr = MatSetUp(C);CHKERRQ(ierr); 2750 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2751 2752 /* create data structure for reuse C */ 2753 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2754 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2755 ierr = PetscNew(&abt);CHKERRQ(ierr); 2756 abt->tag = tag; 2757 abt->alg = alg; 2758 switch (alg) { 2759 case 1: /* alg: "cyclic" */ 2760 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2761 bufsiz = A->cmap->N * maxRows; 2762 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2763 break; 2764 default: /* alg: "allgatherv" */ 2765 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2766 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2767 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2768 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2769 break; 2770 } 2771 2772 C->product->data = abt; 2773 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2774 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2775 PetscFunctionReturn(0); 2776 } 2777 2778 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2779 { 2780 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2781 Mat_MatTransMultDense *abt; 2782 PetscErrorCode ierr; 2783 MPI_Comm comm; 2784 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2785 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2786 PetscInt i,cK=A->cmap->N,k,j,bn; 2787 
PetscScalar _DOne=1.0,_DZero=0.0; 2788 const PetscScalar *av,*bv; 2789 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2790 MPI_Request reqs[2]; 2791 const PetscInt *ranges; 2792 2793 PetscFunctionBegin; 2794 MatCheckProduct(C,3); 2795 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2796 abt = (Mat_MatTransMultDense*)C->product->data; 2797 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2798 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2799 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2800 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2801 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2802 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2803 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2804 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2805 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2806 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2807 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2808 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2809 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2810 bn = B->rmap->n; 2811 if (blda == bn) { 2812 sendbuf = (PetscScalar*)bv; 2813 } else { 2814 sendbuf = abt->buf[0]; 2815 for (k = 0, i = 0; i < cK; i++) { 2816 for (j = 0; j < bn; j++, k++) { 2817 sendbuf[k] = bv[i * blda + j]; 2818 } 2819 } 2820 } 2821 if (size > 1) { 2822 sendto = (rank + size - 1) % size; 2823 recvfrom = (rank + size + 1) % size; 2824 } else { 2825 sendto = recvfrom = 0; 2826 } 2827 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2828 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2829 recvisfrom = rank; 2830 for (i = 0; i < size; i++) { 2831 /* we have finished receiving in sending, bufs can be read/modified */ 2832 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2833 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2834 2835 if (nextrecvisfrom != rank) { 2836 
/* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2837 sendsiz = cK * bn; 2838 recvsiz = cK * nextbn; 2839 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2840 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRMPI(ierr); 2841 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRMPI(ierr); 2842 } 2843 2844 /* local aseq * sendbuf^T */ 2845 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2846 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2847 2848 if (nextrecvisfrom != rank) { 2849 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2850 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 2851 } 2852 bn = nextbn; 2853 recvisfrom = nextrecvisfrom; 2854 sendbuf = recvbuf; 2855 } 2856 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2857 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2858 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2859 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2865 { 2866 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2867 Mat_MatTransMultDense *abt; 2868 PetscErrorCode ierr; 2869 MPI_Comm comm; 2870 PetscMPIInt size; 2871 PetscScalar *cv, *sendbuf, *recvbuf; 2872 const PetscScalar *av,*bv; 2873 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2874 PetscScalar _DOne=1.0,_DZero=0.0; 2875 PetscBLASInt cm, cn, ck, alda, clda; 2876 2877 PetscFunctionBegin; 2878 MatCheckProduct(C,3); 2879 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product 
data empty"); 2880 abt = (Mat_MatTransMultDense*)C->product->data; 2881 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2882 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2883 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2884 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2885 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2886 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2887 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2888 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2889 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2890 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2891 /* copy transpose of B into buf[0] */ 2892 bn = B->rmap->n; 2893 sendbuf = abt->buf[0]; 2894 recvbuf = abt->buf[1]; 2895 for (k = 0, j = 0; j < bn; j++) { 2896 for (i = 0; i < cK; i++, k++) { 2897 sendbuf[k] = bv[i * blda + j]; 2898 } 2899 } 2900 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2901 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRMPI(ierr); 2902 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2904 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2905 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2906 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2907 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2908 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2909 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2915 { 2916 Mat_MatTransMultDense *abt; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 MatCheckProduct(C,3); 2921 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2922 abt = (Mat_MatTransMultDense*)C->product->data; 2923 switch (abt->alg) { 2924 case 1: 2925 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2926 break; 2927 default: 2928 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2929 break; 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2935 { 2936 PetscErrorCode ierr; 2937 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2938 2939 PetscFunctionBegin; 2940 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2941 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2942 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2943 ierr = PetscFree(ab);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946 2947 #if defined(PETSC_HAVE_ELEMENTAL) 2948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2949 { 2950 PetscErrorCode ierr; 2951 Mat_MatMultDense *ab; 2952 2953 PetscFunctionBegin; 2954 MatCheckProduct(C,3); 2955 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2956 ab = (Mat_MatMultDense*)C->product->data; 2957 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2958 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2959 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2960 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2965 { 2966 PetscErrorCode ierr; 2967 Mat Ae,Be,Ce; 2968 Mat_MatMultDense *ab; 2969 2970 PetscFunctionBegin; 2971 MatCheckProduct(C,4); 2972 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2973 /* check local size of A 
and B */ 2974 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2975 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2976 } 2977 2978 /* create elemental matrices Ae and Be */ 2979 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2980 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2981 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2982 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2983 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2984 2985 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2986 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2987 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2988 ierr = MatSetUp(Be);CHKERRQ(ierr); 2989 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2990 2991 /* compute symbolic Ce = Ae*Be */ 2992 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2993 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2994 2995 /* setup C */ 2996 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2997 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2998 ierr = MatSetUp(C);CHKERRQ(ierr); 2999 3000 /* create data structure for reuse Cdense */ 3001 ierr = PetscNew(&ab);CHKERRQ(ierr); 3002 ab->Ae = Ae; 3003 ab->Be = Be; 3004 ab->Ce = Ce; 3005 3006 C->product->data = ab; 3007 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 3008 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 3009 PetscFunctionReturn(0); 3010 } 3011 #endif 3012 /* ----------------------------------------------- */ 3013 #if defined(PETSC_HAVE_ELEMENTAL) 3014 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 3015 { 3016 PetscFunctionBegin; 3017 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 3018 C->ops->productsymbolic = MatProductSymbolic_AB; 3019 PetscFunctionReturn(0); 3020 } 3021 #endif 3022 3023 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 3024 { 3025 Mat_Product *product = C->product; 3026 Mat A = product->A,B=product->B; 3027 3028 PetscFunctionBegin; 3029 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 3030 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 3031 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 3032 C->ops->productsymbolic = MatProductSymbolic_AtB; 3033 PetscFunctionReturn(0); 3034 } 3035 3036 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 3037 { 3038 PetscErrorCode ierr; 3039 Mat_Product *product = C->product; 3040 const char *algTypes[2] = {"allgatherv","cyclic"}; 3041 PetscInt alg,nalg = 2; 3042 PetscBool flg = PETSC_FALSE; 3043 3044 PetscFunctionBegin; 3045 /* Set default algorithm */ 3046 alg = 0; /* default is allgatherv */ 3047 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 3048 if (flg) { 3049 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3050 } 3051 3052 /* Get runtime option */ 3053 if (product->api_user) { 3054 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 3055 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3056 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3057 } else { 3058 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 3059 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic 
approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 3060 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3061 } 3062 if (flg) { 3063 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 3064 } 3065 3066 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 3067 C->ops->productsymbolic = MatProductSymbolic_ABt; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 3072 { 3073 PetscErrorCode ierr; 3074 Mat_Product *product = C->product; 3075 3076 PetscFunctionBegin; 3077 switch (product->type) { 3078 #if defined(PETSC_HAVE_ELEMENTAL) 3079 case MATPRODUCT_AB: 3080 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 3081 break; 3082 #endif 3083 case MATPRODUCT_AtB: 3084 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 3085 break; 3086 case MATPRODUCT_ABt: 3087 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 3088 break; 3089 default: 3090 break; 3091 } 3092 PetscFunctionReturn(0); 3093 } 3094