1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 32 if (flg) *B = mat->A; 33 else *B = A; 34 PetscFunctionReturn(0); 35 } 36 37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 38 { 39 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 40 PetscErrorCode ierr; 41 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 42 43 PetscFunctionBegin; 44 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 45 lrow = row - rstart; 46 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 51 { 52 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 53 PetscErrorCode ierr; 54 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 55 56 PetscFunctionBegin; 57 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 58 lrow = row - rstart; 59 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 63 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 64 { 65 
Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 66 PetscErrorCode ierr; 67 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 68 PetscScalar *array; 69 MPI_Comm comm; 70 PetscBool cong; 71 Mat B; 72 73 PetscFunctionBegin; 74 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 75 if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 76 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 77 if (!B) { 78 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 79 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 80 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 81 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 82 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 83 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 84 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 85 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 86 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 87 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 88 *a = B; 89 ierr = MatDestroy(&B);CHKERRQ(ierr); 90 } else *a = B; 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 95 { 96 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 97 PetscErrorCode ierr; 98 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 99 PetscBool roworiented = A->roworiented; 100 101 PetscFunctionBegin; 102 for (i=0; i<m; i++) { 103 if (idxm[i] < 0) continue; 104 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 105 if (idxm[i] >= rstart && idxm[i] < rend) { 106 row = idxm[i] - rstart; 107 if (roworiented) { 108 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 109 } else { 110 for (j=0; j<n; j++) { 111 if (idxn[j] < 0) continue; 112 if (idxn[j] >= 
mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 113 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 114 } 115 } 116 } else if (!A->donotstash) { 117 mat->assembled = PETSC_FALSE; 118 if (roworiented) { 119 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 120 } else { 121 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 122 } 123 } 124 } 125 PetscFunctionReturn(0); 126 } 127 128 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 129 { 130 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 131 PetscErrorCode ierr; 132 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 133 134 PetscFunctionBegin; 135 for (i=0; i<m; i++) { 136 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 137 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 138 if (idxm[i] >= rstart && idxm[i] < rend) { 139 row = idxm[i] - rstart; 140 for (j=0; j<n; j++) { 141 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 142 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 144 } 145 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 146 } 147 PetscFunctionReturn(0); 148 } 149 150 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 151 { 152 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 157 PetscFunctionReturn(0); 158 } 159 160 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 161 { 162 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 163 
PetscErrorCode ierr; 164 165 PetscFunctionBegin; 166 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 171 { 172 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 181 { 182 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 191 { 192 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 197 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 202 { 203 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 208 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 213 { 214 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 215 PetscErrorCode ierr; 216 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 217 const PetscInt *irow,*icol; 218 const PetscScalar *v; 219 PetscScalar *bv; 220 Mat newmat; 221 IS iscol_local; 222 MPI_Comm comm_is,comm_mat; 223 224 PetscFunctionBegin; 225 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 226 ierr = 
PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 227 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 228 229 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 230 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 231 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 232 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 233 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 234 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 235 236 /* No parallel redistribution currently supported! Should really check each index set 237 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 238 original matrix! */ 239 240 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 241 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 242 243 /* Check submatrix call */ 244 if (scall == MAT_REUSE_MATRIX) { 245 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 246 /* Really need to test rows and column sizes! 
*/ 247 newmat = *B; 248 } else { 249 /* Create and fill new matrix */ 250 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 251 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 252 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 253 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 254 } 255 256 /* Now extract the data pointers and do the copy, column at a time */ 257 newmatd = (Mat_MPIDense*)newmat->data; 258 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 259 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 260 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 261 for (i=0; i<Ncols; i++) { 262 const PetscScalar *av = v + lda*icol[i]; 263 for (j=0; j<nrows; j++) { 264 *bv++ = av[irow[j] - rstart]; 265 } 266 } 267 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 268 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 269 270 /* Assemble the matrices so that the correct flags are set */ 271 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 274 /* Free work space */ 275 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 276 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 277 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 278 *B = newmat; 279 PetscFunctionReturn(0); 280 } 281 282 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 283 { 284 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 293 { 294 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 295 PetscErrorCode ierr; 296 297 PetscFunctionBegin; 298 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 303 { 304 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 315 PetscErrorCode ierr; 316 PetscInt nstash,reallocs; 317 318 PetscFunctionBegin; 319 if (mdn->donotstash || mat->nooffprocentries) return(0); 320 321 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 322 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 323 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 328 { 329 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 330 PetscErrorCode ierr; 331 PetscInt i,*row,*col,flg,j,rstart,ncols; 332 PetscMPIInt n; 333 PetscScalar *val; 334 335 PetscFunctionBegin; 336 if (!mdn->donotstash && !mat->nooffprocentries) { 337 /* wait on receives */ 338 while (1) { 339 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 340 if (!flg) break; 341 342 for (i=0; i<n;) { 343 /* Now identify the consecutive vals belonging to the same row */ 344 for (j=i,rstart=row[j]; j<n; j++) { 345 if (row[j] != rstart) break; 346 } 347 if (j < n) ncols = j-i; 348 else ncols = n-i; 349 /* Now assemble all these values with a single function call */ 350 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 351 i = j; 352 } 353 } 354 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 355 } 356 357 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
361 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 362 } 363 PetscFunctionReturn(0); 364 } 365 366 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 367 { 368 PetscErrorCode ierr; 369 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 370 371 PetscFunctionBegin; 372 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 376 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 377 { 378 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 379 PetscErrorCode ierr; 380 PetscInt i,len,*lrows; 381 382 PetscFunctionBegin; 383 /* get locally owned rows */ 384 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 385 /* fix right hand side if needed */ 386 if (x && b) { 387 const PetscScalar *xx; 388 PetscScalar *bb; 389 390 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 391 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 392 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 393 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 394 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 395 } 396 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 397 if (diag != 0.0) { 398 Vec d; 399 400 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 401 ierr = VecSet(d,diag);CHKERRQ(ierr); 402 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 403 ierr = VecDestroy(&d);CHKERRQ(ierr); 404 } 405 ierr = PetscFree(lrows);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 410 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 411 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 412 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 413 414 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 415 { 416 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 417 PetscErrorCode ierr; 418 const PetscScalar *ax; 419 PetscScalar *ay; 420 421 PetscFunctionBegin; 422 ierr = 
VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 423 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 424 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 425 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 426 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 427 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 428 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 433 { 434 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 435 PetscErrorCode ierr; 436 const PetscScalar *ax; 437 PetscScalar *ay; 438 439 PetscFunctionBegin; 440 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 441 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 442 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 443 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 444 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 445 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 446 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 451 { 452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 453 PetscErrorCode ierr; 454 const PetscScalar *ax; 455 PetscScalar *ay; 456 457 PetscFunctionBegin; 458 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 459 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 460 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 461 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 462 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 463 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 464 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 465 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 PetscErrorCode 
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 470 { 471 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 472 PetscErrorCode ierr; 473 const PetscScalar *ax; 474 PetscScalar *ay; 475 476 PetscFunctionBegin; 477 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 478 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 479 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 480 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 481 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 482 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 483 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 484 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 489 { 490 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 491 PetscErrorCode ierr; 492 PetscInt lda,len,i,n,m = A->rmap->n,radd; 493 PetscScalar *x,zero = 0.0; 494 const PetscScalar *av; 495 496 PetscFunctionBegin; 497 ierr = VecSet(v,zero);CHKERRQ(ierr); 498 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 499 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 500 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 501 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 502 radd = A->rmap->rstart*m; 503 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 504 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 505 for (i=0; i<len; i++) { 506 x[i] = av[radd + i*lda + i]; 507 } 508 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 509 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 PetscErrorCode MatDestroy_MPIDense(Mat mat) 514 { 515 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 516 PetscErrorCode ierr; 517 518 PetscFunctionBegin; 519 #if defined(PETSC_USE_LOG) 520 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 521 #endif 522 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 523 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 524 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 525 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 526 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 527 528 ierr = PetscFree(mat->data);CHKERRQ(ierr); 529 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 530 531 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 532 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 533 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 534 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 535 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 536 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 537 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 538 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 539 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 540 #if defined(PETSC_HAVE_ELEMENTAL) 541 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 542 #endif 543 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 544 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 545 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 546 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 547 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 548 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 549 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 550 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 551 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 552 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 553 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 554 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 556 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 564 565 #include <petscdraw.h> 566 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 567 { 568 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 569 PetscErrorCode ierr; 570 PetscMPIInt rank = mdn->rank; 571 PetscViewerType vtype; 572 PetscBool iascii,isdraw; 573 PetscViewer sviewer; 574 PetscViewerFormat format; 575 576 PetscFunctionBegin; 577 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 578 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 579 if (iascii) { 580 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 581 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 582 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 583 MatInfo info; 584 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 585 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 586 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 587 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 588 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 589 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } else if (format == PETSC_VIEWER_ASCII_INFO) { 592 PetscFunctionReturn(0); 593 } 594 } else if (isdraw) { 595 PetscDraw draw; 596 PetscBool isnull; 597 598 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 599 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 600 if (isnull) PetscFunctionReturn(0); 601 } 602 603 { 604 /* assemble the entire matrix onto first processor. */ 605 Mat A; 606 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 607 PetscInt *cols; 608 PetscScalar *vals; 609 610 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 611 if (!rank) { 612 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 613 } else { 614 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 615 } 616 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 617 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 618 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 619 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 620 621 /* Copy the matrix ... 
This isn't the most efficient means, 622 but it's quick for now */ 623 A->insertmode = INSERT_VALUES; 624 625 row = mat->rmap->rstart; 626 m = mdn->A->rmap->n; 627 for (i=0; i<m; i++) { 628 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 629 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 630 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 631 row++; 632 } 633 634 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 635 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 637 if (!rank) { 638 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 639 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 640 } 641 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 642 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 643 ierr = MatDestroy(&A);CHKERRQ(ierr); 644 } 645 PetscFunctionReturn(0); 646 } 647 648 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 649 { 650 PetscErrorCode ierr; 651 PetscBool iascii,isbinary,isdraw,issocket; 652 653 PetscFunctionBegin; 654 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 655 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 656 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 657 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 658 659 if (iascii || issocket || isdraw) { 660 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 661 } else if (isbinary) { 662 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 663 } 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 668 { 669 Mat_MPIDense *mat = 
(Mat_MPIDense*)A->data; 670 Mat mdn = mat->A; 671 PetscErrorCode ierr; 672 PetscLogDouble isend[5],irecv[5]; 673 674 PetscFunctionBegin; 675 info->block_size = 1.0; 676 677 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 678 679 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 680 isend[3] = info->memory; isend[4] = info->mallocs; 681 if (flag == MAT_LOCAL) { 682 info->nz_used = isend[0]; 683 info->nz_allocated = isend[1]; 684 info->nz_unneeded = isend[2]; 685 info->memory = isend[3]; 686 info->mallocs = isend[4]; 687 } else if (flag == MAT_GLOBAL_MAX) { 688 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 689 690 info->nz_used = irecv[0]; 691 info->nz_allocated = irecv[1]; 692 info->nz_unneeded = irecv[2]; 693 info->memory = irecv[3]; 694 info->mallocs = irecv[4]; 695 } else if (flag == MAT_GLOBAL_SUM) { 696 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 697 698 info->nz_used = irecv[0]; 699 info->nz_allocated = irecv[1]; 700 info->nz_unneeded = irecv[2]; 701 info->memory = irecv[3]; 702 info->mallocs = irecv[4]; 703 } 704 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 705 info->fill_ratio_needed = 0; 706 info->factor_mallocs = 0; 707 PetscFunctionReturn(0); 708 } 709 710 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 711 { 712 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 switch (op) { 717 case MAT_NEW_NONZERO_LOCATIONS: 718 case MAT_NEW_NONZERO_LOCATION_ERR: 719 case MAT_NEW_NONZERO_ALLOCATION_ERR: 720 MatCheckPreallocated(A,1); 721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 722 break; 723 case MAT_ROW_ORIENTED: 724 MatCheckPreallocated(A,1); 725 a->roworiented = flg; 726 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 727 break; 728 case MAT_NEW_DIAGONALS: 729 case MAT_KEEP_NONZERO_PATTERN: 730 
case MAT_USE_HASH_TABLE: 731 case MAT_SORTED_FULL: 732 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 733 break; 734 case MAT_IGNORE_OFF_PROC_ENTRIES: 735 a->donotstash = flg; 736 break; 737 case MAT_SYMMETRIC: 738 case MAT_STRUCTURALLY_SYMMETRIC: 739 case MAT_HERMITIAN: 740 case MAT_SYMMETRY_ETERNAL: 741 case MAT_IGNORE_LOWER_TRIANGULAR: 742 case MAT_IGNORE_ZERO_ENTRIES: 743 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 744 break; 745 default: 746 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 747 } 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 752 { 753 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 754 const PetscScalar *l; 755 PetscScalar x,*v,*vv,*r; 756 PetscErrorCode ierr; 757 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 758 759 PetscFunctionBegin; 760 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 761 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 762 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 763 if (ll) { 764 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 765 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 766 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 767 for (i=0; i<m; i++) { 768 x = l[i]; 769 v = vv + i; 770 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 771 } 772 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 773 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 774 } 775 if (rr) { 776 const PetscScalar *ar; 777 778 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 779 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 780 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 781 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 782 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 783 ierr = 
PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 784 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 785 for (i=0; i<n; i++) { 786 x = r[i]; 787 v = vv + i*lda; 788 for (j=0; j<m; j++) (*v++) *= x; 789 } 790 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 791 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 792 } 793 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 798 { 799 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 800 PetscErrorCode ierr; 801 PetscInt i,j; 802 PetscReal sum = 0.0; 803 const PetscScalar *av,*v; 804 805 PetscFunctionBegin; 806 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 807 v = av; 808 if (mdn->size == 1) { 809 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 810 } else { 811 if (type == NORM_FROBENIUS) { 812 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 813 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 814 } 815 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 *nrm = PetscSqrtReal(*nrm); 817 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 818 } else if (type == NORM_1) { 819 PetscReal *tmp,*tmp2; 820 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 821 *nrm = 0.0; 822 v = av; 823 for (j=0; j<mdn->A->cmap->n; j++) { 824 for (i=0; i<mdn->A->rmap->n; i++) { 825 tmp[j] += PetscAbsScalar(*v); v++; 826 } 827 } 828 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 829 for (j=0; j<A->cmap->N; j++) { 830 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 831 } 832 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 833 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 834 } else if (type == NORM_INFINITY) { /* max row norm */ 835 PetscReal ntemp; 836 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 837 ierr = 
MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 838 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 839 } 840 ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 845 { 846 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 847 Mat B; 848 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 849 PetscErrorCode ierr; 850 PetscInt j,i,lda; 851 PetscScalar *v; 852 853 PetscFunctionBegin; 854 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 855 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 856 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 857 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 858 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 859 } else B = *matout; 860 861 m = a->A->rmap->n; n = a->A->cmap->n; 862 ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 863 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 864 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 865 for (i=0; i<m; i++) rwork[i] = rstart + i; 866 for (j=0; j<n; j++) { 867 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 868 v += lda; 869 } 870 ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 871 ierr = PetscFree(rwork);CHKERRQ(ierr); 872 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 873 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 874 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 875 *matout = B; 876 } else { 877 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 878 } 879 PetscFunctionReturn(0); 880 } 881 882 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 883 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 884 885 PetscErrorCode MatSetUp_MPIDense(Mat A) 886 { 
887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 891 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 892 if (!A->preallocated) { 893 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 894 } 895 PetscFunctionReturn(0); 896 } 897 898 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 899 { 900 PetscErrorCode ierr; 901 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 902 903 PetscFunctionBegin; 904 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 PetscErrorCode MatConjugate_MPIDense(Mat mat) 909 { 910 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 911 PetscErrorCode ierr; 912 913 PetscFunctionBegin; 914 ierr = MatConjugate(a->A);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 PetscErrorCode MatRealPart_MPIDense(Mat A) 919 { 920 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 ierr = MatRealPart(a->A);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 929 { 930 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 931 PetscErrorCode ierr; 932 933 PetscFunctionBegin; 934 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 939 { 940 PetscErrorCode ierr; 941 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 942 943 PetscFunctionBegin; 944 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 945 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 946 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 950 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 951 952 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 953 { 954 
PetscErrorCode ierr; 955 PetscInt i,n; 956 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 957 PetscReal *work; 958 959 PetscFunctionBegin; 960 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 961 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 962 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 963 if (type == NORM_2) { 964 for (i=0; i<n; i++) work[i] *= work[i]; 965 } 966 if (type == NORM_INFINITY) { 967 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 968 } else { 969 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 970 } 971 ierr = PetscFree(work);CHKERRQ(ierr); 972 if (type == NORM_2) { 973 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 974 } 975 PetscFunctionReturn(0); 976 } 977 978 #if defined(PETSC_HAVE_CUDA) 979 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 980 { 981 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 982 PetscErrorCode ierr; 983 PetscInt lda; 984 985 PetscFunctionBegin; 986 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 987 if (!a->cvec) { 988 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 989 } 990 a->vecinuse = col + 1; 991 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 992 ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 993 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 994 *v = a->cvec; 995 PetscFunctionReturn(0); 996 } 997 998 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 999 { 1000 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1005 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1006 
a->vecinuse = 0; 1007 ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1008 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1009 *v = NULL; 1010 PetscFunctionReturn(0); 1011 } 1012 1013 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1014 { 1015 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1016 PetscInt lda; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1021 if (!a->cvec) { 1022 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1023 } 1024 a->vecinuse = col + 1; 1025 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1026 ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1027 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1028 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1029 *v = a->cvec; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1034 { 1035 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1036 PetscErrorCode ierr; 1037 1038 PetscFunctionBegin; 1039 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1040 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1041 a->vecinuse = 0; 1042 ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1043 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1044 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1045 *v = NULL; 1046 PetscFunctionReturn(0); 1047 } 1048 1049 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1050 { 1051 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1052 PetscErrorCode ierr; 1053 PetscInt lda; 1054 1055 PetscFunctionBegin; 1056 if (a->vecinuse) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1057 if (!a->cvec) { 1058 ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1059 } 1060 a->vecinuse = col + 1; 1061 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1062 ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1063 ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1064 *v = a->cvec; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1069 { 1070 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1075 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1076 a->vecinuse = 0; 1077 ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1078 ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1079 *v = NULL; 1080 PetscFunctionReturn(0); 1081 } 1082 1083 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1084 { 1085 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1090 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1095 { 1096 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1101 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 
1104 1105 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1106 { 1107 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1116 { 1117 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1118 PetscErrorCode ierr; 1119 1120 PetscFunctionBegin; 1121 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1126 { 1127 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1136 { 1137 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1146 { 1147 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1156 { 1157 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1158 PetscErrorCode ierr; 1159 1160 PetscFunctionBegin; 1161 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1166 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 
1167 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1168 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1169 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1170 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1171 1172 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1173 { 1174 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1179 if (d->A) { 1180 ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1181 } 1182 mat->boundtocpu = bind; 1183 if (!bind) { 1184 PetscBool iscuda; 1185 1186 ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1187 if (!iscuda) { 1188 ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1189 } 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1192 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1193 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1196 } else { 1197 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1198 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1199 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1200 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1201 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1202 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1208 { 1209 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1210 PetscErrorCode ierr; 1211 PetscBool iscuda; 1212 1213 PetscFunctionBegin; 1214 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1215 if (!iscuda) PetscFunctionReturn(0); 1216 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1217 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1218 if (!d->A) { 1219 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1220 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1221 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1222 } 1223 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1224 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1225 A->preallocated = PETSC_TRUE; 1226 PetscFunctionReturn(0); 1227 } 1228 #endif 1229 1230 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1231 { 1232 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat 
A,Mat,Mat);

/* A dense matrix stores every entry, so no diagonal entry can be "missing". */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
/* Positional virtual-function table for MATMPIDENSE; entry order is fixed
   by struct _MatOps, zeros mark unimplemented operations.  Do not reorder. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Create the local sequential block (CUDA-backed if mat is MATMPIDENSECUDA)
   and hand it the optional user-supplied storage in data. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* local block: all global columns for the locally owned rows */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1419 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1420 mat->preallocated = PETSC_TRUE; 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #if defined(PETSC_HAVE_ELEMENTAL) 1425 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1426 { 1427 Mat mat_elemental; 1428 PetscErrorCode ierr; 1429 PetscScalar *v; 1430 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1431 1432 PetscFunctionBegin; 1433 if (reuse == MAT_REUSE_MATRIX) { 1434 mat_elemental = *newmat; 1435 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1436 } else { 1437 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1438 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1439 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1440 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1441 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1442 } 1443 1444 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1445 for (i=0; i<N; i++) cols[i] = i; 1446 for (i=0; i<m; i++) rows[i] = rstart + i; 1447 1448 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1449 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1450 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1451 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1453 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1454 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1455 1456 if (reuse == MAT_INPLACE_MATRIX) { 1457 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1458 } else { 1459 *newmat = mat_elemental; 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 #endif 1464 1465 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt 
col,PetscScalar **vals) 1466 { 1467 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1476 { 1477 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1478 PetscErrorCode ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1486 { 1487 PetscErrorCode ierr; 1488 Mat_MPIDense *mat; 1489 PetscInt m,nloc,N; 1490 1491 PetscFunctionBegin; 1492 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1493 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1494 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1495 PetscInt sum; 1496 1497 if (n == PETSC_DECIDE) { 1498 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1499 } 1500 /* Check sum(n) = N */ 1501 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1502 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1503 1504 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1505 } 1506 1507 /* numeric phase */ 1508 mat = (Mat_MPIDense*)(*outmat)->data; 1509 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1510 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #if defined(PETSC_HAVE_CUDA) 1516 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1517 { 1518 Mat B; 1519 Mat_MPIDense *m; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 if (reuse == MAT_INITIAL_MATRIX) { 1524 ierr = 
MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1525 } else if (reuse == MAT_REUSE_MATRIX) { 1526 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1527 } 1528 1529 B = *newmat; 1530 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1531 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1532 ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1533 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1545 m = (Mat_MPIDense*)(B)->data; 1546 if (m->A) { 1547 ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1548 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1549 } 1550 B->ops->bindtocpu = NULL; 1551 B->offloadmask = PETSC_OFFLOAD_CPU; 1552 PetscFunctionReturn(0); 1553 } 1554 1555 PetscErrorCode 
MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1556 { 1557 Mat B; 1558 Mat_MPIDense *m; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (reuse == MAT_INITIAL_MATRIX) { 1563 ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1564 } else if (reuse == MAT_REUSE_MATRIX) { 1565 ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1566 } 1567 1568 B = *newmat; 1569 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1570 ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1571 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1581 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1583 m = (Mat_MPIDense*)(B)->data; 1584 if (m->A) { 1585 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1586 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1587 B->offloadmask = PETSC_OFFLOAD_BOTH; 1588 } else { 1589 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1590 } 1591 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1592 1593 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1594 PetscFunctionReturn(0); 1595 } 1596 #endif 1597 1598 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1599 { 1600 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1601 PetscErrorCode ierr; 1602 PetscInt lda; 1603 1604 PetscFunctionBegin; 1605 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1606 if (!a->cvec) { 1607 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1608 } 1609 a->vecinuse = col + 1; 1610 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1611 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1612 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1613 *v = a->cvec; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1618 { 1619 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1624 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1625 a->vecinuse = 0; 1626 ierr = 
MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1627 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1628 *v = NULL; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1633 { 1634 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1635 PetscErrorCode ierr; 1636 PetscInt lda; 1637 1638 PetscFunctionBegin; 1639 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1640 if (!a->cvec) { 1641 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1642 } 1643 a->vecinuse = col + 1; 1644 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1645 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1646 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1647 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1648 *v = a->cvec; 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1653 { 1654 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1655 PetscErrorCode ierr; 1656 1657 PetscFunctionBegin; 1658 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1659 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1660 a->vecinuse = 0; 1661 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1662 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1663 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1664 *v = NULL; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1669 { 1670 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1671 PetscErrorCode ierr; 1672 PetscInt lda; 1673 1674 PetscFunctionBegin; 1675 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1676 if (!a->cvec) { 1677 
ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1678 } 1679 a->vecinuse = col + 1; 1680 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1681 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1682 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1683 *v = a->cvec; 1684 PetscFunctionReturn(0); 1685 } 1686 1687 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1688 { 1689 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1694 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1695 a->vecinuse = 0; 1696 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1697 ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1698 *v = NULL; 1699 PetscFunctionReturn(0); 1700 } 1701 1702 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1703 { 1704 Mat_MPIDense *a; 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1709 mat->data = (void*)a; 1710 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1711 1712 mat->insertmode = NOT_SET_VALUES; 1713 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1714 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1715 1716 /* build cache for off array entries formed */ 1717 a->donotstash = PETSC_FALSE; 1718 1719 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1720 1721 /* stuff used for matrix vector multiply */ 1722 a->lvec = 0; 1723 a->Mvctx = 0; 1724 a->roworiented = PETSC_TRUE; 1725 1726 ierr = 
/* continuation of MatCreate_MPIDense: register the dense-array and column-vector
   accessors plus the product/conversion kernels under their query names */ PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: builds a plain MPIDENSE then converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.
Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
/* Public entry point: dispatches to the type-specific implementation composed on B */
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op if B's type did not compose this method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* placing a new array changes the matrix values: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* host array is now authoritative */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.
The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* device array is now authoritative */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU; /* GPU copy was just written */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read access: no object-state increase, values are unchanged */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.
Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* either every rank passes data or none do; otherwise collective calls diverge */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

/* Duplicates an MPIDENSE matrix: copies layouts and flags, then duplicates the
   local dense block per cpvalues and rebuilds the multiply scatter context */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;
a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference rather than re-deriving them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPIDENSE matrix from a binary or (optionally) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Tests equality by comparing the local blocks, then reducing with logical AND */
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* all ranks must agree: equality only if every local block matches */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor hook for C = A^T*B products: frees the buffers and the cached
   product, then chains to the matrix's original destroy routine */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = (*atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor hook for C = A*B^T products; mirrors the A^T*B hook above */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B: computes the local product, then reduce-scatters
   the rows of the (replicated) result onto each rank's portion of C */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ?
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its local rows of C (times the full column count) */
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  /* pack column-major atbarray into per-destination contiguous chunks */
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T*B: sizes and assembles C and allocates the
   communication buffers reused by the numeric phase */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have identical row distributions for the local products to align */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  /* sendbuf holds the full global product; recvcounts is per-rank */
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  c           = (Mat_MPIDense*)C->data;
  c->atbdense = atb;
  /* intercept destroy so the buffers above are freed with C */
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A*B^T: sizes C and allocates buffers for the algorithm
   selected by the product options ("allgatherv" or "cyclic") */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;  /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ?
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B circulating the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c           = (Mat_MPIDense*)C->data;
  c->abtdense = abt;
  /* intercept destroy so the buffers above are freed with C */
  abt->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A*B^T, cyclic variant: each rank's block of B^T travels
   around a ring; each step overlaps the send/recv with a local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it in place without a copy */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact the strided columns of B into buf[0] before sending */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
2515 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2516 sendsiz = cK * bn; 2517 recvsiz = cK * nextbn; 2518 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2519 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2520 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2521 } 2522 2523 /* local aseq * sendbuf^T */ 2524 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2525 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2526 2527 if (nextrecvisfrom != rank) { 2528 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2529 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2530 } 2531 bn = nextbn; 2532 recvisfrom = nextrecvisfrom; 2533 sendbuf = recvbuf; 2534 } 2535 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2536 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2537 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2542 { 2543 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2544 Mat_MatTransMultDense *abt = c->abtdense; 2545 PetscErrorCode ierr; 2546 MPI_Comm comm; 2547 PetscMPIInt size; 2548 PetscScalar *cv, *sendbuf, *recvbuf; 2549 const PetscScalar *av,*bv; 2550 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2551 PetscScalar _DOne=1.0,_DZero=0.0; 2552 PetscBLASInt cm, cn, ck, alda, clda; 2553 2554 PetscFunctionBegin; 2555 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2558 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2559 ierr = 
MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2560 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2562 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2563 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2565 /* copy transpose of B into buf[0] */ 2566 bn = B->rmap->n; 2567 sendbuf = abt->buf[0]; 2568 recvbuf = abt->buf[1]; 2569 for (k = 0, j = 0; j < bn; j++) { 2570 for (i = 0; i < cK; i++, k++) { 2571 sendbuf[k] = bv[i * blda + j]; 2572 } 2573 } 2574 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2575 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2579 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2580 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2581 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2582 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2587 { 2588 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2589 Mat_MatTransMultDense *abt = c->abtdense; 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 switch (abt->alg) { 2594 case 1: 2595 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2596 break; 2597 default: 2598 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2599 break; 2600 } 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2605 { 2606 PetscErrorCode ierr; 2607 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2608 Mat_MatMultDense *ab = 
a->abdense; 2609 2610 PetscFunctionBegin; 2611 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2612 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2613 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2614 2615 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2616 ierr = PetscFree(ab);CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #if defined(PETSC_HAVE_ELEMENTAL) 2621 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2622 { 2623 PetscErrorCode ierr; 2624 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2625 Mat_MatMultDense *ab=c->abdense; 2626 2627 PetscFunctionBegin; 2628 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2629 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2630 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2631 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2636 { 2637 PetscErrorCode ierr; 2638 Mat Ae,Be,Ce; 2639 Mat_MPIDense *c; 2640 Mat_MatMultDense *ab; 2641 2642 PetscFunctionBegin; 2643 /* check local size of A and B */ 2644 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2645 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2646 } 2647 2648 /* create elemental matrices Ae and Be */ 2649 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2650 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2651 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2652 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2653 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2654 2655 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2656 ierr = 
MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2657 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2658 ierr = MatSetUp(Be);CHKERRQ(ierr); 2659 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2660 2661 /* compute symbolic Ce = Ae*Be */ 2662 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2663 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2664 2665 /* setup C */ 2666 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2667 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2668 ierr = MatSetUp(C);CHKERRQ(ierr); 2669 2670 /* create data structure for reuse Cdense */ 2671 ierr = PetscNew(&ab);CHKERRQ(ierr); 2672 c = (Mat_MPIDense*)C->data; 2673 c->abdense = ab; 2674 2675 ab->Ae = Ae; 2676 ab->Be = Be; 2677 ab->Ce = Ce; 2678 ab->destroy = C->ops->destroy; 2679 C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2680 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2681 C->ops->productnumeric = MatProductNumeric_AB; 2682 PetscFunctionReturn(0); 2683 } 2684 #endif 2685 /* ----------------------------------------------- */ 2686 #if defined(PETSC_HAVE_ELEMENTAL) 2687 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2688 { 2689 PetscFunctionBegin; 2690 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2691 C->ops->productsymbolic = MatProductSymbolic_AB; 2692 C->ops->productnumeric = MatProductNumeric_AB; 2693 PetscFunctionReturn(0); 2694 } 2695 #endif 2696 2697 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 2698 { 2699 PetscErrorCode ierr; 2700 Mat_Product *product = C->product; 2701 2702 PetscFunctionBegin; 2703 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 2704 C->ops->productnumeric = MatProductNumeric_AtB; 2705 C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 2706 PetscFunctionReturn(0); 2707 } 
2708 2709 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2710 { 2711 Mat_Product *product = C->product; 2712 Mat A = product->A,B=product->B; 2713 2714 PetscFunctionBegin; 2715 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2716 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2717 2718 C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 2719 PetscFunctionReturn(0); 2720 } 2721 2722 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2723 { 2724 PetscErrorCode ierr; 2725 Mat_Product *product = C->product; 2726 const char *algTypes[2] = {"allgatherv","cyclic"}; 2727 PetscInt alg,nalg = 2; 2728 PetscBool flg = PETSC_FALSE; 2729 2730 PetscFunctionBegin; 2731 /* Set default algorithm */ 2732 alg = 0; /* default is allgatherv */ 2733 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2734 if (flg) { 2735 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2736 } 2737 2738 /* Get runtime option */ 2739 if (product->api_user) { 2740 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2741 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2742 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2743 } else { 2744 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2745 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2746 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2747 } 2748 if (flg) { 2749 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2750 } 2751 2752 
C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2753 C->ops->productsymbolic = MatProductSymbolic_ABt; 2754 C->ops->productnumeric = MatProductNumeric_ABt; 2755 PetscFunctionReturn(0); 2756 } 2757 2758 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2759 { 2760 PetscErrorCode ierr; 2761 Mat_Product *product = C->product; 2762 2763 PetscFunctionBegin; 2764 switch (product->type) { 2765 #if defined(PETSC_HAVE_ELEMENTAL) 2766 case MATPRODUCT_AB: 2767 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2768 break; 2769 #endif 2770 case MATPRODUCT_AtB: 2771 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2772 break; 2773 case MATPRODUCT_ABt: 2774 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2775 break; 2776 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 2777 } 2778 PetscFunctionReturn(0); 2779 } 2780