1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 #endif 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatDenseGetLocalMatrix" 16 /*@ 17 18 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 19 matrix that represents the operator. For sequential matrices it returns itself. 20 21 Input Parameter: 22 . A - the Seq or MPI dense matrix 23 24 Output Parameter: 25 . B - the inner matrix 26 27 Level: intermediate 28 29 @*/ 30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 31 { 32 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 33 PetscErrorCode ierr; 34 PetscTruth flg; 35 36 PetscFunctionBegin; 37 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 38 if (flg) { 39 *B = mat->A; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatGetRow_MPIDense" 48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 49 { 50 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 51 PetscErrorCode ierr; 52 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 53 54 PetscFunctionBegin; 55 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 56 lrow = row - rstart; 57 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRestoreRow_MPIDense" 63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 69 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 70 PetscFunctionReturn(0); 71 } 72 73 EXTERN_C_BEGIN 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 77 { 78 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 79 PetscErrorCode ierr; 80 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 81 PetscScalar *array; 82 MPI_Comm comm; 83 84 PetscFunctionBegin; 85 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 86 87 /* The reuse aspect is not implemented efficiently */ 88 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 89 90 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 91 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatCreate(comm,B);CHKERRQ(ierr); 93 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 94 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 95 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 96 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 97 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 100 *iscopy = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValues_MPIDense" 107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 108 { 109 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 112 PetscTruth roworiented = A->roworiented; 113 114 PetscFunctionBegin; 115 for (i=0; i<m; i++) { 116 if (idxm[i] < 0) continue; 117 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 118 if (idxm[i] >= rstart && idxm[i] < rend) { 119 row = idxm[i] - rstart; 120 if (roworiented) { 121 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 122 } else { 123 for (j=0; j<n; j++) { 124 if (idxn[j] < 0) continue; 125 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 126 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 127 } 128 } 129 } else { 130 if (!A->donotstash) { 131 if (roworiented) { 132 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 133 } else { 134 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 135 } 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetValues_MPIDense" 144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 145 { 146 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 149 150 PetscFunctionBegin; 151 for (i=0; i<m; i++) { 152 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 153 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 154 if (idxm[i] >= rstart && idxm[i] < rend) { 155 row = idxm[i] - rstart; 156 for (j=0; j<n; j++) { 157 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 158 if (idxn[j] >= mat->cmap->N) { 159 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 160 } 161 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 162 } 163 } else { 164 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 165 } 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetArray_MPIDense" 172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 185 { 186 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 187 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 188 PetscErrorCode ierr; 189 PetscInt i,j,rstart,rend,nrows,ncols,nlrows,nlcols; 190 const PetscInt *irow,*icol; 191 PetscScalar *av,*bv,*v = lmat->v; 192 Mat newmat; 193 194 PetscFunctionBegin; 195 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 196 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 197 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 198 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 199 200 /* No parallel redistribution currently supported! Should really check each index set 201 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 202 original matrix! */ 203 204 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 205 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 206 207 /* Check submatrix call */ 208 if (scall == MAT_REUSE_MATRIX) { 209 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 210 /* Really need to test rows and column sizes! */ 211 newmat = *B; 212 } else { 213 /* Create and fill new matrix */ 214 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 215 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 216 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 217 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 218 } 219 220 /* Now extract the data pointers and do the copy, column at a time */ 221 newmatd = (Mat_MPIDense*)newmat->data; 222 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 223 224 for (i=0; i<ncols; i++) { 225 av = v + nlrows*icol[i]; 226 for (j=0; j<nrows; j++) { 227 *bv++ = av[irow[j] - rstart]; 228 } 229 } 230 231 /* Assemble the matrices so that the correct flags are set */ 232 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 235 /* Free work space */ 236 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 238 *B = newmat; 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreArray_MPIDense" 244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 245 { 246 PetscFunctionBegin; 247 PetscFunctionReturn(0); 248 } 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 253 { 254 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 255 MPI_Comm comm = ((PetscObject)mat)->comm; 256 PetscErrorCode ierr; 257 PetscInt nstash,reallocs; 258 InsertMode addv; 259 260 PetscFunctionBegin; 261 /* make sure all processors are either in INSERTMODE or ADDMODE */ 262 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 263 if (addv == (ADD_VALUES|INSERT_VALUES)) { 264 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 265 } 266 mat->insertmode = addv; /* in case this processor had no cache */ 267 268 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 269 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 270 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 277 { 278 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 279 PetscErrorCode ierr; 280 PetscInt i,*row,*col,flg,j,rstart,ncols; 281 PetscMPIInt n; 282 PetscScalar *val; 283 InsertMode addv=mat->insertmode; 284 285 PetscFunctionBegin; 286 /* wait on receives */ 287 while (1) { 288 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 289 if (!flg) break; 290 291 for (i=0; i<n;) { 292 /* Now identify the consecutive vals belonging to the same row */ 293 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 294 if (j < n) ncols = j-i; 295 else ncols = n-i; 296 /* Now assemble all these values with a single function call */ 297 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 298 i = j; 299 } 300 } 301 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 302 303 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 304 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 305 306 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 307 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatZeroEntries_MPIDense" 314 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 315 { 316 PetscErrorCode ierr; 317 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 318 319 PetscFunctionBegin; 320 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 /* the code does not do the diagonal entries correctly unless the 325 matrix is square and the column and row owerships are identical. 326 This is a BUG. The only way to fix it seems to be to access 327 mdn->A and mdn->B directly and not through the MatZeroRows() 328 routine. 329 */ 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatZeroRows_MPIDense" 332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 333 { 334 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 335 PetscErrorCode ierr; 336 PetscInt i,*owners = A->rmap->range; 337 PetscInt *nprocs,j,idx,nsends; 338 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 339 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 340 PetscInt *lens,*lrows,*values; 341 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 342 MPI_Comm comm = ((PetscObject)A)->comm; 343 MPI_Request *send_waits,*recv_waits; 344 MPI_Status recv_status,*send_status; 345 PetscTruth found; 346 347 PetscFunctionBegin; 348 /* first count number of contributors to each processor */ 349 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 350 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 352 for (i=0; i<N; i++) { 353 idx = rows[i]; 354 found = PETSC_FALSE; 355 for (j=0; j<size; j++) { 356 if (idx >= owners[j] && idx < owners[j+1]) { 357 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 358 } 359 } 360 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 361 } 362 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 363 364 /* inform other processors of number of messages and max length*/ 365 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 366 367 /* post receives: */ 368 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 369 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 370 for (i=0; i<nrecvs; i++) { 371 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 372 } 373 374 /* do sends: 375 1) starts[i] gives the starting index in svalues for stuff going to 376 the ith processor 377 */ 378 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 379 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 380 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 381 starts[0] = 0; 382 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 383 for (i=0; i<N; i++) { 384 svalues[starts[owner[i]]++] = rows[i]; 385 } 386 387 starts[0] = 0; 388 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 389 count = 0; 390 for (i=0; i<size; i++) { 391 if (nprocs[2*i+1]) { 392 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 393 } 394 } 395 ierr = PetscFree(starts);CHKERRQ(ierr); 396 397 base = owners[rank]; 398 399 /* wait on receives */ 400 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 401 source = lens + nrecvs; 402 count = nrecvs; slen = 0; 403 while (count) { 404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 405 /* unpack receives into our local space */ 406 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 407 source[imdex] = recv_status.MPI_SOURCE; 408 lens[imdex] = n; 409 slen += n; 410 count--; 411 } 412 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 413 414 /* move the data into the send scatter */ 415 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 416 count = 0; 417 for (i=0; i<nrecvs; i++) { 418 values = rvalues + i*nmax; 419 for (j=0; j<lens[i]; j++) { 420 lrows[count++] = values[j] - base; 421 } 422 } 423 ierr = PetscFree(rvalues);CHKERRQ(ierr); 424 ierr = PetscFree(lens);CHKERRQ(ierr); 425 ierr = PetscFree(owner);CHKERRQ(ierr); 426 ierr = PetscFree(nprocs);CHKERRQ(ierr); 427 428 /* actually zap the local rows */ 429 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 430 ierr = PetscFree(lrows);CHKERRQ(ierr); 431 432 /* wait on sends */ 433 if (nsends) { 434 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 435 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 436 ierr = PetscFree(send_status);CHKERRQ(ierr); 437 } 438 ierr = PetscFree(send_waits);CHKERRQ(ierr); 439 ierr = PetscFree(svalues);CHKERRQ(ierr); 440 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatMult_MPIDense" 446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 447 { 448 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449 PetscErrorCode ierr; 450 451 PetscFunctionBegin; 452 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 453 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMultAdd_MPIDense" 460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 461 { 462 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTranspose_MPIDense" 474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 475 { 476 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 477 PetscErrorCode ierr; 478 PetscScalar zero = 0.0; 479 480 PetscFunctionBegin; 481 ierr = VecSet(yy,zero);CHKERRQ(ierr); 482 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 483 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 484 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 491 { 492 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 493 PetscErrorCode ierr; 494 495 PetscFunctionBegin; 496 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 497 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 498 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 499 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "MatGetDiagonal_MPIDense" 505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 506 { 507 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 508 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 509 PetscErrorCode ierr; 510 PetscInt len,i,n,m = A->rmap->n,radd; 511 PetscScalar *x,zero = 0.0; 512 513 PetscFunctionBegin; 514 ierr = VecSet(v,zero);CHKERRQ(ierr); 515 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 516 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 517 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 518 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 519 radd = A->rmap->rstart*m; 520 for (i=0; i<len; i++) { 521 x[i] = aloc->v[radd + i*m + i]; 522 } 523 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatDestroy_MPIDense" 529 PetscErrorCode MatDestroy_MPIDense(Mat mat) 530 { 531 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 532 PetscErrorCode ierr; 533 #if defined(PETSC_HAVE_PLAPACK) 534 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 535 #endif 536 537 PetscFunctionBegin; 538 539 #if defined(PETSC_USE_LOG) 540 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 541 #endif 542 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 543 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 544 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 545 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 546 #if defined(PETSC_HAVE_PLAPACK) 547 if (lu) { 548 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 549 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 550 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 551 552 if (lu->is_pla) { 553 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 554 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 555 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 556 } 557 } 558 #endif 559 560 ierr = PetscFree(mdn);CHKERRQ(ierr); 561 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatView_MPIDense_Binary" 572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 575 PetscErrorCode ierr; 576 PetscViewerFormat format; 577 int fd; 578 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 579 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 580 PetscScalar *work,*v,*vv; 581 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 582 MPI_Status status; 583 584 PetscFunctionBegin; 585 if (mdn->size == 1) { 586 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 587 } else { 588 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 589 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 590 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 591 592 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 593 if (format == PETSC_VIEWER_BINARY_NATIVE) { 594 595 if (!rank) { 596 /* store the matrix as a dense matrix */ 597 header[0] = MAT_FILE_COOKIE; 598 header[1] = mat->rmap->N; 599 header[2] = N; 600 header[3] = MATRIX_BINARY_FORMAT_DENSE; 601 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 602 603 /* get largest work array needed for transposing array */ 604 mmax = mat->rmap->n; 605 for (i=1; i<size; i++) { 606 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 607 } 608 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 609 610 /* write out local array, by rows */ 611 m = mat->rmap->n; 612 v = a->v; 613 for (j=0; j<N; j++) { 614 for (i=0; i<m; i++) { 615 work[j + i*N] = *v++; 616 } 617 } 618 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 619 /* get largest work array to receive messages from other processes, excludes process zero */ 620 mmax = 0; 621 for (i=1; i<size; i++) { 622 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 623 } 624 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 625 for(k = 1; k < size; k++) { 626 v = vv; 627 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 628 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 629 630 for(j = 0; j < N; j++) { 631 for(i = 0; i < m; i++) { 632 work[j + i*N] = *v++; 633 } 634 } 635 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 636 } 637 ierr = PetscFree(work);CHKERRQ(ierr); 638 ierr = PetscFree(vv);CHKERRQ(ierr); 639 } else { 640 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 641 } 642 } else { 643 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE"); 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 652 { 653 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 654 PetscErrorCode ierr; 655 PetscMPIInt size = mdn->size,rank = mdn->rank; 656 const PetscViewerType vtype; 657 PetscTruth iascii,isdraw; 658 PetscViewer sviewer; 659 PetscViewerFormat format; 660 #if defined(PETSC_HAVE_PLAPACK) 661 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 662 #endif 663 664 PetscFunctionBegin; 665 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 666 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 667 if (iascii) { 668 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 669 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 670 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 671 MatInfo info; 672 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 673 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 674 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 675 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 676 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 677 #if defined(PETSC_HAVE_PLAPACK) 678 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->Plapack_nprows, lu->Plapack_npcols);CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->Plapack_ierror);CHKERRQ(ierr); 682 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->Plapack_nb_alg);CHKERRQ(ierr); 683 #endif 684 PetscFunctionReturn(0); 685 } else if (format == PETSC_VIEWER_ASCII_INFO) { 686 PetscFunctionReturn(0); 687 } 688 } else if (isdraw) { 689 PetscDraw draw; 690 PetscTruth isnull; 691 692 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 693 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 694 if (isnull) PetscFunctionReturn(0); 695 } 696 697 if (size == 1) { 698 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 699 } else { 700 /* assemble the entire matrix onto first processor. */ 701 Mat A; 702 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 703 PetscInt *cols; 704 PetscScalar *vals; 705 706 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 707 if (!rank) { 708 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 711 } 712 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 713 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 714 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 715 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 716 717 /* Copy the matrix ... This isn't the most efficient means, 718 but it's quick for now */ 719 A->insertmode = INSERT_VALUES; 720 row = mat->rmap->rstart; m = mdn->A->rmap->n; 721 for (i=0; i<m; i++) { 722 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 723 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 724 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 725 row++; 726 } 727 728 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 729 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 730 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 731 if (!rank) { 732 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 733 } 734 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 735 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 736 ierr = MatDestroy(A);CHKERRQ(ierr); 737 } 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatView_MPIDense" 743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 744 { 745 PetscErrorCode ierr; 746 PetscTruth iascii,isbinary,isdraw,issocket; 747 748 PetscFunctionBegin; 749 750 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 751 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 752 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 753 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 754 755 if (iascii || issocket || isdraw) { 756 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 757 } else if (isbinary) { 758 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 759 } else { 760 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 761 } 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetInfo_MPIDense" 767 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 768 { 769 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 770 Mat mdn = mat->A; 771 PetscErrorCode ierr; 772 PetscReal isend[5],irecv[5]; 773 774 PetscFunctionBegin; 775 info->rows_global = (double)A->rmap->N; 776 info->columns_global = (double)A->cmap->N; 777 info->rows_local = (double)A->rmap->n; 778 info->columns_local = (double)A->cmap->N; 779 info->block_size = 1.0; 780 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 781 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 782 isend[3] = info->memory; isend[4] = info->mallocs; 783 if (flag == MAT_LOCAL) { 784 info->nz_used = isend[0]; 785 info->nz_allocated = isend[1]; 786 info->nz_unneeded = isend[2]; 787 info->memory = isend[3]; 788 info->mallocs = isend[4]; 789 } else if (flag == MAT_GLOBAL_MAX) { 790 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 791 info->nz_used = irecv[0]; 792 info->nz_allocated = irecv[1]; 793 info->nz_unneeded = irecv[2]; 794 info->memory = irecv[3]; 795 info->mallocs = irecv[4]; 796 } else if (flag == MAT_GLOBAL_SUM) { 797 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 798 info->nz_used = irecv[0]; 799 info->nz_allocated = irecv[1]; 800 info->nz_unneeded = irecv[2]; 801 info->memory = irecv[3]; 802 info->mallocs = irecv[4]; 803 } 804 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 805 info->fill_ratio_needed = 0; 806 info->factor_mallocs = 0; 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSetOption_MPIDense" 812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 813 { 814 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 switch (op) { 819 case MAT_NEW_NONZERO_LOCATIONS: 820 case MAT_NEW_NONZERO_LOCATION_ERR: 821 case MAT_NEW_NONZERO_ALLOCATION_ERR: 822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 823 break; 824 case MAT_ROW_ORIENTED: 825 a->roworiented = flg; 826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 827 break; 828 case MAT_NEW_DIAGONALS: 829 case MAT_USE_HASH_TABLE: 830 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 831 break; 832 case MAT_IGNORE_OFF_PROC_ENTRIES: 833 a->donotstash = flg; 834 break; 835 case MAT_SYMMETRIC: 836 case MAT_STRUCTURALLY_SYMMETRIC: 837 case MAT_HERMITIAN: 838 case MAT_SYMMETRY_ETERNAL: 839 case MAT_IGNORE_LOWER_TRIANGULAR: 840 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 841 break; 842 default: 843 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatDiagonalScale_MPIDense" 851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 852 { 853 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 854 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 855 PetscScalar *l,*r,x,*v; 856 PetscErrorCode ierr; 857 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 858 859 PetscFunctionBegin; 860 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 861 if (ll) { 862 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 863 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 864 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 865 for (i=0; i<m; i++) { 866 x = l[i]; 867 v = mat->v + i; 868 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 869 } 870 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 871 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 872 } 873 if (rr) { 874 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 875 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 876 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 879 for (i=0; i<n; i++) { 880 x = r[i]; 881 v = mat->v + i*m; 882 for (j=0; j<m; j++) { (*v++) *= x;} 883 } 884 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 885 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatNorm_MPIDense" 892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 893 { 894 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 895 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 896 PetscErrorCode ierr; 897 PetscInt i,j; 898 PetscReal sum = 0.0; 899 PetscScalar *v = mat->v; 900 901 PetscFunctionBegin; 902 if (mdn->size == 1) { 903 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 904 } else { 905 if (type == NORM_FROBENIUS) { 906 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 907 #if defined(PETSC_USE_COMPLEX) 908 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 909 #else 910 sum += (*v)*(*v); v++; 911 #endif 912 } 913 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 914 *nrm = sqrt(*nrm); 915 ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 916 } else if (type == NORM_1) { 917 PetscReal *tmp,*tmp2; 918 ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 919 tmp2 = tmp + A->cmap->N; 920 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 921 *nrm = 0.0; 922 v = mat->v; 923 for (j=0; j<mdn->A->cmap->n; j++) { 924 for (i=0; i<mdn->A->rmap->n; i++) { 925 tmp[j] += PetscAbsScalar(*v); v++; 926 } 927 } 928 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 929 for (j=0; j<A->cmap->N; j++) { 930 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 931 } 932 ierr = PetscFree(tmp);CHKERRQ(ierr); 933 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 934 } else if (type == NORM_INFINITY) { /* max row norm */ 935 PetscReal ntemp; 936 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 937 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 938 } else { 939 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 940 } 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatTranspose_MPIDense" 947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 948 { 949 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 950 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 951 Mat B; 952 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 953 PetscErrorCode ierr; 954 PetscInt j,i; 955 PetscScalar *v; 956 957 PetscFunctionBegin; 958 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 959 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 960 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 961 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 962 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 963 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 964 } else { 965 B = *matout; 966 } 967 968 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 969 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 970 for (i=0; i<m; i++) rwork[i] = rstart + i; 971 for (j=0; j<n; j++) { 972 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 973 v += m; 974 } 975 ierr = PetscFree(rwork);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 979 *matout = B; 980 } else { 981 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 #include "petscblaslapack.h" 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatScale_MPIDense" 989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 990 { 991 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 992 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 993 PetscScalar oalpha = alpha; 994 PetscErrorCode ierr; 995 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N); 996 997 PetscFunctionBegin; 998 BLASscal_(&nz,&oalpha,a->v,&one); 999 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1008 { 1009 PetscErrorCode ierr; 1010 1011 PetscFunctionBegin; 1012 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #if defined(PETSC_HAVE_PLAPACK) 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1021 { 1022 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1023 PetscErrorCode ierr; 1024 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1025 PetscScalar *array; 1026 PetscReal one = 1.0; 1027 1028 PetscFunctionBegin; 1029 /* Copy A into F->lu->A */ 1030 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1031 ierr = PLA_API_begin();CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1034 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1035 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1036 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1037 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1038 ierr = PLA_API_end();CHKERRQ(ierr); 1039 lu->rstart = rstart; 1040 PetscFunctionReturn(0); 1041 } 1042 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1046 { 1047 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1048 PetscErrorCode ierr; 1049 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1050 PetscScalar *array; 1051 PetscReal one = 1.0; 1052 1053 PetscFunctionBegin; 1054 /* Copy F into A->lu->A */ 1055 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1056 ierr = PLA_API_begin();CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1058 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1059 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1060 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1061 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1062 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1063 ierr = PLA_API_end();CHKERRQ(ierr); 1064 lu->rstart = rstart; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1071 { 1072 PetscErrorCode ierr; 1073 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1074 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1075 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1076 PLA_Obj alpha = NULL,beta = NULL; 1077 1078 PetscFunctionBegin; 1079 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1080 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1081 1082 /* 1083 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1084 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1085 */ 1086 1087 /* do the multiply in PLA */ 1088 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1089 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1090 CHKMEMQ; 1091 1092 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1093 CHKMEMQ; 1094 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1095 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1096 1097 /* 1098 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1099 */ 1100 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1107 { 1108 PetscErrorCode ierr; 1109 PetscInt m=A->rmap->n,n=B->cmap->n; 1110 Mat Cmat; 1111 1112 PetscFunctionBegin; 1113 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1114 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1115 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1116 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1117 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1118 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1120 //ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr); 1121 //ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr); 1122 //ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr); 1123 *C = Cmat; 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1129 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1130 { 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 if (scall == MAT_INITIAL_MATRIX){ 1135 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1136 } 1137 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSolve_MPIDense" 1143 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1144 { 1145 MPI_Comm comm = ((PetscObject)A)->comm; 1146 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1147 PetscErrorCode ierr; 1148 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1149 PetscScalar *array; 1150 PetscReal one = 1.0; 1151 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1152 PLA_Obj v_pla = NULL; 1153 PetscScalar *loc_buf; 1154 Vec loc_x; 1155 1156 PetscFunctionBegin; 1157 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1158 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1159 1160 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1161 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1162 PLA_Obj_set_to_zero(v_pla); 1163 1164 /* Copy b into rhs_pla */ 1165 PLA_API_begin(); 1166 PLA_Obj_API_open(v_pla); 1167 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1168 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1169 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1170 PLA_Obj_API_close(v_pla); 1171 PLA_API_end(); 1172 1173 if (A->factor == MAT_FACTOR_LU){ 1174 /* Apply the permutations to the right hand sides */ 1175 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1176 1177 /* Solve L y = b, overwriting b with y */ 1178 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1179 1180 /* Solve U x = y (=b), overwriting b with x */ 1181 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1182 } else { /*MAT_FACTOR_CHOLESKY */ 1183 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1184 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1185 PLA_NONUNIT_DIAG,lu->A,v_pla); 1186 } 1187 1188 /* Copy PLAPACK x into Petsc vector x */ 1189 PLA_Obj_local_length(v_pla, &loc_m); 1190 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1191 PLA_Obj_local_stride(v_pla, &loc_stride); 1192 /* 1193 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1194 */ 1195 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1196 if (!lu->pla_solved){ 1197 1198 PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc); 1199 PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc); 1200 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1201 1202 /* Create IS and cts for VecScatterring */ 1203 PLA_Obj_local_length(v_pla, &loc_m); 1204 PLA_Obj_local_stride(v_pla, &loc_stride); 1205 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1206 idx_petsc = idx_pla + loc_m; 1207 1208 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1209 for (i=0; i<loc_m; i+=lu->nb){ 1210 j = 0; 1211 while (j < lu->nb && i+j < loc_m){ 1212 idx_petsc[i+j] = rstart + j; j++; 1213 } 1214 rstart += size*lu->nb; 1215 } 1216 1217 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1218 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1220 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1221 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1222 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1223 } 1224 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226 1227 /* Free data */ 1228 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1229 PLA_Obj_free(&v_pla); 1230 1231 lu->pla_solved = PETSC_TRUE; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1237 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1238 { 1239 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1240 PetscErrorCode ierr; 1241 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1242 PetscInt info_pla=0; 1243 PetscScalar *array,one = 1.0; 1244 1245 PetscFunctionBegin; 1246 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1247 PLA_Obj_free(&lu->A); 1248 PLA_Obj_free (&lu->pivots); 1249 } 1250 /* Create PLAPACK matrix object */ 1251 lu->A = NULL; lu->pivots = NULL; 1252 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1253 PLA_Obj_set_to_zero(lu->A); 1254 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1255 1256 /* Copy A into lu->A */ 1257 PLA_API_begin(); 1258 PLA_Obj_API_open(lu->A); 1259 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1260 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1261 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1262 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1263 PLA_Obj_API_close(lu->A); 1264 PLA_API_end(); 1265 1266 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1267 info_pla = PLA_LU(lu->A,lu->pivots); 1268 if (info_pla != 0) 1269 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1270 1271 lu->CleanUpPlapack = PETSC_TRUE; 1272 lu->rstart = rstart; 1273 lu->mstruct = SAME_NONZERO_PATTERN; 1274 F->ops->solve = MatSolve_MPIDense; 1275 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1281 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1282 { 1283 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1284 PetscErrorCode ierr; 1285 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1286 PetscInt info_pla=0; 1287 PetscScalar *array,one = 1.0; 1288 1289 PetscFunctionBegin; 1290 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1291 PLA_Obj_free(&lu->A); 1292 } 1293 /* Create PLAPACK matrix object */ 1294 lu->A = NULL; 1295 lu->pivots = NULL; 1296 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1297 1298 /* Copy A into lu->A */ 1299 PLA_API_begin(); 1300 PLA_Obj_API_open(lu->A); 1301 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1302 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1303 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1304 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1305 PLA_Obj_API_close(lu->A); 1306 PLA_API_end(); 1307 1308 /* Factor P A -> Chol */ 1309 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1310 if (info_pla != 0) 1311 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1312 1313 lu->CleanUpPlapack = PETSC_TRUE; 1314 lu->rstart = rstart; 1315 lu->mstruct = SAME_NONZERO_PATTERN; 1316 F->ops->solve = MatSolve_MPIDense; 1317 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatFactorSymbolic_Plapack_Private" 1323 PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat F,Mat A,const MatFactorInfo *info) 1324 { 1325 Mat B = F; 1326 Mat_Plapack *lu; 1327 PetscErrorCode ierr; 1328 PetscInt M=A->rmap->N; 1329 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1330 PetscMPIInt size; 1331 PetscInt ierror; 1332 1333 PetscFunctionBegin; 1334 lu = (Mat_Plapack*)(B->spptr); 1335 1336 /* Set default Plapack parameters */ 1337 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1338 lu->nprows = 1; lu->npcols = size; 1339 ierror = 0; 1340 lu->nb = M/size; 1341 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1342 1343 /* Set runtime options */ 1344 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1345 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1346 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1347 1348 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1349 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1350 if (ierror){ 1351 PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE ); 1352 } else { 1353 PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE ); 1354 } 1355 lu->ierror = ierror; 1356 1357 lu->nb_alg = 0; 1358 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1359 if (lu->nb_alg){ 1360 pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg); 1361 } 1362 PetscOptionsEnd(); 1363 1364 1365 /* Create a 2D communicator */ 1366 PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d); 1367 lu->comm_2d = comm_2d; 1368 1369 /* Initialize PLAPACK */ 1370 PLA_Init(comm_2d); 1371 1372 /* Create object distribution template */ 1373 lu->templ = NULL; 1374 PLA_Temp_create(lu->nb, 0, &lu->templ); 1375 1376 /* Use suggested nb_alg if it is not provided by user */ 1377 if (lu->nb_alg == 0){ 1378 PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg); 1379 pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg); 1380 } 1381 1382 /* Set the datatype */ 1383 #if defined(PETSC_USE_COMPLEX) 1384 lu->datatype = MPI_DOUBLE_COMPLEX; 1385 #else 1386 lu->datatype = MPI_DOUBLE; 1387 #endif 1388 1389 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1390 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1391 lu->CleanUpPlapack = PETSC_TRUE; 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* Note the Petsc perm permutation is ignored */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1398 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1399 { 1400 PetscErrorCode ierr; 1401 PetscTruth issymmetric,set; 1402 1403 PetscFunctionBegin; 1404 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1405 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1406 ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr); 1407 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* Note the Petsc r and c permutations are ignored */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1414 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1415 { 1416 PetscErrorCode ierr; 1417 PetscInt M = A->rmap->N; 1418 Mat_Plapack *lu; 1419 1420 PetscFunctionBegin; 1421 ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr); 1422 lu = (Mat_Plapack*)F->spptr; 1423 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1424 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1430 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1431 { 1432 PetscErrorCode ierr; 1433 Mat_Plapack *lu; 1434 PetscMPIInt size; 1435 PetscInt M; 1436 1437 PetscFunctionBegin; 1438 /* Create the factorization matrix */ 1439 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1440 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1441 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1442 ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr); 1443 A->spptr = (void*)lu; 1444 1445 lu = (Mat_Plapack*)(A->spptr); 1446 1447 /* Set default Plapack parameters */ 1448 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1449 lu->nb = M/size; 1450 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1451 1452 /* Set runtime options */ 1453 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1454 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1455 PetscOptionsEnd(); 1456 1457 /* Create object distribution template */ 1458 lu->templ = NULL; 1459 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1460 1461 /* Set the datatype */ 1462 #if defined(PETSC_USE_COMPLEX) 1463 lu->datatype = MPI_DOUBLE_COMPLEX; 1464 #else 1465 lu->datatype = MPI_DOUBLE; 1466 #endif 1467 1468 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1469 1470 1471 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1472 1473 if (ftype == MAT_FACTOR_LU) { 1474 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1475 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1476 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1477 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1478 (*F)->factor = ftype; 1479 PetscFunctionReturn(0); 1480 } 1481 #endif 1482 1483 /* -------------------------------------------------------------------*/ 1484 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1485 MatGetRow_MPIDense, 1486 MatRestoreRow_MPIDense, 1487 MatMult_MPIDense, 1488 /* 4*/ MatMultAdd_MPIDense, 1489 MatMultTranspose_MPIDense, 1490 MatMultTransposeAdd_MPIDense, 1491 0, 1492 0, 1493 0, 1494 /*10*/ 0, 1495 0, 1496 0, 1497 0, 1498 MatTranspose_MPIDense, 1499 /*15*/ MatGetInfo_MPIDense, 1500 MatEqual_MPIDense, 1501 MatGetDiagonal_MPIDense, 1502 MatDiagonalScale_MPIDense, 1503 MatNorm_MPIDense, 1504 /*20*/ MatAssemblyBegin_MPIDense, 1505 MatAssemblyEnd_MPIDense, 1506 0, 1507 MatSetOption_MPIDense, 1508 MatZeroEntries_MPIDense, 1509 /*25*/ MatZeroRows_MPIDense, 1510 0, 1511 0, 1512 0, 1513 0, 1514 /*30*/ MatSetUpPreallocation_MPIDense, 1515 0, 1516 0, 1517 MatGetArray_MPIDense, 1518 MatRestoreArray_MPIDense, 1519 /*35*/ MatDuplicate_MPIDense, 1520 0, 1521 0, 1522 0, 1523 0, 1524 /*40*/ 0, 1525 MatGetSubMatrices_MPIDense, 1526 0, 1527 MatGetValues_MPIDense, 1528 0, 1529 /*45*/ 0, 1530 MatScale_MPIDense, 1531 0, 1532 0, 1533 0, 1534 /*50*/ 0, 1535 0, 1536 0, 1537 0, 1538 0, 1539 /*55*/ 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 /*60*/ MatGetSubMatrix_MPIDense, 1545 MatDestroy_MPIDense, 1546 MatView_MPIDense, 1547 0, 1548 0, 1549 /*65*/ 0, 1550 0, 1551 0, 1552 0, 1553 0, 1554 /*70*/ 0, 1555 0, 1556 0, 1557 0, 1558 0, 1559 /*75*/ 0, 1560 0, 1561 0, 1562 0, 1563 0, 1564 /*80*/ 0, 1565 0, 1566 0, 1567 0, 1568 /*84*/ MatLoad_MPIDense, 1569 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 /*90*/ 1575 #if defined(PETSC_HAVE_PLAPACK) 1576 MatMatMult_MPIDense_MPIDense, 1577 MatMatMultSymbolic_MPIDense_MPIDense, 1578 MatMatMultNumeric_MPIDense_MPIDense, 1579 #else 1580 0, 1581 0, 1582 0, 1583 #endif 1584 0, 1585 /*95*/ 0, 1586 0, 1587 0, 1588 0}; 1589 1590 EXTERN_C_BEGIN 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1593 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1594 { 1595 Mat_MPIDense *a; 1596 PetscErrorCode ierr; 1597 1598 PetscFunctionBegin; 1599 mat->preallocated = PETSC_TRUE; 1600 /* Note: For now, when data is specified above, this assumes the user correctly 1601 allocates the local dense storage space. We should add error checking. */ 1602 1603 a = (Mat_MPIDense*)mat->data; 1604 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1605 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1606 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1607 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1608 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 EXTERN_C_END 1612 1613 /*MC 1614 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1615 1616 Options Database Keys: 1617 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1618 1619 Level: beginner 1620 1621 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1622 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1623 (run config/configure.py with the option --download-plapack) 1624 1625 1626 Options Database Keys: 1627 . -mat_plapack_nprows <n> - number of rows in processor partition 1628 . -mat_plapack_npcols <n> - number of columns in processor partition 1629 . -mat_plapack_nb <n> - block size of template vector 1630 . -mat_plapack_nb_alg <n> - algorithmic block size 1631 - -mat_plapack_ckerror <n> - error checking flag 1632 1633 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1634 M*/ 1635 1636 EXTERN_C_BEGIN 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatCreate_MPIDense" 1639 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1640 { 1641 Mat_MPIDense *a; 1642 PetscErrorCode ierr; 1643 1644 PetscFunctionBegin; 1645 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1646 mat->data = (void*)a; 1647 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1648 mat->mapping = 0; 1649 1650 mat->insertmode = NOT_SET_VALUES; 1651 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1652 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1653 1654 mat->rmap->bs = mat->cmap->bs = 1; 1655 ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr); 1656 ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr); 1657 a->nvec = mat->cmap->n; 1658 1659 /* build cache for off array entries formed */ 1660 a->donotstash = PETSC_FALSE; 1661 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1662 1663 /* stuff used for matrix vector multiply */ 1664 a->lvec = 0; 1665 a->Mvctx = 0; 1666 a->roworiented = PETSC_TRUE; 1667 1668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1669 "MatGetDiagonalBlock_MPIDense", 1670 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1672 "MatMPIDenseSetPreallocation_MPIDense", 1673 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1675 "MatMatMult_MPIAIJ_MPIDense", 1676 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1678 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1679 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1680 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1681 "MatMatMultNumeric_MPIAIJ_MPIDense", 1682 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1683 #if defined(PETSC_HAVE_PLAPACK) 1684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C", 1685 "MatGetFactor_mpidense_plapack", 1686 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1687 #if defined(PETSC_HAVE_PLAPACK) 1688 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1689 #endif 1690 1691 #endif 1692 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1693 1694 PetscFunctionReturn(0); 1695 } 1696 EXTERN_C_END 1697 1698 /*MC 1699 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1700 1701 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1702 and MATMPIDENSE otherwise. 1703 1704 Options Database Keys: 1705 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1706 1707 Level: beginner 1708 1709 1710 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1711 M*/ 1712 1713 EXTERN_C_BEGIN 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatCreate_Dense" 1716 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1717 { 1718 PetscErrorCode ierr; 1719 PetscMPIInt size; 1720 1721 PetscFunctionBegin; 1722 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1723 if (size == 1) { 1724 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1725 } else { 1726 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 EXTERN_C_END 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1734 /*@C 1735 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1736 1737 Not collective 1738 1739 Input Parameters: 1740 . A - the matrix 1741 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1742 to control all matrix memory allocation. 1743 1744 Notes: 1745 The dense format is fully compatible with standard Fortran 77 1746 storage by columns. 1747 1748 The data input variable is intended primarily for Fortran programmers 1749 who wish to allocate their own matrix memory space. Most users should 1750 set data=PETSC_NULL. 1751 1752 Level: intermediate 1753 1754 .keywords: matrix,dense, parallel 1755 1756 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1757 @*/ 1758 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1759 { 1760 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1761 1762 PetscFunctionBegin; 1763 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1764 if (f) { 1765 ierr = (*f)(mat,data);CHKERRQ(ierr); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatCreateMPIDense" 1772 /*@C 1773 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1774 1775 Collective on MPI_Comm 1776 1777 Input Parameters: 1778 + comm - MPI communicator 1779 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1780 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1781 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1782 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1783 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1784 to control all matrix memory allocation. 1785 1786 Output Parameter: 1787 . A - the matrix 1788 1789 Notes: 1790 The dense format is fully compatible with standard Fortran 77 1791 storage by columns. 1792 1793 The data input variable is intended primarily for Fortran programmers 1794 who wish to allocate their own matrix memory space. Most users should 1795 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1796 1797 The user MUST specify either the local or global matrix dimensions 1798 (possibly both). 1799 1800 Level: intermediate 1801 1802 .keywords: matrix,dense, parallel 1803 1804 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1805 @*/ 1806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1807 { 1808 PetscErrorCode ierr; 1809 PetscMPIInt size; 1810 1811 PetscFunctionBegin; 1812 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1813 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1815 if (size > 1) { 1816 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1817 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1820 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1821 } 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatDuplicate_MPIDense" 1827 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1828 { 1829 Mat mat; 1830 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1831 PetscErrorCode ierr; 1832 1833 PetscFunctionBegin; 1834 *newmat = 0; 1835 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1836 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1837 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1838 a = (Mat_MPIDense*)mat->data; 1839 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1840 mat->factor = A->factor; 1841 mat->assembled = PETSC_TRUE; 1842 mat->preallocated = PETSC_TRUE; 1843 1844 mat->rmap->rstart = A->rmap->rstart; 1845 mat->rmap->rend = A->rmap->rend; 1846 a->size = oldmat->size; 1847 a->rank = oldmat->rank; 1848 mat->insertmode = NOT_SET_VALUES; 1849 a->nvec = oldmat->nvec; 1850 a->donotstash = oldmat->donotstash; 1851 1852 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1853 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1854 ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr); 1855 1856 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1857 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1858 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1859 1860 *newmat = mat; 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #include "petscsys.h" 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1868 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1869 { 1870 PetscErrorCode ierr; 1871 PetscMPIInt rank,size; 1872 PetscInt *rowners,i,m,nz,j; 1873 PetscScalar *array,*vals,*vals_ptr; 1874 MPI_Status status; 1875 1876 PetscFunctionBegin; 1877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1878 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1879 1880 /* determine ownership of all rows */ 1881 m = M/size + ((M % size) > rank); 1882 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1883 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1884 rowners[0] = 0; 1885 for (i=2; i<=size; i++) { 1886 rowners[i] += rowners[i-1]; 1887 } 1888 1889 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1890 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1891 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1892 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1893 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1894 1895 if (!rank) { 1896 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1897 1898 /* read in my part of the matrix numerical values */ 1899 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1900 1901 /* insert into matrix-by row (this is why cannot directly read into array */ 1902 vals_ptr = vals; 1903 for (i=0; i<m; i++) { 1904 for (j=0; j<N; j++) { 1905 array[i + j*m] = *vals_ptr++; 1906 } 1907 } 1908 1909 /* read in other processors and ship out */ 1910 for (i=1; i<size; i++) { 1911 nz = (rowners[i+1] - rowners[i])*N; 1912 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1913 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1914 } 1915 } else { 1916 /* receive numeric values */ 1917 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1918 1919 /* receive message of values*/ 1920 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1921 1922 /* insert into matrix-by row (this is why cannot directly read into array */ 1923 vals_ptr = vals; 1924 for (i=0; i<m; i++) { 1925 for (j=0; j<N; j++) { 1926 array[i + j*m] = *vals_ptr++; 1927 } 1928 } 1929 } 1930 ierr = PetscFree(rowners);CHKERRQ(ierr); 1931 ierr = PetscFree(vals);CHKERRQ(ierr); 1932 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatLoad_MPIDense" 1939 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1940 { 1941 Mat A; 1942 PetscScalar *vals,*svals; 1943 MPI_Comm comm = ((PetscObject)viewer)->comm; 1944 MPI_Status status; 1945 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1946 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1947 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1948 PetscInt i,nz,j,rstart,rend; 1949 int fd; 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1954 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1955 if (!rank) { 1956 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1957 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1958 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1959 } 1960 1961 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1962 M = header[1]; N = header[2]; nz = header[3]; 1963 1964 /* 1965 Handle case where matrix is stored on disk as a dense matrix 1966 */ 1967 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1968 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 /* determine ownership of all rows */ 1973 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1974 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1975 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1976 rowners[0] = 0; 1977 for (i=2; i<=size; i++) { 1978 rowners[i] += rowners[i-1]; 1979 } 1980 rstart = rowners[rank]; 1981 rend = rowners[rank+1]; 1982 1983 /* distribute row lengths to all processors */ 1984 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1985 offlens = ourlens + (rend-rstart); 1986 if (!rank) { 1987 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1988 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1989 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1990 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1991 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1992 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1993 } else { 1994 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1995 } 1996 1997 if (!rank) { 1998 /* calculate the number of nonzeros on each processor */ 1999 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2000 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2001 for (i=0; i<size; i++) { 2002 for (j=rowners[i]; j< rowners[i+1]; j++) { 2003 procsnz[i] += rowlengths[j]; 2004 } 2005 } 2006 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2007 2008 /* determine max buffer needed and allocate it */ 2009 maxnz = 0; 2010 for (i=0; i<size; i++) { 2011 maxnz = PetscMax(maxnz,procsnz[i]); 2012 } 2013 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2014 2015 /* read in my part of the matrix column indices */ 2016 nz = procsnz[0]; 2017 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2018 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2019 2020 /* read in every one elses and ship off */ 2021 for (i=1; i<size; i++) { 2022 nz = procsnz[i]; 2023 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2024 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2025 } 2026 ierr = PetscFree(cols);CHKERRQ(ierr); 2027 } else { 2028 /* determine buffer space needed for message */ 2029 nz = 0; 2030 for (i=0; i<m; i++) { 2031 nz += ourlens[i]; 2032 } 2033 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2034 2035 /* receive message of column indices*/ 2036 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2037 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2038 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2039 } 2040 2041 /* loop over local rows, determining number of off diagonal entries */ 2042 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2043 jj = 0; 2044 for (i=0; i<m; i++) { 2045 for (j=0; j<ourlens[i]; j++) { 2046 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2047 jj++; 2048 } 2049 } 2050 2051 /* create our matrix */ 2052 for (i=0; i<m; i++) { 2053 ourlens[i] -= offlens[i]; 2054 } 2055 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2056 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2057 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2058 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2059 A = *newmat; 2060 for (i=0; i<m; i++) { 2061 ourlens[i] += offlens[i]; 2062 } 2063 2064 if (!rank) { 2065 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2066 2067 /* read in my part of the matrix numerical values */ 2068 nz = procsnz[0]; 2069 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2070 2071 /* insert into matrix */ 2072 jj = rstart; 2073 smycols = mycols; 2074 svals = vals; 2075 for (i=0; i<m; i++) { 2076 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2077 smycols += ourlens[i]; 2078 svals += ourlens[i]; 2079 jj++; 2080 } 2081 2082 /* read in other processors and ship out */ 2083 for (i=1; i<size; i++) { 2084 nz = procsnz[i]; 2085 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2086 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2087 } 2088 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2089 } else { 2090 /* receive numeric values */ 2091 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2092 2093 /* receive message of values*/ 2094 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2095 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2096 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2097 2098 /* insert into matrix */ 2099 jj = rstart; 2100 smycols = mycols; 2101 svals = vals; 2102 for (i=0; i<m; i++) { 2103 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2104 smycols += ourlens[i]; 2105 svals += ourlens[i]; 2106 jj++; 2107 } 2108 } 2109 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2110 ierr = PetscFree(vals);CHKERRQ(ierr); 2111 ierr = PetscFree(mycols);CHKERRQ(ierr); 2112 ierr = PetscFree(rowners);CHKERRQ(ierr); 2113 2114 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatEqual_MPIDense" 2121 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2122 { 2123 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2124 Mat a,b; 2125 PetscTruth flg; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 a = matA->A; 2130 b = matB->A; 2131 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2132 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 #if defined(PETSC_HAVE_PLAPACK) 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2140 /*@C 2141 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2142 Level: developer 2143 2144 .keywords: Petsc, destroy, package, PLAPACK 2145 .seealso: PetscFinalize() 2146 @*/ 2147 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2148 { 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 ierr = PLA_Finalize();CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2158 /*@C 2159 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2160 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2161 2162 Input Parameter: 2163 . comm - the communicator the matrix lives on 2164 2165 Level: developer 2166 2167 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2168 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2169 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2170 cannot overlap. 2171 2172 .keywords: Petsc, initialize, package, PLAPACK 2173 .seealso: PetscInitializePackage(), PetscInitialize() 2174 @*/ 2175 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2176 { 2177 PetscMPIInt size; 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 if (!PLA_Initialized(PETSC_NULL)) { 2182 2183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2184 Plapack_nprows = 1; 2185 Plapack_npcols = size; 2186 2187 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2188 ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2189 ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2190 #if defined(PETSC_USE_DEBUG) 2191 Plapack_ierror = 3; 2192 #else 2193 Plapack_ierror = 0; 2194 #endif 2195 ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2196 if (Plapack_ierror){ 2197 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2198 } else { 2199 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2200 } 2201 2202 Plapack_nb_alg = 0; 2203 ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2204 if (Plapack_nb_alg) { 2205 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2206 } 2207 PetscOptionsEnd(); 2208 2209 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2210 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2211 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2212 } 2213 PetscFunctionReturn(0); 2214 } 2215 2216 2217 #endif 2218