1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscTruth found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } else { 655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 666 PetscErrorCode ierr; 667 PetscMPIInt size = mdn->size,rank = mdn->rank; 668 const PetscViewerType vtype; 669 PetscTruth iascii,isdraw; 670 PetscViewer sviewer; 671 PetscViewerFormat format; 672 #if defined(PETSC_HAVE_PLAPACK) 673 Mat_Plapack *lu; 674 #endif 675 676 PetscFunctionBegin; 677 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 686 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 #if defined(PETSC_HAVE_PLAPACK) 689 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 691 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscTruth isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... This isn't the most efficient means, 734 but it's quick for now */ 735 A->insertmode = INSERT_VALUES; 736 row = mat->rmap->rstart; m = mdn->A->rmap->n; 737 for (i=0; i<m; i++) { 738 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 740 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 741 row++; 742 } 743 744 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 747 if (!rank) { 748 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 749 } 750 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 752 ierr = MatDestroy(A);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatView_MPIDense" 759 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 760 { 761 PetscErrorCode ierr; 762 PetscTruth iascii,isbinary,isdraw,issocket; 763 764 PetscFunctionBegin; 765 766 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 767 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 768 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 769 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 770 771 if (iascii || issocket || isdraw) { 772 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 773 } else if (isbinary) { 774 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 775 } else { 776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 777 } 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatGetInfo_MPIDense" 783 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 784 { 785 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 786 Mat mdn = mat->A; 787 PetscErrorCode ierr; 788 PetscReal isend[5],irecv[5]; 789 790 PetscFunctionBegin; 791 info->block_size = 1.0; 792 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 793 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 794 isend[3] = info->memory; isend[4] = info->mallocs; 795 if (flag == MAT_LOCAL) { 796 info->nz_used = isend[0]; 797 info->nz_allocated = isend[1]; 798 info->nz_unneeded = isend[2]; 799 info->memory = isend[3]; 800 info->mallocs = isend[4]; 801 } else if (flag == MAT_GLOBAL_MAX) { 802 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 803 info->nz_used = irecv[0]; 804 info->nz_allocated = irecv[1]; 805 info->nz_unneeded = irecv[2]; 806 info->memory = irecv[3]; 807 info->mallocs = irecv[4]; 808 } else if (flag == MAT_GLOBAL_SUM) { 809 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 810 info->nz_used = irecv[0]; 811 info->nz_allocated = irecv[1]; 812 info->nz_unneeded = irecv[2]; 813 info->memory = irecv[3]; 814 info->mallocs = irecv[4]; 815 } 816 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 817 info->fill_ratio_needed = 0; 818 info->factor_mallocs = 0; 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatSetOption_MPIDense" 824 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 825 { 826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 switch (op) { 831 case MAT_NEW_NONZERO_LOCATIONS: 832 case MAT_NEW_NONZERO_LOCATION_ERR: 833 case MAT_NEW_NONZERO_ALLOCATION_ERR: 834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 835 break; 836 case MAT_ROW_ORIENTED: 837 a->roworiented = flg; 838 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 839 break; 840 case MAT_NEW_DIAGONALS: 841 case MAT_USE_HASH_TABLE: 842 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 843 break; 844 case MAT_IGNORE_OFF_PROC_ENTRIES: 845 a->donotstash = flg; 846 break; 847 case MAT_SYMMETRIC: 848 case MAT_STRUCTURALLY_SYMMETRIC: 849 case MAT_HERMITIAN: 850 case MAT_SYMMETRY_ETERNAL: 851 case MAT_IGNORE_LOWER_TRIANGULAR: 852 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 853 break; 854 default: 855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatDiagonalScale_MPIDense" 863 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 864 { 865 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 866 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 867 PetscScalar *l,*r,x,*v; 868 PetscErrorCode ierr; 869 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 870 871 PetscFunctionBegin; 872 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 873 if (ll) { 874 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 875 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 876 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 877 for (i=0; i<m; i++) { 878 x = l[i]; 879 v = mat->v + i; 880 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 881 } 882 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 883 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 884 } 885 if (rr) { 886 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 887 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 888 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 891 for (i=0; i<n; i++) { 892 x = r[i]; 893 v = mat->v + i*m; 894 for (j=0; j<m; j++) { (*v++) *= x;} 895 } 896 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatNorm_MPIDense" 904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 905 { 906 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 907 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 908 PetscErrorCode ierr; 909 PetscInt i,j; 910 PetscReal sum = 0.0; 911 PetscScalar *v = mat->v; 912 913 PetscFunctionBegin; 914 if (mdn->size == 1) { 915 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 916 } else { 917 if (type == NORM_FROBENIUS) { 918 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 919 #if defined(PETSC_USE_COMPLEX) 920 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 921 #else 922 sum += (*v)*(*v); v++; 923 #endif 924 } 925 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 *nrm = sqrt(*nrm); 927 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 928 } else if (type == NORM_1) { 929 PetscReal *tmp,*tmp2; 930 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 931 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 932 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 933 *nrm = 0.0; 934 v = mat->v; 935 for (j=0; j<mdn->A->cmap->n; j++) { 936 for (i=0; i<mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); v++; 938 } 939 } 940 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 941 for (j=0; j<A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 945 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 949 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 950 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_MPIDense" 957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 960 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 961 Mat B; 962 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 963 PetscErrorCode ierr; 964 PetscInt j,i; 965 PetscScalar *v; 966 967 PetscFunctionBegin; 968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 969 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 970 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #if defined(PETSC_HAVE_PLAPACK) 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1016 { 1017 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1018 PetscErrorCode ierr; 1019 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1020 PetscScalar *array; 1021 PetscReal one = 1.0; 1022 1023 PetscFunctionBegin; 1024 /* Copy A into F->lu->A */ 1025 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1026 ierr = PLA_API_begin();CHKERRQ(ierr); 1027 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1028 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1029 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1031 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_end();CHKERRQ(ierr); 1034 lu->rstart = rstart; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1041 { 1042 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1043 PetscErrorCode ierr; 1044 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1045 PetscScalar *array; 1046 PetscReal one = 1.0; 1047 1048 PetscFunctionBegin; 1049 /* Copy F into A->lu->A */ 1050 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1051 ierr = PLA_API_begin();CHKERRQ(ierr); 1052 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1053 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1054 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1056 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1058 ierr = PLA_API_end();CHKERRQ(ierr); 1059 lu->rstart = rstart; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1066 { 1067 PetscErrorCode ierr; 1068 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1069 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1070 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1071 PLA_Obj alpha = NULL,beta = NULL; 1072 1073 PetscFunctionBegin; 1074 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1075 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1076 1077 /* 1078 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1079 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1080 */ 1081 1082 /* do the multiply in PLA */ 1083 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1084 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1085 CHKMEMQ; 1086 1087 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1088 CHKMEMQ; 1089 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1090 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1102 { 1103 PetscErrorCode ierr; 1104 PetscInt m=A->rmap->n,n=B->cmap->n; 1105 Mat Cmat; 1106 1107 PetscFunctionBegin; 1108 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1109 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1110 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1111 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1112 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 1116 *C = Cmat; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1123 { 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 if (scall == MAT_INITIAL_MATRIX){ 1128 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1129 } 1130 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSolve_MPIDense" 1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1137 { 1138 MPI_Comm comm = ((PetscObject)A)->comm; 1139 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1140 PetscErrorCode ierr; 1141 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1142 PetscScalar *array; 1143 PetscReal one = 1.0; 1144 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1145 PLA_Obj v_pla = NULL; 1146 PetscScalar *loc_buf; 1147 Vec loc_x; 1148 1149 PetscFunctionBegin; 1150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152 1153 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1154 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1155 PLA_Obj_set_to_zero(v_pla); 1156 1157 /* Copy b into rhs_pla */ 1158 PLA_API_begin(); 1159 PLA_Obj_API_open(v_pla); 1160 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1161 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1162 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1163 PLA_Obj_API_close(v_pla); 1164 PLA_API_end(); 1165 1166 if (A->factortype == MAT_FACTOR_LU){ 1167 /* Apply the permutations to the right hand sides */ 1168 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1169 1170 /* Solve L y = b, overwriting b with y */ 1171 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1172 1173 /* Solve U x = y (=b), overwriting b with x */ 1174 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1175 } else { /* MAT_FACTOR_CHOLESKY */ 1176 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1178 PLA_NONUNIT_DIAG,lu->A,v_pla); 1179 } 1180 1181 /* Copy PLAPACK x into Petsc vector x */ 1182 PLA_Obj_local_length(v_pla, &loc_m); 1183 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1184 PLA_Obj_local_stride(v_pla, &loc_stride); 1185 /* 1186 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1187 */ 1188 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1189 if (!lu->pla_solved){ 1190 1191 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1192 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1193 1194 /* Create IS and cts for VecScatterring */ 1195 PLA_Obj_local_length(v_pla, &loc_m); 1196 PLA_Obj_local_stride(v_pla, &loc_stride); 1197 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1198 1199 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1200 for (i=0; i<loc_m; i+=lu->nb){ 1201 j = 0; 1202 while (j < lu->nb && i+j < loc_m){ 1203 idx_petsc[i+j] = rstart + j; j++; 1204 } 1205 rstart += size*lu->nb; 1206 } 1207 1208 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1209 1210 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1212 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1214 } 1215 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 1218 /* Free data */ 1219 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1220 PLA_Obj_free(&v_pla); 1221 1222 lu->pla_solved = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1229 { 1230 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1231 PetscErrorCode ierr; 1232 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1233 PetscInt info_pla=0; 1234 PetscScalar *array,one = 1.0; 1235 1236 PetscFunctionBegin; 1237 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1238 PLA_Obj_free(&lu->A); 1239 PLA_Obj_free (&lu->pivots); 1240 } 1241 /* Create PLAPACK matrix object */ 1242 lu->A = NULL; lu->pivots = NULL; 1243 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1244 PLA_Obj_set_to_zero(lu->A); 1245 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1246 1247 /* Copy A into lu->A */ 1248 PLA_API_begin(); 1249 PLA_Obj_API_open(lu->A); 1250 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1251 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1252 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1253 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1254 PLA_Obj_API_close(lu->A); 1255 PLA_API_end(); 1256 1257 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1258 info_pla = PLA_LU(lu->A,lu->pivots); 1259 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1260 1261 lu->rstart = rstart; 1262 lu->mstruct = SAME_NONZERO_PATTERN; 1263 F->ops->solve = MatSolve_MPIDense; 1264 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1270 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1271 { 1272 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1273 PetscErrorCode ierr; 1274 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1275 PetscInt info_pla=0; 1276 PetscScalar *array,one = 1.0; 1277 1278 PetscFunctionBegin; 1279 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1280 PLA_Obj_free(&lu->A); 1281 } 1282 /* Create PLAPACK matrix object */ 1283 lu->A = NULL; 1284 lu->pivots = NULL; 1285 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1286 1287 /* Copy A into lu->A */ 1288 PLA_API_begin(); 1289 PLA_Obj_API_open(lu->A); 1290 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1291 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1292 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1293 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1294 PLA_Obj_API_close(lu->A); 1295 PLA_API_end(); 1296 1297 /* Factor P A -> Chol */ 1298 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1299 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1300 1301 lu->rstart = rstart; 1302 lu->mstruct = SAME_NONZERO_PATTERN; 1303 F->ops->solve = MatSolve_MPIDense; 1304 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* Note the Petsc perm permutation is ignored */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1311 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1312 { 1313 PetscErrorCode ierr; 1314 PetscTruth issymmetric,set; 1315 1316 PetscFunctionBegin; 1317 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1318 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1319 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* Note the Petsc r and c permutations are ignored */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt M = A->rmap->N; 1330 Mat_Plapack *lu; 1331 1332 PetscFunctionBegin; 1333 lu = (Mat_Plapack*)F->spptr; 1334 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1335 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 EXTERN_C_BEGIN 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1342 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1343 { 1344 PetscFunctionBegin; 1345 *type = MATSOLVERPLAPACK; 1346 PetscFunctionReturn(0); 1347 } 1348 EXTERN_C_END 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1353 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1354 { 1355 PetscErrorCode ierr; 1356 Mat_Plapack *lu; 1357 PetscMPIInt size; 1358 PetscInt M=A->rmap->N; 1359 1360 PetscFunctionBegin; 1361 /* Create the factorization matrix */ 1362 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1363 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1364 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1365 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1366 (*F)->spptr = (void*)lu; 1367 1368 /* Set default Plapack parameters */ 1369 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1370 lu->nb = M/size; 1371 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1372 1373 /* Set runtime options */ 1374 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1375 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1376 PetscOptionsEnd(); 1377 1378 /* Create object distribution template */ 1379 lu->templ = NULL; 1380 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1381 1382 /* Set the datatype */ 1383 #if defined(PETSC_USE_COMPLEX) 1384 lu->datatype = MPI_DOUBLE_COMPLEX; 1385 #else 1386 lu->datatype = MPI_DOUBLE; 1387 #endif 1388 1389 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1390 1391 1392 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1393 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1394 1395 if (ftype == MAT_FACTOR_LU) { 1396 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1397 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1398 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1399 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1400 (*F)->factortype = ftype; 1401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 #endif 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1409 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1410 { 1411 #if defined(PETSC_HAVE_PLAPACK) 1412 PetscErrorCode ierr; 1413 #endif 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_HAVE_PLAPACK) 1417 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1418 #else 1419 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1420 #endif 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatAXPY_MPIDense" 1426 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1427 { 1428 PetscErrorCode ierr; 1429 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1430 1431 PetscFunctionBegin; 1432 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatConjugate_MPIDense" 1438 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1439 { 1440 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatRealPart_MPIDense" 1450 PetscErrorCode MatRealPart_MPIDense(Mat A) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1462 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /* -------------------------------------------------------------------*/ 1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1474 MatGetRow_MPIDense, 1475 MatRestoreRow_MPIDense, 1476 MatMult_MPIDense, 1477 /* 4*/ MatMultAdd_MPIDense, 1478 MatMultTranspose_MPIDense, 1479 MatMultTransposeAdd_MPIDense, 1480 0, 1481 0, 1482 0, 1483 /*10*/ 0, 1484 0, 1485 0, 1486 0, 1487 MatTranspose_MPIDense, 1488 /*15*/ MatGetInfo_MPIDense, 1489 MatEqual_MPIDense, 1490 MatGetDiagonal_MPIDense, 1491 MatDiagonalScale_MPIDense, 1492 MatNorm_MPIDense, 1493 /*20*/ MatAssemblyBegin_MPIDense, 1494 MatAssemblyEnd_MPIDense, 1495 MatSetOption_MPIDense, 1496 MatZeroEntries_MPIDense, 1497 /*24*/ MatZeroRows_MPIDense, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*29*/ MatSetUpPreallocation_MPIDense, 1503 0, 1504 0, 1505 MatGetArray_MPIDense, 1506 MatRestoreArray_MPIDense, 1507 /*34*/ MatDuplicate_MPIDense, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*39*/ MatAXPY_MPIDense, 1513 MatGetSubMatrices_MPIDense, 1514 0, 1515 MatGetValues_MPIDense, 1516 0, 1517 /*44*/ 0, 1518 MatScale_MPIDense, 1519 0, 1520 0, 1521 0, 1522 /*49*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*54*/ 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*59*/ MatGetSubMatrix_MPIDense, 1533 MatDestroy_MPIDense, 1534 MatView_MPIDense, 1535 0, 1536 0, 1537 /*64*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*69*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*74*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*79*/ 0, 1553 0, 1554 0, 1555 0, 1556 /*83*/ 0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*89*/ 1563 #if defined(PETSC_HAVE_PLAPACK) 1564 MatMatMult_MPIDense_MPIDense, 1565 MatMatMultSymbolic_MPIDense_MPIDense, 1566 MatMatMultNumeric_MPIDense_MPIDense, 1567 #else 1568 0, 1569 0, 1570 0, 1571 #endif 1572 0, 1573 0, 1574 /*94*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*99*/ 0, 1580 0, 1581 0, 1582 MatConjugate_MPIDense, 1583 0, 1584 /*104*/0, 1585 MatRealPart_MPIDense, 1586 MatImaginaryPart_MPIDense, 1587 0, 1588 0, 1589 /*109*/0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*114*/0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /*119*/0, 1600 0, 1601 0, 1602 0, 1603 MatLoadnew_MPIDense 1604 }; 1605 1606 EXTERN_C_BEGIN 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1609 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1610 { 1611 Mat_MPIDense *a; 1612 PetscErrorCode ierr; 1613 1614 PetscFunctionBegin; 1615 mat->preallocated = PETSC_TRUE; 1616 /* Note: For now, when data is specified above, this assumes the user correctly 1617 allocates the local dense storage space. We should add error checking. */ 1618 1619 a = (Mat_MPIDense*)mat->data; 1620 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1621 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1622 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1623 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1624 a->nvec = mat->cmap->n; 1625 1626 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1627 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1628 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1629 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1630 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 EXTERN_C_END 1634 1635 /*MC 1636 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1637 1638 run ./configure with the option --download-plapack 1639 1640 1641 Options Database Keys: 1642 . -mat_plapack_nprows <n> - number of rows in processor partition 1643 . -mat_plapack_npcols <n> - number of columns in processor partition 1644 . -mat_plapack_nb <n> - block size of template vector 1645 . -mat_plapack_nb_alg <n> - algorithmic block size 1646 - -mat_plapack_ckerror <n> - error checking flag 1647 1648 Level: intermediate 1649 1650 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1651 1652 M*/ 1653 1654 EXTERN_C_BEGIN 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatCreate_MPIDense" 1657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1658 { 1659 Mat_MPIDense *a; 1660 PetscErrorCode ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1664 mat->data = (void*)a; 1665 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1666 mat->mapping = 0; 1667 1668 mat->insertmode = NOT_SET_VALUES; 1669 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1670 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1671 1672 /* build cache for off array entries formed */ 1673 a->donotstash = PETSC_FALSE; 1674 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1675 1676 /* stuff used for matrix vector multiply */ 1677 a->lvec = 0; 1678 a->Mvctx = 0; 1679 a->roworiented = PETSC_TRUE; 1680 1681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1682 "MatGetDiagonalBlock_MPIDense", 1683 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1685 "MatMPIDenseSetPreallocation_MPIDense", 1686 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1688 "MatMatMult_MPIAIJ_MPIDense", 1689 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1691 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1692 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1694 "MatMatMultNumeric_MPIAIJ_MPIDense", 1695 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1697 "MatGetFactor_mpidense_petsc", 1698 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1699 #if defined(PETSC_HAVE_PLAPACK) 1700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1701 "MatGetFactor_mpidense_plapack", 1702 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1703 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1704 #endif 1705 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1706 1707 PetscFunctionReturn(0); 1708 } 1709 EXTERN_C_END 1710 1711 /*MC 1712 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1713 1714 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1715 and MATMPIDENSE otherwise. 1716 1717 Options Database Keys: 1718 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1719 1720 Level: beginner 1721 1722 1723 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1724 M*/ 1725 1726 EXTERN_C_BEGIN 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatCreate_Dense" 1729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1730 { 1731 PetscErrorCode ierr; 1732 PetscMPIInt size; 1733 1734 PetscFunctionBegin; 1735 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1736 if (size == 1) { 1737 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1738 } else { 1739 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 EXTERN_C_END 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1747 /*@C 1748 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1749 1750 Not collective 1751 1752 Input Parameters: 1753 . A - the matrix 1754 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1755 to control all matrix memory allocation. 1756 1757 Notes: 1758 The dense format is fully compatible with standard Fortran 77 1759 storage by columns. 1760 1761 The data input variable is intended primarily for Fortran programmers 1762 who wish to allocate their own matrix memory space. Most users should 1763 set data=PETSC_NULL. 1764 1765 Level: intermediate 1766 1767 .keywords: matrix,dense, parallel 1768 1769 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1770 @*/ 1771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1772 { 1773 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1774 1775 PetscFunctionBegin; 1776 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1777 if (f) { 1778 ierr = (*f)(mat,data);CHKERRQ(ierr); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatCreateMPIDense" 1785 /*@C 1786 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1787 1788 Collective on MPI_Comm 1789 1790 Input Parameters: 1791 + comm - MPI communicator 1792 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1793 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1794 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1795 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1796 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1797 to control all matrix memory allocation. 1798 1799 Output Parameter: 1800 . A - the matrix 1801 1802 Notes: 1803 The dense format is fully compatible with standard Fortran 77 1804 storage by columns. 1805 1806 The data input variable is intended primarily for Fortran programmers 1807 who wish to allocate their own matrix memory space. Most users should 1808 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1809 1810 The user MUST specify either the local or global matrix dimensions 1811 (possibly both). 1812 1813 Level: intermediate 1814 1815 .keywords: matrix,dense, parallel 1816 1817 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1818 @*/ 1819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1820 { 1821 PetscErrorCode ierr; 1822 PetscMPIInt size; 1823 1824 PetscFunctionBegin; 1825 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1826 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1828 if (size > 1) { 1829 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1830 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1831 } else { 1832 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1833 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatDuplicate_MPIDense" 1840 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1841 { 1842 Mat mat; 1843 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 *newmat = 0; 1848 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1849 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1850 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1851 a = (Mat_MPIDense*)mat->data; 1852 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1853 mat->factortype = A->factortype; 1854 mat->assembled = PETSC_TRUE; 1855 mat->preallocated = PETSC_TRUE; 1856 1857 mat->rmap->rstart = A->rmap->rstart; 1858 mat->rmap->rend = A->rmap->rend; 1859 a->size = oldmat->size; 1860 a->rank = oldmat->rank; 1861 mat->insertmode = NOT_SET_VALUES; 1862 a->nvec = oldmat->nvec; 1863 a->donotstash = oldmat->donotstash; 1864 1865 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1866 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1867 1868 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1869 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1870 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1871 1872 *newmat = mat; 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatLoadnew_MPIDense_DenseInFile" 1878 PetscErrorCode MatLoadnew_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1879 { 1880 PetscErrorCode ierr; 1881 PetscMPIInt rank,size; 1882 PetscInt *rowners,i,m,nz,j; 1883 PetscScalar *array,*vals,*vals_ptr; 1884 MPI_Status status; 1885 1886 PetscFunctionBegin; 1887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1889 1890 /* determine ownership of all rows */ 1891 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1892 else m = newmat->rmap->n; 1893 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1894 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1895 rowners[0] = 0; 1896 for (i=2; i<=size; i++) { 1897 rowners[i] += rowners[i-1]; 1898 } 1899 1900 if (!sizesset) { 1901 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1902 } 1903 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 1904 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 1905 1906 if (!rank) { 1907 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1908 1909 /* read in my part of the matrix numerical values */ 1910 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1911 1912 /* insert into matrix-by row (this is why cannot directly read into array */ 1913 vals_ptr = vals; 1914 for (i=0; i<m; i++) { 1915 for (j=0; j<N; j++) { 1916 array[i + j*m] = *vals_ptr++; 1917 } 1918 } 1919 1920 /* read in other processors and ship out */ 1921 for (i=1; i<size; i++) { 1922 nz = (rowners[i+1] - rowners[i])*N; 1923 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1924 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1925 } 1926 } else { 1927 /* receive numeric values */ 1928 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1929 1930 /* receive message of values*/ 1931 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 1932 1933 /* insert into matrix-by row (this is why cannot directly read into array */ 1934 vals_ptr = vals; 1935 for (i=0; i<m; i++) { 1936 for (j=0; j<N; j++) { 1937 array[i + j*m] = *vals_ptr++; 1938 } 1939 } 1940 } 1941 ierr = PetscFree(rowners);CHKERRQ(ierr); 1942 ierr = PetscFree(vals);CHKERRQ(ierr); 1943 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatLoadnew_MPIDense" 1950 PetscErrorCode MatLoadnew_MPIDense(PetscViewer viewer,Mat newmat) 1951 { 1952 PetscScalar *vals,*svals; 1953 MPI_Comm comm = ((PetscObject)viewer)->comm; 1954 MPI_Status status; 1955 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1956 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1957 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1958 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1959 int fd; 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1964 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1965 if (!rank) { 1966 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1967 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1968 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1969 } 1970 1971 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1972 1973 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1974 M = header[1]; N = header[2]; nz = header[3]; 1975 1976 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1977 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1978 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1979 1980 /* If global sizes are set, check if they are consistent with that given in the file */ 1981 if (sizesset) { 1982 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1983 } 1984 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1985 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1986 1987 /* 1988 Handle case where matrix is stored on disk as a dense matrix 1989 */ 1990 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1991 ierr = MatLoadnew_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 /* determine ownership of all rows */ 1996 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1997 else m = PetscMPIIntCast(newmat->rmap->n); 1998 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1999 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2000 rowners[0] = 0; 2001 for (i=2; i<=size; i++) { 2002 rowners[i] += rowners[i-1]; 2003 } 2004 rstart = rowners[rank]; 2005 rend = rowners[rank+1]; 2006 2007 /* distribute row lengths to all processors */ 2008 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2009 if (!rank) { 2010 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2011 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2012 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2013 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2014 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2015 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2016 } else { 2017 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2018 } 2019 2020 if (!rank) { 2021 /* calculate the number of nonzeros on each processor */ 2022 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2023 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2024 for (i=0; i<size; i++) { 2025 for (j=rowners[i]; j< rowners[i+1]; j++) { 2026 procsnz[i] += rowlengths[j]; 2027 } 2028 } 2029 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2030 2031 /* determine max buffer needed and allocate it */ 2032 maxnz = 0; 2033 for (i=0; i<size; i++) { 2034 maxnz = PetscMax(maxnz,procsnz[i]); 2035 } 2036 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2037 2038 /* read in my part of the matrix column indices */ 2039 nz = procsnz[0]; 2040 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2041 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2042 2043 /* read in every one elses and ship off */ 2044 for (i=1; i<size; i++) { 2045 nz = procsnz[i]; 2046 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2047 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2048 } 2049 ierr = PetscFree(cols);CHKERRQ(ierr); 2050 } else { 2051 /* determine buffer space needed for message */ 2052 nz = 0; 2053 for (i=0; i<m; i++) { 2054 nz += ourlens[i]; 2055 } 2056 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2057 2058 /* receive message of column indices*/ 2059 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2060 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2061 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2062 } 2063 2064 /* loop over local rows, determining number of off diagonal entries */ 2065 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2066 jj = 0; 2067 for (i=0; i<m; i++) { 2068 for (j=0; j<ourlens[i]; j++) { 2069 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2070 jj++; 2071 } 2072 } 2073 2074 /* create our matrix */ 2075 for (i=0; i<m; i++) { 2076 ourlens[i] -= offlens[i]; 2077 } 2078 2079 if (!sizesset) { 2080 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2081 } 2082 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2083 for (i=0; i<m; i++) { 2084 ourlens[i] += offlens[i]; 2085 } 2086 2087 if (!rank) { 2088 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2089 2090 /* read in my part of the matrix numerical values */ 2091 nz = procsnz[0]; 2092 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2093 2094 /* insert into matrix */ 2095 jj = rstart; 2096 smycols = mycols; 2097 svals = vals; 2098 for (i=0; i<m; i++) { 2099 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2100 smycols += ourlens[i]; 2101 svals += ourlens[i]; 2102 jj++; 2103 } 2104 2105 /* read in other processors and ship out */ 2106 for (i=1; i<size; i++) { 2107 nz = procsnz[i]; 2108 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2109 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2110 } 2111 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2112 } else { 2113 /* receive numeric values */ 2114 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2115 2116 /* receive message of values*/ 2117 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2118 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2119 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2120 2121 /* insert into matrix */ 2122 jj = rstart; 2123 smycols = mycols; 2124 svals = vals; 2125 for (i=0; i<m; i++) { 2126 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2127 smycols += ourlens[i]; 2128 svals += ourlens[i]; 2129 jj++; 2130 } 2131 } 2132 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2133 ierr = PetscFree(vals);CHKERRQ(ierr); 2134 ierr = PetscFree(mycols);CHKERRQ(ierr); 2135 ierr = PetscFree(rowners);CHKERRQ(ierr); 2136 2137 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatEqual_MPIDense" 2144 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2145 { 2146 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2147 Mat a,b; 2148 PetscTruth flg; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 a = matA->A; 2153 b = matB->A; 2154 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2155 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #if defined(PETSC_HAVE_PLAPACK) 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2163 /*@C 2164 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2165 Level: developer 2166 2167 .keywords: Petsc, destroy, package, PLAPACK 2168 .seealso: PetscFinalize() 2169 @*/ 2170 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = PLA_Finalize();CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2181 /*@C 2182 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2183 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2184 2185 Input Parameter: 2186 . comm - the communicator the matrix lives on 2187 2188 Level: developer 2189 2190 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2191 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2192 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2193 cannot overlap. 2194 2195 .keywords: Petsc, initialize, package, PLAPACK 2196 .seealso: PetscSysInitializePackage(), PetscInitialize() 2197 @*/ 2198 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2199 { 2200 PetscMPIInt size; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 if (!PLA_Initialized(PETSC_NULL)) { 2205 2206 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2207 Plapack_nprows = 1; 2208 Plapack_npcols = size; 2209 2210 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2211 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2212 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2213 #if defined(PETSC_USE_DEBUG) 2214 Plapack_ierror = 3; 2215 #else 2216 Plapack_ierror = 0; 2217 #endif 2218 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2219 if (Plapack_ierror){ 2220 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2221 } else { 2222 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2223 } 2224 2225 Plapack_nb_alg = 0; 2226 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2227 if (Plapack_nb_alg) { 2228 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2229 } 2230 PetscOptionsEnd(); 2231 2232 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2233 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2234 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 2239 #endif 2240