#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/

PetscInt Plapack_nprows,Plapack_npcols;
MPI_Comm Plapack_comm_2d;

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
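/*
   A minimal usage sketch (not part of the library): rows that belong to another
   process are cached in the stash above and communicated during assembly, so a
   caller may set any global entry and then assemble.  The matrix A and the
   index arrays here are hypothetical.

      PetscInt    rows[2] = {0,1},cols[2] = {0,1};
      PetscScalar vals[4] = {1.0,2.0,3.0,4.0};         /* row-oriented 2x2 block */
      ierr = MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/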
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
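/*
   A minimal usage sketch (not part of the library): the pointer returned above
   is the raw column-major local storage, so local entry (i,j) lives at
   array[i + j*m] where m is the number of local rows and j is a global column
   index.  The matrix A is hypothetical.

      PetscScalar *array;
      PetscInt    m;
      ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetArray(A,&array);CHKERRQ(ierr);
      array[0] = 5.0;                          /* local entry (0,0) */
      ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/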
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
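/*
   A minimal usage sketch (not part of the library): each process may list any
   global rows, including rows it does not own; the scatter above routes every
   index to its owning process before the local rows are zeroed.  The matrix A
   is hypothetical.

      PetscInt rows[2] = {0,5};                       /* global row numbers */
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr); /* zero the rows, put 1.0 on the diagonal */
*/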
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
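/*
   A minimal usage sketch (not part of the library): because each process stores
   all columns of its local rows, the multiply routines above first gather the
   whole input vector into mdn->lvec and then apply a local dense kernel.  The
   objects A, x, and y are hypothetical and assumed to have conforming layouts.

      ierr = MatMult(A,x,y);CHKERRQ(ierr);            /* y = A*x  */
      ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);   /* x = A'*y */
*/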
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu = (Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu->CleanUpPlapack) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free(&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
    ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
    ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {
      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v    = vv;
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
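/*
   A minimal usage sketch (not part of the library): with the native format the
   matrix is written as one dense array, gathered row block by row block onto
   process zero as above.  The file name and the matrix A are hypothetical.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/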
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu = (Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows,Plapack_npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
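/*
   A minimal usage sketch (not part of the library): MAT_IGNORE_OFF_PROC_ENTRIES
   disables the stash, so values destined for other processes are dropped in
   MatSetValues_MPIDense() instead of communicated.  The matrix A is hypothetical.

      ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);           /* pass values column-major */
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* keep only local entries */
*/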
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->rmap.n,n = mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.",s2a,s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
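/*
   A minimal usage sketch (not part of the library): ll must conform to the row
   layout and rr to the column layout; PETSC_NULL skips that side of the scaling.
   The objects A, l, and r are hypothetical.

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);          /* A = diag(l)*A*diag(r) */
      ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr); /* right scaling only */
*/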
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,n = B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}
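/*
   A hypothetical direct call (not part of the library) to the symbolic routine
   above, which only creates and preallocates the product matrix; a numeric
   phase must still fill it.  In normal use this routine is reached through the
   MatMatMult() interface rather than called directly.

      Mat C;
      ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,1.0,&C);CHKERRQ(ierr); /* C has the shape of A*B */
*/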
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,m = A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
  ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);

  /* Copy b into rhs_pla */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_vector_to_global(m,&one,(void*)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  if (A->factor == FACTOR_LU) {
    /* Apply the permutations to the right hand sides */
    ierr = PLA_Apply_pivots_to_rows(v_pla,lu->pivots);CHKERRQ(ierr);

    /* Solve L y = b, overwriting b with y */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);

    /* Solve U x = y (=b), overwriting b with x */
    ierr = PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  } else { /* FACTOR_CHOLESKY */
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
    ierr = PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
  }

  /* Copy PLAPACK x into PETSc vector x */
  ierr = PLA_Obj_local_length(v_pla,&loc_m);CHKERRQ(ierr);
  ierr = PLA_Obj_local_buffer(v_pla,(void**)&loc_buf);CHKERRQ(ierr);
  ierr = PLA_Obj_local_stride(v_pla,&loc_stride);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved) {
    ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
    ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create ISs and ctx for the VecScatter */
    ierr = PLA_Obj_local_length(v_pla,&loc_m);CHKERRQ(ierr);
    ierr = PLA_Obj_local_stride(v_pla,&loc_stride);CHKERRQ(ierr);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb) {
      j = 0;
      while (j < lu->nb && i+j < loc_m) {
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,m = A->rmap.n,rstart,rend;
  PetscInt       info_pla = 0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);

  /* Copy A into lu->A */
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;

  (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,m = A->rmap.n,rstart,rend;
  PetscInt       info_pla = 0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* Copy A into lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M,&one,(void*)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);

  /* Factor A -> Cholesky factor */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0) SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;

  (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat            B;
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  PetscMPIInt    size;
  PetscInt       ierror;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);

  lu = (Mat_Plapack*)(B->spptr);

  /* Set default PLAPACK parameters */
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierror = 0;
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
  if (ierror) {
    ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
  }
  lu->ierror = ierror;

  lu->nb_alg = 0;
  ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
  if (lu->nb_alg) {
    ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
  }
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb,0,&lu->templ);CHKERRQ(ierr);

  /* Use suggested nb_alg if it is not provided by the user */
  if (lu->nb_alg == 0) {
    ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr);
    ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
  }

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->CleanUpPlapack = PETSC_TRUE;
  *F = B;
  PetscFunctionReturn(0);
}

/* Note the PETSc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  lu   = (Mat_Plapack*)(*F)->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  (*F)->factor = FACTOR_LU;
  PetscFunctionReturn(0);
}

/* Note the PETSc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_CHOLESKY;
  PetscFunctionReturn(0);
}
#endif
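/*
   A minimal usage sketch (not part of the library): with PLAPACK installed the
   factorization and solve kernels above are normally reached through the KSP/PC
   interface.  The objects A, b, and x are hypothetical.

      KSP ksp;
      PC  pc;
      ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
      ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);      /* direct solve via the LU kernels above */
      ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
      ierr = KSPDestroy(ksp);CHKERRQ(ierr);
*/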
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently works (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END
/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) for parallel dense
   matrices via the external package PLAPACK, if PLAPACK is installed (run
   config/configure.py with the option --download-plapack).

   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu;
#endif

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                           "MatSetSolverType_MPIDense_PLAPACK",
                                           MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
  lu->CleanUpPlapack = PETSC_FALSE;
  mat->spptr         = (void*)lu;
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
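/*
   A minimal usage sketch (not part of the library): the usual calling sequence
   lets PETSc allocate the storage.  The communicator comm and the sizes m, n,
   M, N are hypothetical.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/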
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data; set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
         for PETSc to control all matrix memory allocation

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
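
/*
   Typical call sequence for the routine above (a sketch; the global size
   10x10 and the diagonal values are made up for illustration).  Each
   process fills only the rows it owns, so no stashing is needed:

      Mat         A;
      PetscInt    row,rstart,rend;
      PetscScalar v;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        v    = (PetscScalar)(row + 1);
        ierr = MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/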
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    /* process 0 owns at least as many rows as any other process, so its
       buffer is large enough for every block shipped out below */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file stores rows contiguously,
       while the local array is by columns; this is why we cannot read
       directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive message of numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row, exactly as on process 0 */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  /* the array was checked out with MatGetArray() above and must be returned */
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
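
/*
   Why the copy loop above cannot be a single read: the file stores each
   process's block by rows, while the local array is column-major (the
   Fortran-style storage the dense format promises).  A sketch of the same
   mapping with an explicit index instead of the running pointer, for a
   local block of m rows and N columns:

      for (i=0; i<m; i++) {                  // i = local row
        for (j=0; j<N; j++) {                // j = column
          array[i + j*m] = vals[i*N + j];    // row-major -> column-major
        }
      }
*/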
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix.
     Note that fd is meaningful only on process 0, the only process that reads the file.
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix (the diagonal/off-diagonal bookkeeping mirrors the
     sparse loaders; the dense preallocation below does not use these lengths) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive message of numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
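
/*
   Sketch of driving the loader above through the public interface (the
   file name below is hypothetical).  MatLoad() dispatches to
   MatLoad_MPIDense() for this type; the file may hold either the sparse
   binary format or the dense format handled by
   MatLoad_MPIDense_DenseInFile() above:

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/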
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
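
/*
   Note on the routine above: each process compares only its local block;
   the MPI_Allreduce with MPI_LAND then makes the answer globally
   consistent, so every process receives the same flag.  This also means
   MatEqual() must be called collectively.  A sketch:

      PetscTruth eq;
      ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
      if (eq) {
        ierr = PetscPrintf(((PetscObject)A)->comm,"matrices are identical\n");CHKERRQ(ierr);
      }
*/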
#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
. path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size,nb_alg;
  PetscErrorCode ierr;
  PetscTruth     ierror = PETSC_FALSE; /* must be initialized: it is both the default and the result of the query below */

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsTruth("-plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
    if (ierror) {
      ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    nb_alg = 0;
    ierr   = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",nb_alg,&nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,nb_alg);CHKERRQ(ierr);
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
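
/*
   Option sketch for the initialization above: the 2D mesh handed to
   PLA_Comm_1D_to_2D() should satisfy nprows*npcols == number of MPI
   processes (the default 1 x size chosen above always does).  A
   hypothetical run redistributing 6 processes as a 2 x 3 mesh with error
   checking on:

      mpiexec -n 6 ./ex1 -plapack_nprows 2 -plapack_npcols 3 -plapack_ckerror
*/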