1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscTruth flg; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) { 34 *B = mat->A; 35 } else { 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatGetRow_MPIDense" 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatRestoreRow_MPIDense" 58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 64 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 72 { 73 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 74 PetscErrorCode ierr; 75 PetscInt m = A->rmap.n,rstart = A->rmap.rstart; 76 PetscScalar *array; 77 MPI_Comm comm; 78 79 PetscFunctionBegin; 80 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 81 82 /* The reuse aspect is not implemented efficiently */ 83 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 84 85 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 86 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatCreate(comm,B);CHKERRQ(ierr); 88 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 89 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 90 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 91 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 95 *iscopy = PETSC_TRUE; 96 PetscFunctionReturn(0); 97 } 98 EXTERN_C_END 99 100 #undef __FUNCT__ 101 #define __FUNCT__ "MatSetValues_MPIDense" 102 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 103 { 104 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 105 PetscErrorCode ierr; 106 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 107 PetscTruth roworiented = A->roworiented; 108 109 PetscFunctionBegin; 110 for (i=0; i<m; i++) { 111 if (idxm[i] < 0) continue; 112 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 113 if (idxm[i] >= rstart && idxm[i] < rend) { 114 row = idxm[i] - rstart; 115 if (roworiented) { 116 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 117 } else { 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) continue; 120 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 121 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 122 } 123 } 124 } else { 125 if (!A->donotstash) { 126 if (roworiented) { 127 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 128 } else { 129 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 130 } 131 } 132 } 133 } 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatGetValues_MPIDense" 139 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 140 { 141 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 142 PetscErrorCode ierr; 143 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 144 145 PetscFunctionBegin; 146 for (i=0; i<m; i++) { 147 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 148 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 149 if (idxm[i] >= rstart && idxm[i] < rend) { 150 row = idxm[i] - rstart; 151 for (j=0; j<n; j++) { 152 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 153 if (idxn[j] >= mat->cmap.N) { 154 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 155 } 156 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 157 } 158 } else { 159 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 160 } 161 } 162 PetscFunctionReturn(0); 163 } 164 165 #undef __FUNCT__ 166 #define __FUNCT__ "MatGetArray_MPIDense" 167 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 #undef __FUNCT__ 178 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 179 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 180 { 181 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 182 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 183 PetscErrorCode ierr; 184 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 185 PetscScalar *av,*bv,*v = lmat->v; 186 Mat newmat; 187 188 PetscFunctionBegin; 189 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 190 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 193 194 /* No parallel redistribution currently supported! Should really check each index set 195 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 196 original matrix! */ 197 198 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 199 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 200 201 /* Check submatrix call */ 202 if (scall == MAT_REUSE_MATRIX) { 203 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 204 /* Really need to test rows and column sizes! */ 205 newmat = *B; 206 } else { 207 /* Create and fill new matrix */ 208 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 209 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 210 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 211 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 212 } 213 214 /* Now extract the data pointers and do the copy, column at a time */ 215 newmatd = (Mat_MPIDense*)newmat->data; 216 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 217 218 for (i=0; i<ncols; i++) { 219 av = v + nlrows*icol[i]; 220 for (j=0; j<nrows; j++) { 221 *bv++ = av[irow[j] - rstart]; 222 } 223 } 224 225 /* Assemble the matrices so that the correct flags are set */ 226 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 227 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 228 229 /* Free work space */ 230 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 232 *B = newmat; 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatRestoreArray_MPIDense" 238 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 239 { 240 PetscFunctionBegin; 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 246 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 247 { 248 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 249 MPI_Comm comm = ((PetscObject)mat)->comm; 250 PetscErrorCode ierr; 251 PetscInt nstash,reallocs; 252 InsertMode addv; 253 254 PetscFunctionBegin; 255 /* make sure all processors are either in INSERTMODE or ADDMODE */ 256 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 257 if (addv == (ADD_VALUES|INSERT_VALUES)) { 258 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 259 } 260 mat->insertmode = addv; /* in case this processor had no cache */ 261 262 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 263 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 264 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt i,*row,*col,flg,j,rstart,ncols; 275 PetscMPIInt n; 276 PetscScalar *val; 277 InsertMode addv=mat->insertmode; 278 279 PetscFunctionBegin; 280 /* wait on receives */ 281 while (1) { 282 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 283 if (!flg) break; 284 285 for (i=0; i<n;) { 286 /* Now identify the consecutive vals belonging to the same row */ 287 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 288 if (j < n) ncols = j-i; 289 else ncols = n-i; 290 /* Now assemble all these values with a single function call */ 291 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 292 i = j; 293 } 294 } 295 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 296 297 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 298 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 299 300 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 301 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 302 } 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatZeroEntries_MPIDense" 308 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 309 { 310 PetscErrorCode ierr; 311 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 312 313 PetscFunctionBegin; 314 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 /* the code does not do the diagonal entries correctly unless the 319 matrix is square and the column and row owerships are identical. 320 This is a BUG. The only way to fix it seems to be to access 321 mdn->A and mdn->B directly and not through the MatZeroRows() 322 routine. 323 */ 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroRows_MPIDense" 326 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 327 { 328 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 329 PetscErrorCode ierr; 330 PetscInt i,*owners = A->rmap.range; 331 PetscInt *nprocs,j,idx,nsends; 332 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 333 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 334 PetscInt *lens,*lrows,*values; 335 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 336 MPI_Comm comm = ((PetscObject)A)->comm; 337 MPI_Request *send_waits,*recv_waits; 338 MPI_Status recv_status,*send_status; 339 PetscTruth found; 340 341 PetscFunctionBegin; 342 /* first count number of contributors to each processor */ 343 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 344 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 345 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 346 for (i=0; i<N; i++) { 347 idx = rows[i]; 348 found = PETSC_FALSE; 349 for (j=0; j<size; j++) { 350 if (idx >= owners[j] && idx < owners[j+1]) { 351 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 352 } 353 } 354 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 355 } 356 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 357 358 /* inform other processors of number of messages and max length*/ 359 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 360 361 /* post receives: */ 362 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 363 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 364 for (i=0; i<nrecvs; i++) { 365 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 366 } 367 368 /* do sends: 369 1) starts[i] gives the starting index in svalues for stuff going to 370 the ith processor 371 */ 372 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 373 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 374 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 375 starts[0] = 0; 376 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 377 for (i=0; i<N; i++) { 378 svalues[starts[owner[i]]++] = rows[i]; 379 } 380 381 starts[0] = 0; 382 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 383 count = 0; 384 for (i=0; i<size; i++) { 385 if (nprocs[2*i+1]) { 386 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscFree(starts);CHKERRQ(ierr); 390 391 base = owners[rank]; 392 393 /* wait on receives */ 394 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 395 source = lens + nrecvs; 396 count = nrecvs; slen = 0; 397 while (count) { 398 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 399 /* unpack receives into our local space */ 400 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 401 source[imdex] = recv_status.MPI_SOURCE; 402 lens[imdex] = n; 403 slen += n; 404 count--; 405 } 406 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 407 408 /* move the data into the send scatter */ 409 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 410 count = 0; 411 for (i=0; i<nrecvs; i++) { 412 values = rvalues + i*nmax; 413 for (j=0; j<lens[i]; j++) { 414 lrows[count++] = values[j] - base; 415 } 416 } 417 ierr = PetscFree(rvalues);CHKERRQ(ierr); 418 ierr = PetscFree(lens);CHKERRQ(ierr); 419 ierr = PetscFree(owner);CHKERRQ(ierr); 420 ierr = PetscFree(nprocs);CHKERRQ(ierr); 421 422 /* actually zap the local rows */ 423 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 424 ierr = PetscFree(lrows);CHKERRQ(ierr); 425 426 /* wait on sends */ 427 if (nsends) { 428 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 429 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 430 ierr = PetscFree(send_status);CHKERRQ(ierr); 431 } 432 ierr = PetscFree(send_waits);CHKERRQ(ierr); 433 ierr = PetscFree(svalues);CHKERRQ(ierr); 434 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatMult_MPIDense" 440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 441 { 442 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 447 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 448 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatMultAdd_MPIDense" 454 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 455 { 456 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 461 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatMultTranspose_MPIDense" 468 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 469 { 470 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 471 PetscErrorCode ierr; 472 PetscScalar zero = 0.0; 473 474 PetscFunctionBegin; 475 ierr = VecSet(yy,zero);CHKERRQ(ierr); 476 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 477 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 478 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 484 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 491 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 492 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 493 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatGetDiagonal_MPIDense" 499 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 503 PetscErrorCode ierr; 504 PetscInt len,i,n,m = A->rmap.n,radd; 505 PetscScalar *x,zero = 0.0; 506 507 PetscFunctionBegin; 508 ierr = VecSet(v,zero);CHKERRQ(ierr); 509 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 510 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 511 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 512 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 513 radd = A->rmap.rstart*m; 514 for (i=0; i<len; i++) { 515 x[i] = aloc->v[radd + i*m + i]; 516 } 517 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatDestroy_MPIDense" 523 PetscErrorCode MatDestroy_MPIDense(Mat mat) 524 { 525 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 526 PetscErrorCode ierr; 527 #if defined(PETSC_HAVE_PLAPACK) 528 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 529 #endif 530 531 PetscFunctionBegin; 532 533 #if defined(PETSC_USE_LOG) 534 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 535 #endif 536 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 537 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 538 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 539 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 540 #if defined(PETSC_HAVE_PLAPACK) 541 if (lu->CleanUpPlapack) { 542 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 543 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 544 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 545 546 547 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 548 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 549 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 550 } 551 #endif 552 553 ierr = PetscFree(mdn);CHKERRQ(ierr); 554 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatView_MPIDense_Binary" 565 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 566 { 567 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 568 PetscErrorCode ierr; 569 PetscViewerFormat format; 570 int fd; 571 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 572 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 573 PetscScalar *work,*v,*vv; 574 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 575 MPI_Status status; 576 577 PetscFunctionBegin; 578 if (mdn->size == 1) { 579 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 580 } else { 581 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 582 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 583 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 584 585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 586 if (format == PETSC_VIEWER_BINARY_NATIVE) { 587 588 if (!rank) { 589 /* store the matrix as a dense matrix */ 590 header[0] = MAT_FILE_COOKIE; 591 header[1] = mat->rmap.N; 592 header[2] = N; 593 header[3] = MATRIX_BINARY_FORMAT_DENSE; 594 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 595 596 /* get largest work array needed for transposing array */ 597 mmax = mat->rmap.n; 598 for (i=1; i<size; i++) { 599 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 600 } 601 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 602 603 /* write out local array, by rows */ 604 m = mat->rmap.n; 605 v = a->v; 606 for (j=0; j<N; j++) { 607 for (i=0; i<m; i++) { 608 work[j + i*N] = *v++; 609 } 610 } 611 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 612 /* get largest work array to receive messages from other processes, excludes process zero */ 613 mmax = 0; 614 for (i=1; i<size; i++) { 615 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 616 } 617 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 618 v = vv; 619 for (k=1; k<size; k++) { 620 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 621 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 622 623 for (j=0; j<N; j++) { 624 for (i=0; i<m; i++) { 625 work[j + i*N] = *v++; 626 } 627 } 628 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 629 } 630 ierr = PetscFree(work);CHKERRQ(ierr); 631 ierr = PetscFree(vv);CHKERRQ(ierr); 632 } else { 633 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 634 } 635 } 636 } 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 643 { 644 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 645 PetscErrorCode ierr; 646 PetscMPIInt size = mdn->size,rank = mdn->rank; 647 PetscViewerType vtype; 648 PetscTruth iascii,isdraw; 649 PetscViewer sviewer; 650 PetscViewerFormat format; 651 #if defined(PETSC_HAVE_PLAPACK) 652 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 653 #endif 654 655 PetscFunctionBegin; 656 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 657 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 658 if (iascii) { 659 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 660 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 661 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 662 MatInfo info; 663 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 664 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 665 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 666 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 667 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 668 #if defined(PETSC_HAVE_PLAPACK) 669 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 670 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->nprows, lu->npcols);CHKERRQ(ierr); 671 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 672 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->ierror);CHKERRQ(ierr); 673 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr); 674 #endif 675 PetscFunctionReturn(0); 676 } else if (format == PETSC_VIEWER_ASCII_INFO) { 677 PetscFunctionReturn(0); 678 } 679 } else if (isdraw) { 680 PetscDraw draw; 681 PetscTruth isnull; 682 683 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 684 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 685 if (isnull) PetscFunctionReturn(0); 686 } 687 688 if (size == 1) { 689 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 690 } else { 691 /* assemble the entire matrix onto first processor. */ 692 Mat A; 693 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 694 PetscInt *cols; 695 PetscScalar *vals; 696 697 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 698 if (!rank) { 699 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 700 } else { 701 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 702 } 703 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 704 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 705 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 706 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 707 708 /* Copy the matrix ... This isn't the most efficient means, 709 but it's quick for now */ 710 A->insertmode = INSERT_VALUES; 711 row = mat->rmap.rstart; m = mdn->A->rmap.n; 712 for (i=0; i<m; i++) { 713 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 714 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 715 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 716 row++; 717 } 718 719 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 721 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 722 if (!rank) { 723 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 724 } 725 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 726 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 727 ierr = MatDestroy(A);CHKERRQ(ierr); 728 } 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatView_MPIDense" 734 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 735 { 736 PetscErrorCode ierr; 737 PetscTruth iascii,isbinary,isdraw,issocket; 738 739 PetscFunctionBegin; 740 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 742 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 743 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 744 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 745 746 if (iascii || issocket || isdraw) { 747 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 748 } else if (isbinary) { 749 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 750 } else { 751 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetInfo_MPIDense" 758 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 759 { 760 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 761 Mat mdn = mat->A; 762 PetscErrorCode ierr; 763 PetscReal isend[5],irecv[5]; 764 765 PetscFunctionBegin; 766 info->rows_global = (double)A->rmap.N; 767 info->columns_global = (double)A->cmap.N; 768 info->rows_local = (double)A->rmap.n; 769 info->columns_local = (double)A->cmap.N; 770 info->block_size = 1.0; 771 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 772 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 773 isend[3] = info->memory; isend[4] = info->mallocs; 774 if (flag == MAT_LOCAL) { 775 info->nz_used = isend[0]; 776 info->nz_allocated = isend[1]; 777 info->nz_unneeded = isend[2]; 778 info->memory = isend[3]; 779 info->mallocs = isend[4]; 780 } else if (flag == MAT_GLOBAL_MAX) { 781 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 782 info->nz_used = irecv[0]; 783 info->nz_allocated = irecv[1]; 784 info->nz_unneeded = irecv[2]; 785 info->memory = irecv[3]; 786 info->mallocs = irecv[4]; 787 } else if (flag == MAT_GLOBAL_SUM) { 788 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 789 info->nz_used = irecv[0]; 790 info->nz_allocated = irecv[1]; 791 info->nz_unneeded = irecv[2]; 792 info->memory = irecv[3]; 793 info->mallocs = irecv[4]; 794 } 795 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 796 info->fill_ratio_needed = 0; 797 info->factor_mallocs = 0; 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatSetOption_MPIDense" 803 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 804 { 805 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 806 PetscErrorCode ierr; 807 808 PetscFunctionBegin; 809 switch (op) { 810 case MAT_NEW_NONZERO_LOCATIONS: 811 case MAT_NEW_NONZERO_LOCATION_ERR: 812 case MAT_NEW_NONZERO_ALLOCATION_ERR: 813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 814 break; 815 case MAT_ROW_ORIENTED: 816 a->roworiented = flg; 817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 818 break; 819 case MAT_NEW_DIAGONALS: 820 case MAT_USE_HASH_TABLE: 821 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 822 break; 823 case MAT_IGNORE_OFF_PROC_ENTRIES: 824 a->donotstash = flg; 825 break; 826 case MAT_SYMMETRIC: 827 case MAT_STRUCTURALLY_SYMMETRIC: 828 case MAT_HERMITIAN: 829 case MAT_SYMMETRY_ETERNAL: 830 case MAT_IGNORE_LOWER_TRIANGULAR: 831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 832 break; 833 default: 834 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatDiagonalScale_MPIDense" 842 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 843 { 844 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 845 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 846 PetscScalar *l,*r,x,*v; 847 PetscErrorCode ierr; 848 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 849 850 PetscFunctionBegin; 851 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 852 if (ll) { 853 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 854 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 855 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 856 for (i=0; i<m; i++) { 857 x = l[i]; 858 v = mat->v + i; 859 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 860 } 861 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 862 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 863 } 864 if (rr) { 865 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 866 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 867 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 870 for (i=0; i<n; i++) { 871 x = r[i]; 872 v = mat->v + i*m; 873 for (j=0; j<m; j++) { (*v++) *= x;} 874 } 875 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 876 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatNorm_MPIDense" 883 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 884 { 885 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 886 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 887 PetscErrorCode ierr; 888 PetscInt i,j; 889 PetscReal sum = 0.0; 890 PetscScalar *v = mat->v; 891 892 PetscFunctionBegin; 893 if (mdn->size == 1) { 894 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 895 } else { 896 if (type == NORM_FROBENIUS) { 897 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 898 #if defined(PETSC_USE_COMPLEX) 899 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 900 #else 901 sum += (*v)*(*v); v++; 902 #endif 903 } 904 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 905 *nrm = sqrt(*nrm); 906 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 907 } else if (type == NORM_1) { 908 PetscReal *tmp,*tmp2; 909 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 910 tmp2 = tmp + A->cmap.N; 911 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 912 *nrm = 0.0; 913 v = mat->v; 914 for (j=0; j<mdn->A->cmap.n; j++) { 915 for (i=0; i<mdn->A->rmap.n; i++) { 916 tmp[j] += PetscAbsScalar(*v); v++; 917 } 918 } 919 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 920 for (j=0; j<A->cmap.N; j++) { 921 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 922 } 923 ierr = PetscFree(tmp);CHKERRQ(ierr); 924 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 925 } else if (type == NORM_INFINITY) { /* max row norm */ 926 PetscReal ntemp; 927 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 928 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 929 } else { 930 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 931 } 932 } 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatTranspose_MPIDense" 938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 939 { 940 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 941 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 942 Mat B; 943 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 944 PetscErrorCode ierr; 945 PetscInt j,i; 946 PetscScalar *v; 947 948 PetscFunctionBegin; 949 if (A == *matout && M != N) { 950 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 951 } 952 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 953 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 954 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 955 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 956 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 957 } else { 958 B = *matout; 959 } 960 961 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 962 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 963 for (i=0; i<m; i++) rwork[i] = rstart + i; 964 for (j=0; j<n; j++) { 965 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 966 v += m; 967 } 968 ierr = PetscFree(rwork);CHKERRQ(ierr); 969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 if (*matout != A) { 972 *matout = B; 973 } else { 974 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #include "petscblaslapack.h" 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatScale_MPIDense" 982 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 983 { 984 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 985 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 986 PetscScalar oalpha = alpha; 987 PetscErrorCode ierr; 988 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 989 990 PetscFunctionBegin; 991 BLASscal_(&nz,&oalpha,a->v,&one); 992 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1001 { 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1011 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1012 { 1013 PetscErrorCode ierr; 1014 PetscInt m=A->rmap.n,n=B->cmap.n; 1015 Mat Cmat; 1016 1017 PetscFunctionBegin; 1018 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1019 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1020 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1021 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1022 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 1023 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1025 *C = Cmat; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #if defined(PETSC_HAVE_PLAPACK) 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSolve_MPIDense" 1032 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1033 { 1034 MPI_Comm comm = ((PetscObject)A)->comm; 1035 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1036 PetscErrorCode ierr; 1037 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1038 PetscScalar *array; 1039 PetscReal one = 1.0; 1040 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1041 PLA_Obj v_pla = NULL; 1042 PetscScalar *loc_buf; 1043 Vec loc_x; 1044 1045 PetscFunctionBegin; 1046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1048 1049 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1050 ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr); 1051 ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr); 1052 1053 /* Copy b into rhs_pla */ 1054 ierr = PLA_API_begin();CHKERRQ(ierr); 1055 ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr); 1056 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1057 ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1059 ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr); 1060 ierr = PLA_API_end();CHKERRQ(ierr); 1061 1062 if (A->factor == FACTOR_LU){ 1063 /* Apply the permutations to the right hand sides */ 1064 ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr); 1065 1066 /* Solve L y = b, overwriting b with y */ 1067 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1068 1069 /* Solve U x = y (=b), overwriting b with x */ 1070 ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1071 } else { /* FACTOR_CHOLESKY */ 1072 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1073 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1074 } 1075 1076 /* Copy PLAPACK x into Petsc vector x */ 1077 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1078 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1079 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1080 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1081 if (!lu->pla_solved){ 1082 1083 ierr = PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);CHKERRQ(ierr); 1084 ierr = PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);CHKERRQ(ierr); 1085 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1086 1087 /* Create IS and cts for VecScatterring */ 1088 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1089 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1090 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1091 idx_petsc = idx_pla + loc_m; 1092 1093 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1094 for (i=0; i<loc_m; i+=lu->nb){ 1095 j = 0; 1096 while (j < lu->nb && i+j < loc_m){ 1097 idx_petsc[i+j] = rstart + j; j++; 1098 } 1099 rstart += size*lu->nb; 1100 } 1101 1102 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1103 1104 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1106 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1107 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1108 } 1109 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 1112 /* Free data */ 1113 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1114 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1115 1116 lu->pla_solved = PETSC_TRUE; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1122 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1123 { 1124 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1125 PetscErrorCode ierr; 1126 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1127 PetscInt info_pla=0; 1128 PetscScalar *array,one = 1.0; 1129 1130 PetscFunctionBegin; 1131 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1132 1133 /* Copy A into lu->A */ 1134 ierr = PLA_API_begin();CHKERRQ(ierr); 1135 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1137 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1138 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1139 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1140 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1141 ierr = PLA_API_end();CHKERRQ(ierr); 1142 1143 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1144 info_pla = PLA_LU(lu->A,lu->pivots); 1145 if (info_pla != 0) 1146 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1147 1148 lu->CleanUpPlapack = PETSC_TRUE; 1149 lu->rstart = rstart; 1150 1151 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1157 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1158 { 1159 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1160 PetscErrorCode ierr; 1161 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1162 PetscInt info_pla=0; 1163 PetscScalar *array,one = 1.0; 1164 1165 PetscFunctionBegin; 1166 1167 /* Copy A into lu->A */ 1168 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1169 ierr = PLA_API_begin();CHKERRQ(ierr); 1170 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1171 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1172 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1173 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1174 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1175 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1176 ierr = PLA_API_end();CHKERRQ(ierr); 1177 1178 /* Factor P A -> Chol */ 1179 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1180 if (info_pla != 0) 1181 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1182 1183 lu->CleanUpPlapack = PETSC_TRUE; 1184 lu->rstart = rstart; 1185 1186 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private" 1192 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1193 { 1194 Mat B; 1195 Mat_Plapack *lu; 1196 PetscErrorCode ierr; 1197 PetscInt M=A->rmap.N,N=A->cmap.N; 1198 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1199 PetscMPIInt size; 1200 PetscInt ierror; 1201 1202 PetscFunctionBegin; 1203 /* Create the factorization matrix */ 1204 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1205 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 1206 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1207 1208 lu = (Mat_Plapack*)(B->spptr); 1209 1210 /* Set default Plapack parameters */ 1211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1212 lu->nprows = 1; lu->npcols = size; 1213 ierror = 0; 1214 lu->nb = M/size; 1215 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1216 1217 /* Set runtime options */ 1218 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1219 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1220 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1221 1222 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1223 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1224 if (ierror){ 1225 ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 1226 } else { 1227 ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 1228 } 1229 lu->ierror = ierror; 1230 1231 lu->nb_alg = 0; 1232 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1233 if (lu->nb_alg){ 1234 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1235 } 1236 PetscOptionsEnd(); 1237 1238 1239 /* Create a 2D communicator */ 1240 ierr = PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);CHKERRQ(ierr); 1241 lu->comm_2d = comm_2d; 1242 1243 /* Create object distribution template */ 1244 lu->templ = NULL; 1245 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1246 1247 /* Use suggested nb_alg if it is not provided by user */ 1248 if (lu->nb_alg == 0){ 1249 ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr); 1250 ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1251 } 1252 1253 /* Set the datatype */ 1254 #if defined(PETSC_USE_COMPLEX) 1255 lu->datatype = MPI_DOUBLE_COMPLEX; 1256 #else 1257 lu->datatype = MPI_DOUBLE; 1258 #endif 1259 1260 ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1261 1262 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1263 lu->CleanUpPlapack = PETSC_TRUE; 1264 *F = B; 1265 PetscFunctionReturn(0); 1266 } 1267 1268 /* Note the Petsc r and c permutations are ignored */ 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1271 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1272 { 1273 PetscErrorCode ierr; 1274 PetscInt M = A->rmap.N; 1275 Mat_Plapack *lu; 1276 1277 PetscFunctionBegin; 1278 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1279 lu = (Mat_Plapack*)(*F)->spptr; 1280 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1281 (*F)->factor = FACTOR_LU; 1282 PetscFunctionReturn(0); 1283 } 1284 1285 /* Note the Petsc perm permutation is ignored */ 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1288 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1289 { 1290 PetscErrorCode ierr; 1291 PetscTruth issymmetric,set; 1292 1293 PetscFunctionBegin; 1294 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1295 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1296 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1297 (*F)->factor = FACTOR_CHOLESKY; 1298 PetscFunctionReturn(0); 1299 } 1300 #endif 1301 1302 /* -------------------------------------------------------------------*/ 1303 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1304 MatGetRow_MPIDense, 1305 MatRestoreRow_MPIDense, 1306 MatMult_MPIDense, 1307 /* 4*/ MatMultAdd_MPIDense, 1308 MatMultTranspose_MPIDense, 1309 MatMultTransposeAdd_MPIDense, 1310 #if defined(PETSC_HAVE_PLAPACK) 1311 MatSolve_MPIDense, 1312 #else 1313 0, 1314 #endif 1315 0, 1316 0, 1317 /*10*/ 0, 1318 0, 1319 0, 1320 0, 1321 MatTranspose_MPIDense, 1322 /*15*/ MatGetInfo_MPIDense, 1323 MatEqual_MPIDense, 1324 MatGetDiagonal_MPIDense, 1325 MatDiagonalScale_MPIDense, 1326 MatNorm_MPIDense, 1327 /*20*/ MatAssemblyBegin_MPIDense, 1328 MatAssemblyEnd_MPIDense, 1329 0, 1330 MatSetOption_MPIDense, 1331 MatZeroEntries_MPIDense, 1332 /*25*/ MatZeroRows_MPIDense, 1333 #if defined(PETSC_HAVE_PLAPACK) 1334 MatLUFactorSymbolic_MPIDense, 1335 MatLUFactorNumeric_MPIDense, 1336 MatCholeskyFactorSymbolic_MPIDense, 1337 MatCholeskyFactorNumeric_MPIDense, 1338 #else 1339 0, 1340 0, 1341 0, 1342 0, 1343 #endif 1344 /*30*/ MatSetUpPreallocation_MPIDense, 1345 0, 1346 0, 1347 MatGetArray_MPIDense, 1348 MatRestoreArray_MPIDense, 1349 /*35*/ MatDuplicate_MPIDense, 1350 0, 1351 0, 1352 0, 1353 0, 1354 /*40*/ 0, 1355 MatGetSubMatrices_MPIDense, 1356 0, 1357 MatGetValues_MPIDense, 1358 0, 1359 /*45*/ 0, 1360 MatScale_MPIDense, 1361 0, 1362 0, 1363 0, 1364 /*50*/ 0, 1365 0, 1366 0, 1367 0, 1368 0, 1369 /*55*/ 0, 1370 0, 1371 0, 1372 0, 1373 0, 1374 /*60*/ MatGetSubMatrix_MPIDense, 1375 MatDestroy_MPIDense, 1376 MatView_MPIDense, 1377 0, 1378 0, 1379 /*65*/ 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 /*70*/ 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 /*75*/ 0, 1390 0, 1391 0, 1392 0, 1393 0, 1394 /*80*/ 0, 1395 0, 1396 0, 1397 0, 1398 /*84*/ MatLoad_MPIDense, 1399 0, 1400 0, 1401 0, 1402 0, 1403 0, 1404 /*90*/ 0, 1405 MatMatMultSymbolic_MPIDense_MPIDense, 1406 0, 1407 0, 1408 0, 1409 /*95*/ 0, 1410 0, 1411 0, 1412 0}; 1413 1414 EXTERN_C_BEGIN 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1417 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1418 { 1419 Mat_MPIDense *a; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 mat->preallocated = PETSC_TRUE; 1424 /* Note: For now, when data is specified above, this assumes the user correctly 1425 allocates the local dense storage space. We should add error checking. */ 1426 1427 a = (Mat_MPIDense*)mat->data; 1428 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1429 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1430 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1431 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1432 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 EXTERN_C_END 1436 1437 EXTERN_C_BEGIN 1438 #if defined(PETSC_HAVE_PLAPACK) 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1441 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1442 { 1443 PetscFunctionBegin; 1444 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1445 PetscFunctionReturn(0); 1446 } 1447 #endif 1448 EXTERN_C_END 1449 1450 /*MC 1451 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1452 1453 Options Database Keys: 1454 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1455 1456 Level: beginner 1457 1458 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1459 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1460 (run config/configure.py with the option --download-plapack) 1461 1462 1463 Options Database Keys: 1464 . -mat_plapack_nprows <n> - number of rows in processor partition 1465 . -mat_plapack_npcols <n> - number of columns in processor partition 1466 . -mat_plapack_nb <n> - block size of template vector 1467 . -mat_plapack_nb_alg <n> - algorithmic block size 1468 - -mat_plapack_ckerror <n> - error checking flag 1469 1470 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1471 M*/ 1472 1473 EXTERN_C_BEGIN 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatCreate_MPIDense" 1476 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1477 { 1478 Mat_MPIDense *a; 1479 PetscErrorCode ierr; 1480 #if defined(PETSC_HAVE_PLAPACK) 1481 Mat_Plapack *lu; 1482 #endif 1483 1484 PetscFunctionBegin; 1485 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1486 mat->data = (void*)a; 1487 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1488 mat->factor = 0; 1489 mat->mapping = 0; 1490 1491 mat->insertmode = NOT_SET_VALUES; 1492 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1493 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1494 1495 mat->rmap.bs = mat->cmap.bs = 1; 1496 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1497 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1498 a->nvec = mat->cmap.n; 1499 1500 /* build cache for off array entries formed */ 1501 a->donotstash = PETSC_FALSE; 1502 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1503 1504 /* stuff used for matrix vector multiply */ 1505 a->lvec = 0; 1506 a->Mvctx = 0; 1507 a->roworiented = PETSC_TRUE; 1508 1509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1510 "MatGetDiagonalBlock_MPIDense", 1511 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1513 "MatMPIDenseSetPreallocation_MPIDense", 1514 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1516 "MatMatMult_MPIAIJ_MPIDense", 1517 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1519 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1520 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1522 "MatMatMultNumeric_MPIAIJ_MPIDense", 1523 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1524 #if defined(PETSC_HAVE_PLAPACK) 1525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1526 "MatSetSolverType_MPIDense_PLAPACK", 1527 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1528 #endif 1529 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1530 1531 #if defined(PETSC_HAVE_PLAPACK) 1532 ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr); 1533 lu->CleanUpPlapack = PETSC_FALSE; 1534 mat->spptr = (void*)lu; 1535 #endif 1536 PetscFunctionReturn(0); 1537 } 1538 EXTERN_C_END 1539 1540 /*MC 1541 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1542 1543 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1544 and MATMPIDENSE otherwise. 1545 1546 Options Database Keys: 1547 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1548 1549 Level: beginner 1550 1551 1552 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1553 M*/ 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCreate_Dense" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1559 { 1560 PetscErrorCode ierr; 1561 PetscMPIInt size; 1562 1563 PetscFunctionBegin; 1564 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1565 if (size == 1) { 1566 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1567 } else { 1568 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1569 } 1570 PetscFunctionReturn(0); 1571 } 1572 EXTERN_C_END 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1576 /*@C 1577 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1578 1579 Not collective 1580 1581 Input Parameters: 1582 . A - the matrix 1583 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1584 to control all matrix memory allocation. 1585 1586 Notes: 1587 The dense format is fully compatible with standard Fortran 77 1588 storage by columns. 1589 1590 The data input variable is intended primarily for Fortran programmers 1591 who wish to allocate their own matrix memory space. Most users should 1592 set data=PETSC_NULL. 1593 1594 Level: intermediate 1595 1596 .keywords: matrix,dense, parallel 1597 1598 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1599 @*/ 1600 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1601 { 1602 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1603 1604 PetscFunctionBegin; 1605 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1606 if (f) { 1607 ierr = (*f)(mat,data);CHKERRQ(ierr); 1608 } 1609 PetscFunctionReturn(0); 1610 } 1611 1612 #undef __FUNCT__ 1613 #define __FUNCT__ "MatCreateMPIDense" 1614 /*@C 1615 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1616 1617 Collective on MPI_Comm 1618 1619 Input Parameters: 1620 + comm - MPI communicator 1621 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1622 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1623 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1624 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1625 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1626 to control all matrix memory allocation. 1627 1628 Output Parameter: 1629 . A - the matrix 1630 1631 Notes: 1632 The dense format is fully compatible with standard Fortran 77 1633 storage by columns. 1634 1635 The data input variable is intended primarily for Fortran programmers 1636 who wish to allocate their own matrix memory space. Most users should 1637 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1638 1639 The user MUST specify either the local or global matrix dimensions 1640 (possibly both). 1641 1642 Level: intermediate 1643 1644 .keywords: matrix,dense, parallel 1645 1646 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1647 @*/ 1648 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1649 { 1650 PetscErrorCode ierr; 1651 PetscMPIInt size; 1652 1653 PetscFunctionBegin; 1654 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1655 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1657 if (size > 1) { 1658 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1659 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1660 } else { 1661 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1662 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatDuplicate_MPIDense" 1669 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1670 { 1671 Mat mat; 1672 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 *newmat = 0; 1677 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1678 ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1679 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1680 a = (Mat_MPIDense*)mat->data; 1681 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1682 mat->factor = A->factor; 1683 mat->assembled = PETSC_TRUE; 1684 mat->preallocated = PETSC_TRUE; 1685 1686 mat->rmap.rstart = A->rmap.rstart; 1687 mat->rmap.rend = A->rmap.rend; 1688 a->size = oldmat->size; 1689 a->rank = oldmat->rank; 1690 mat->insertmode = NOT_SET_VALUES; 1691 a->nvec = oldmat->nvec; 1692 a->donotstash = oldmat->donotstash; 1693 1694 ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1695 ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1696 ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr); 1697 1698 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1699 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1700 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1701 1702 #if defined(PETSC_HAVE_PLAPACK) 1703 ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr); 1704 #endif 1705 *newmat = mat; 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #include "petscsys.h" 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1713 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat) 1714 { 1715 PetscErrorCode ierr; 1716 PetscMPIInt rank,size; 1717 PetscInt *rowners,i,m,nz,j; 1718 PetscScalar *array,*vals,*vals_ptr; 1719 MPI_Status status; 1720 1721 PetscFunctionBegin; 1722 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1723 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1724 1725 /* determine ownership of all rows */ 1726 m = M/size + ((M % size) > rank); 1727 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1728 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1729 rowners[0] = 0; 1730 for (i=2; i<=size; i++) { 1731 rowners[i] += rowners[i-1]; 1732 } 1733 1734 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1735 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1736 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1737 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1738 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1739 1740 if (!rank) { 1741 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1742 1743 /* read in my part of the matrix numerical values */ 1744 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1745 1746 /* insert into matrix-by row (this is why cannot directly read into array */ 1747 vals_ptr = vals; 1748 for (i=0; i<m; i++) { 1749 for (j=0; j<N; j++) { 1750 array[i + j*m] = *vals_ptr++; 1751 } 1752 } 1753 1754 /* read in other processors and ship out */ 1755 for (i=1; i<size; i++) { 1756 nz = (rowners[i+1] - rowners[i])*N; 1757 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1758 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1759 } 1760 } else { 1761 /* receive numeric values */ 1762 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1763 1764 /* receive message of values*/ 1765 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1766 1767 /* insert into matrix-by row (this is why cannot directly read into array */ 1768 vals_ptr = vals; 1769 for (i=0; i<m; i++) { 1770 for (j=0; j<N; j++) { 1771 array[i + j*m] = *vals_ptr++; 1772 } 1773 } 1774 } 1775 ierr = PetscFree(rowners);CHKERRQ(ierr); 1776 ierr = PetscFree(vals);CHKERRQ(ierr); 1777 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1778 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatLoad_MPIDense" 1784 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat) 1785 { 1786 Mat A; 1787 PetscScalar *vals,*svals; 1788 MPI_Comm comm = ((PetscObject)viewer)->comm; 1789 MPI_Status status; 1790 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1791 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1792 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1793 PetscInt i,nz,j,rstart,rend; 1794 int fd; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1799 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1800 if (!rank) { 1801 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1802 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1803 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1804 } 1805 1806 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1807 M = header[1]; N = header[2]; nz = header[3]; 1808 1809 /* 1810 Handle case where matrix is stored on disk as a dense matrix 1811 */ 1812 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1813 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 /* determine ownership of all rows */ 1818 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1819 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1820 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1821 rowners[0] = 0; 1822 for (i=2; i<=size; i++) { 1823 rowners[i] += rowners[i-1]; 1824 } 1825 rstart = rowners[rank]; 1826 rend = rowners[rank+1]; 1827 1828 /* distribute row lengths to all processors */ 1829 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1830 offlens = ourlens + (rend-rstart); 1831 if (!rank) { 1832 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1834 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1835 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1836 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1837 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1838 } else { 1839 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1840 } 1841 1842 if (!rank) { 1843 /* calculate the number of nonzeros on each processor */ 1844 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1845 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1846 for (i=0; i<size; i++) { 1847 for (j=rowners[i]; j< rowners[i+1]; j++) { 1848 procsnz[i] += rowlengths[j]; 1849 } 1850 } 1851 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1852 1853 /* determine max buffer needed and allocate it */ 1854 maxnz = 0; 1855 for (i=0; i<size; i++) { 1856 maxnz = PetscMax(maxnz,procsnz[i]); 1857 } 1858 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1859 1860 /* read in my part of the matrix column indices */ 1861 nz = procsnz[0]; 1862 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1863 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1864 1865 /* read in every one elses and ship off */ 1866 for (i=1; i<size; i++) { 1867 nz = procsnz[i]; 1868 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1869 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1870 } 1871 ierr = PetscFree(cols);CHKERRQ(ierr); 1872 } else { 1873 /* determine buffer space needed for message */ 1874 nz = 0; 1875 for (i=0; i<m; i++) { 1876 nz += ourlens[i]; 1877 } 1878 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1879 1880 /* receive message of column indices*/ 1881 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1882 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1883 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1884 } 1885 1886 /* loop over local rows, determining number of off diagonal entries */ 1887 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1888 jj = 0; 1889 for (i=0; i<m; i++) { 1890 for (j=0; j<ourlens[i]; j++) { 1891 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1892 jj++; 1893 } 1894 } 1895 1896 /* create our matrix */ 1897 for (i=0; i<m; i++) { 1898 ourlens[i] -= offlens[i]; 1899 } 1900 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1901 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1902 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1903 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1904 A = *newmat; 1905 for (i=0; i<m; i++) { 1906 ourlens[i] += offlens[i]; 1907 } 1908 1909 if (!rank) { 1910 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1911 1912 /* read in my part of the matrix numerical values */ 1913 nz = procsnz[0]; 1914 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1915 1916 /* insert into matrix */ 1917 jj = rstart; 1918 smycols = mycols; 1919 svals = vals; 1920 for (i=0; i<m; i++) { 1921 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1922 smycols += ourlens[i]; 1923 svals += ourlens[i]; 1924 jj++; 1925 } 1926 1927 /* read in other processors and ship out */ 1928 for (i=1; i<size; i++) { 1929 nz = procsnz[i]; 1930 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1931 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 1932 } 1933 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1934 } else { 1935 /* receive numeric values */ 1936 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1937 1938 /* receive message of values*/ 1939 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 1940 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1941 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1942 1943 /* insert into matrix */ 1944 jj = rstart; 1945 smycols = mycols; 1946 svals = vals; 1947 for (i=0; i<m; i++) { 1948 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1949 smycols += ourlens[i]; 1950 svals += ourlens[i]; 1951 jj++; 1952 } 1953 } 1954 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1955 ierr = PetscFree(vals);CHKERRQ(ierr); 1956 ierr = PetscFree(mycols);CHKERRQ(ierr); 1957 ierr = PetscFree(rowners);CHKERRQ(ierr); 1958 1959 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1960 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatEqual_MPIDense" 1966 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 1967 { 1968 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1969 Mat a,b; 1970 PetscTruth flg; 1971 PetscErrorCode ierr; 1972 1973 PetscFunctionBegin; 1974 a = matA->A; 1975 b = matB->A; 1976 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1977 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #if defined(PETSC_HAVE_PLAPACK) 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 1985 /*@C 1986 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 1987 Level: developer 1988 1989 .keywords: Petsc, destroy, package, PLAPACK 1990 .seealso: PetscFinalize() 1991 @*/ 1992 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 1993 { 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = PLA_Finalize();CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2003 /*@C 2004 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2005 called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize() 2006 when using static libraries. 2007 2008 Input Parameter: 2009 path - The dynamic library path, or PETSC_NULL 2010 2011 Level: developer 2012 2013 .keywords: Petsc, initialize, package, PLAPACK 2014 .seealso: PetscInitializePackage(), PetscInitialize() 2015 @*/ 2016 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[]) 2017 { 2018 MPI_Comm comm = PETSC_COMM_WORLD,comm_2d; 2019 int initPLA; 2020 PetscMPIInt size,nprows,npcols; 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 if (!PLA_Initialized(PETSC_NULL)) { 2025 2026 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2027 nprows = 1; 2028 npcols = size; 2029 2030 ierr = PLA_Comm_1D_to_2D(comm,nprows,npcols,&comm_2d);CHKERRQ(ierr); 2031 ierr = PLA_Init(comm_2d);CHKERRQ(ierr); 2032 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 2038 #endif 2039