#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I  "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
        matrix that stores the locally owned portion of the operator. For a sequential dense
        matrix it returns the matrix itself.

    Input Parameter:
.   A - the Seq or MPI dense matrix

    Output Parameter:
.   B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
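
/*
   Example (an illustrative sketch, not part of the library source): obtain the local
   dense block of a parallel dense matrix and query its dimensions.  The inner matrix
   of a MATMPIDENSE matrix holds all locally owned rows and every global column, so
   MatGetSize() on it returns (local rows, global columns).  The variable names below
   are hypothetical.

      Mat            A,Alocal;
      PetscInt       m,N;
      PetscErrorCode ierr;

      ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
      ierr = MatGetSize(Alocal,&m,&N);CHKERRQ(ierr);
*/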
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
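
/*
   Illustrative usage sketch (not part of the library source): MatSetValues() may be
   called with any global row indices; rows owned by this process are written directly
   into the local block above, while off-process rows are stashed and exchanged during
   assembly.  Variable names are hypothetical.

      PetscInt    row = 0,cols[2] = {0,1};
      PetscScalar vals[2] = {1.0,2.0};

      ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/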
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
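
/*
   Illustrative sketch (not part of the library source) of direct access to the local
   storage returned by MatGetArray() on a MATMPIDENSE matrix: the block of locally
   owned rows is stored column major (Fortran order), so entry (rstart+i,j) of the
   global matrix lives at array[i + j*mlocal].  Variable names are hypothetical.

      PetscScalar *array;
      PetscInt    mlocal,nlocal,M,N,i,j;

      ierr = MatGetLocalSize(A,&mlocal,&nlocal);CHKERRQ(ierr);
      ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
      ierr = MatGetArray(A,&array);CHKERRQ(ierr);
      for (j=0; j<N; j++) {
        for (i=0; i<mlocal; i++) {
          array[i + j*mlocal] = 0.0;
        }
      }
      ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
*/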
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
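
/*
   Note on the insert-mode check above (an illustrative sketch, not library source):
   all processes must use the same InsertMode between two assemblies.  To switch from
   adding to inserting, perform a flush assembly first.  Variable names are hypothetical.

      ierr = MatSetValues(A,1,&row,1,&col,&va,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&vb,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/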
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* gather the entire input vector into the sequential work vector, then apply the local dense block */
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

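/*
   Illustrative sketch (not part of the library source): the multiply routines above
   first scatter the distributed input vector into the sequential work vector lvec and
   then apply the local dense block; callers simply use the generic interface with
   vectors whose layouts are compatible with the matrix.

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecDestroy(x);CHKERRQ(ierr);
      ierr = VecDestroy(y);CHKERRQ(ierr);
*/
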
497 #undef __FUNCT__ 498 #define __FUNCT__ "MatGetDiagonal_MPIDense" 499 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 503 PetscErrorCode ierr; 504 PetscInt len,i,n,m = A->rmap.n,radd; 505 PetscScalar *x,zero = 0.0; 506 507 PetscFunctionBegin; 508 ierr = VecSet(v,zero);CHKERRQ(ierr); 509 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 510 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 511 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 512 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 513 radd = A->rmap.rstart*m; 514 for (i=0; i<len; i++) { 515 x[i] = aloc->v[radd + i*m + i]; 516 } 517 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatDestroy_MPIDense" 523 PetscErrorCode MatDestroy_MPIDense(Mat mat) 524 { 525 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 526 PetscErrorCode ierr; 527 #if defined(PETSC_HAVE_PLAPACK) 528 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 529 #endif 530 531 PetscFunctionBegin; 532 533 #if defined(PETSC_USE_LOG) 534 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 535 #endif 536 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 537 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 538 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 539 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 540 #if defined(PETSC_HAVE_PLAPACK) 541 if (lu->CleanUpPlapack) { 542 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 543 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 544 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 545 ierr = PLA_Finalize();CHKERRQ(ierr); 546 547 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 548 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 549 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 550 } 551 #endif 552 553 ierr = PetscFree(mdn);CHKERRQ(ierr); 554 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatView_MPIDense_Binary" 565 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 566 { 567 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 568 PetscErrorCode ierr; 569 PetscViewerFormat format; 570 int fd; 571 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 572 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 573 PetscScalar *work,*v,*vv; 574 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 575 MPI_Status status; 576 577 PetscFunctionBegin; 578 if (mdn->size == 1) { 579 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 580 } else { 581 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 582 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 583 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 584 585 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 586 if (format == PETSC_VIEWER_BINARY_NATIVE) { 587 588 if (!rank) { 589 /* store the matrix as a dense matrix */ 590 header[0] = MAT_FILE_COOKIE; 591 header[1] = mat->rmap.N; 592 header[2] = N; 593 header[3] = MATRIX_BINARY_FORMAT_DENSE; 594 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 595 596 /* get largest work array needed for transposing array */ 597 mmax = mat->rmap.n; 598 for (i=1; i<size; i++) { 599 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 600 } 601 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 602 603 /* write out local array, by rows */ 604 m = mat->rmap.n; 605 v = a->v; 606 for (j=0; j<N; j++) { 607 for (i=0; i<m; i++) { 608 work[j + i*N] = *v++; 609 } 610 } 611 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 612 /* get largest work array to receive messages from other processes, excludes process zero */ 613 mmax = 0; 614 for (i=1; i<size; i++) { 615 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 616 } 617 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 618 v = vv; 619 for (k=1; k<size; k++) { 620 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 621 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 622 623 for (j=0; j<N; j++) { 624 for (i=0; i<m; i++) { 625 work[j + i*N] = *v++; 626 } 627 } 628 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 629 } 630 ierr = PetscFree(work);CHKERRQ(ierr); 631 ierr = PetscFree(vv);CHKERRQ(ierr); 632 } else { 633 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 634 } 635 } 636 } 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 643 { 644 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 645 PetscErrorCode ierr; 646 PetscMPIInt size = mdn->size,rank = mdn->rank; 647 PetscViewerType vtype; 648 PetscTruth iascii,isdraw; 649 PetscViewer sviewer; 650 PetscViewerFormat format; 651 #if defined(PETSC_HAVE_PLAPACK) 652 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 653 #endif 654 655 PetscFunctionBegin; 656 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 657 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 658 if (iascii) { 659 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 660 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 661 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 662 MatInfo info; 663 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 664 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 665 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 666 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 667 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 668 #if defined(PETSC_HAVE_PLAPACK) 669 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 670 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->nprows, lu->npcols);CHKERRQ(ierr); 671 ierr = PetscViewerASCIIPrintf(viewer," Distr. 
block size nb: %d \n",lu->nb);CHKERRQ(ierr); 672 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->ierror);CHKERRQ(ierr); 673 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr); 674 #endif 675 PetscFunctionReturn(0); 676 } else if (format == PETSC_VIEWER_ASCII_INFO) { 677 PetscFunctionReturn(0); 678 } 679 } else if (isdraw) { 680 PetscDraw draw; 681 PetscTruth isnull; 682 683 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 684 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 685 if (isnull) PetscFunctionReturn(0); 686 } 687 688 if (size == 1) { 689 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 690 } else { 691 /* assemble the entire matrix onto first processor. */ 692 Mat A; 693 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 694 PetscInt *cols; 695 PetscScalar *vals; 696 697 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 698 if (!rank) { 699 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 700 } else { 701 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 702 } 703 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 704 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 705 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 706 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 707 708 /* Copy the matrix ... This isn't the most efficient means, 709 but it's quick for now */ 710 A->insertmode = INSERT_VALUES; 711 row = mat->rmap.rstart; m = mdn->A->rmap.n; 712 for (i=0; i<m; i++) { 713 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 714 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 715 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 716 row++; 717 } 718 719 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 721 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 722 if (!rank) { 723 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 724 } 725 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 726 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 727 ierr = MatDestroy(A);CHKERRQ(ierr); 728 } 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatView_MPIDense" 734 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 735 { 736 PetscErrorCode ierr; 737 PetscTruth iascii,isbinary,isdraw,issocket; 738 739 PetscFunctionBegin; 740 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 742 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 743 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 744 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 745 746 if (iascii || issocket || isdraw) { 747 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 748 } else if (isbinary) { 749 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 750 } else { 751 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetInfo_MPIDense" 758 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 759 { 760 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 761 Mat mdn = mat->A; 762 PetscErrorCode ierr; 763 PetscReal 
isend[5],irecv[5]; 764 765 PetscFunctionBegin; 766 info->rows_global = (double)A->rmap.N; 767 info->columns_global = (double)A->cmap.N; 768 info->rows_local = (double)A->rmap.n; 769 info->columns_local = (double)A->cmap.N; 770 info->block_size = 1.0; 771 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 772 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 773 isend[3] = info->memory; isend[4] = info->mallocs; 774 if (flag == MAT_LOCAL) { 775 info->nz_used = isend[0]; 776 info->nz_allocated = isend[1]; 777 info->nz_unneeded = isend[2]; 778 info->memory = isend[3]; 779 info->mallocs = isend[4]; 780 } else if (flag == MAT_GLOBAL_MAX) { 781 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 782 info->nz_used = irecv[0]; 783 info->nz_allocated = irecv[1]; 784 info->nz_unneeded = irecv[2]; 785 info->memory = irecv[3]; 786 info->mallocs = irecv[4]; 787 } else if (flag == MAT_GLOBAL_SUM) { 788 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 789 info->nz_used = irecv[0]; 790 info->nz_allocated = irecv[1]; 791 info->nz_unneeded = irecv[2]; 792 info->memory = irecv[3]; 793 info->mallocs = irecv[4]; 794 } 795 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 796 info->fill_ratio_needed = 0; 797 info->factor_mallocs = 0; 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatSetOption_MPIDense" 803 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 804 { 805 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 806 PetscErrorCode ierr; 807 808 PetscFunctionBegin; 809 switch (op) { 810 case MAT_NEW_NONZERO_LOCATIONS: 811 case MAT_NEW_NONZERO_LOCATION_ERR: 812 case MAT_NEW_NONZERO_ALLOCATION_ERR: 813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 814 break; 815 case MAT_ROW_ORIENTED: 816 a->roworiented = flg; 817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 818 break; 819 case MAT_NEW_DIAGONALS: 820 case MAT_USE_HASH_TABLE: 821 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 822 break; 823 case MAT_IGNORE_OFF_PROC_ENTRIES: 824 a->donotstash = flg; 825 break; 826 case MAT_SYMMETRIC: 827 case MAT_STRUCTURALLY_SYMMETRIC: 828 case MAT_HERMITIAN: 829 case MAT_SYMMETRY_ETERNAL: 830 case MAT_IGNORE_LOWER_TRIANGULAR: 831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 832 break; 833 default: 834 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatDiagonalScale_MPIDense" 842 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 843 { 844 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 845 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 846 PetscScalar *l,*r,x,*v; 847 PetscErrorCode ierr; 848 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 849 850 PetscFunctionBegin; 851 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 852 if (ll) { 853 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 854 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 855 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 856 for (i=0; i<m; i++) { 857 x = l[i]; 858 v = mat->v + i; 859 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 860 } 861 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 862 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 863 } 864 if (rr) { 865 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 866 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right 
scaling vec non-conforming local size, %d != %d.", s3a, s3); 867 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 870 for (i=0; i<n; i++) { 871 x = r[i]; 872 v = mat->v + i*m; 873 for (j=0; j<m; j++) { (*v++) *= x;} 874 } 875 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 876 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatNorm_MPIDense" 883 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 884 { 885 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 886 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 887 PetscErrorCode ierr; 888 PetscInt i,j; 889 PetscReal sum = 0.0; 890 PetscScalar *v = mat->v; 891 892 PetscFunctionBegin; 893 if (mdn->size == 1) { 894 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 895 } else { 896 if (type == NORM_FROBENIUS) { 897 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 898 #if defined(PETSC_USE_COMPLEX) 899 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 900 #else 901 sum += (*v)*(*v); v++; 902 #endif 903 } 904 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 905 *nrm = sqrt(*nrm); 906 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 907 } else if (type == NORM_1) { 908 PetscReal *tmp,*tmp2; 909 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 910 tmp2 = tmp + A->cmap.N; 911 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 912 *nrm = 0.0; 913 v = mat->v; 914 for (j=0; j<mdn->A->cmap.n; j++) { 915 for (i=0; i<mdn->A->rmap.n; i++) { 916 tmp[j] += PetscAbsScalar(*v); v++; 917 } 918 } 919 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 920 for (j=0; j<A->cmap.N; j++) { 921 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 922 } 923 ierr = PetscFree(tmp);CHKERRQ(ierr); 924 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 925 } else if (type == NORM_INFINITY) { /* max row norm */ 926 PetscReal ntemp; 927 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 928 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 929 } else { 930 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 931 } 932 } 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatTranspose_MPIDense" 938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 939 { 940 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 941 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 942 Mat B; 943 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 944 PetscErrorCode ierr; 945 PetscInt j,i; 946 PetscScalar *v; 947 948 PetscFunctionBegin; 949 if (A == *matout && M != N) { 950 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 951 } 952 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 953 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 954 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 955 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 956 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 957 } else { 958 B = *matout; 959 } 960 961 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 962 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 963 for (i=0; i<m; i++) rwork[i] = rstart + i; 964 for (j=0; j<n; j++) { 965 ierr = 
MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 966 v += m; 967 } 968 ierr = PetscFree(rwork);CHKERRQ(ierr); 969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 if (*matout != A) { 972 *matout = B; 973 } else { 974 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #include "petscblaslapack.h" 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatScale_MPIDense" 982 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 983 { 984 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 985 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 986 PetscScalar oalpha = alpha; 987 PetscErrorCode ierr; 988 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 989 990 PetscFunctionBegin; 991 BLASscal_(&nz,&oalpha,a->v,&one); 992 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1001 { 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1011 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1012 { 1013 PetscErrorCode ierr; 1014 PetscInt m=A->rmap.n,n=B->cmap.n; 1015 Mat Cmat; 1016 1017 PetscFunctionBegin; 1018 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1019 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1020 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1021 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1022 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 1023 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1025 *C = Cmat; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #if defined(PETSC_HAVE_PLAPACK) 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSolve_MPIDense" 1032 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1033 { 1034 MPI_Comm comm = ((PetscObject)A)->comm; 1035 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1036 PetscErrorCode ierr; 1037 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1038 PetscScalar *array; 1039 PetscReal one = 1.0; 1040 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1041 PLA_Obj v_pla = NULL; 1042 PetscScalar *loc_buf; 1043 Vec loc_x; 1044 1045 PetscFunctionBegin; 1046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1048 1049 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1050 ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr); 1051 ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr); 1052 1053 /* Copy b into rhs_pla */ 1054 ierr = PLA_API_begin();CHKERRQ(ierr); 1055 ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr); 1056 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1057 ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1059 ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr); 1060 ierr = PLA_API_end();CHKERRQ(ierr); 1061 1062 if (A->factor == FACTOR_LU){ 
1063 /* Apply the permutations to the right hand sides */ 1064 ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr); 1065 1066 /* Solve L y = b, overwriting b with y */ 1067 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1068 1069 /* Solve U x = y (=b), overwriting b with x */ 1070 ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr); 1071 } else { /* FACTOR_CHOLESKY */ 1072 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1073 ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr); 1074 } 1075 1076 /* Copy PLAPACK x into Petsc vector x */ 1077 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1078 ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr); 1079 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1080 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1081 if (!lu->pla_solved){ 1082 1083 ierr = PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);CHKERRQ(ierr); 1084 ierr = PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);CHKERRQ(ierr); 1085 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1086 1087 /* Create IS and cts for VecScatterring */ 1088 ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr); 1089 ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr); 1090 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1091 idx_petsc = idx_pla + loc_m; 1092 1093 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1094 for (i=0; i<loc_m; i+=lu->nb){ 1095 j = 0; 1096 while (j < lu->nb && i+j < loc_m){ 1097 idx_petsc[i+j] = rstart + j; j++; 1098 } 1099 rstart += size*lu->nb; 1100 } 1101 1102 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1103 1104 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1106 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1107 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1108 } 1109 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 1112 /* Free data */ 1113 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1114 ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr); 1115 1116 lu->pla_solved = PETSC_TRUE; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1122 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1123 { 1124 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1125 PetscErrorCode ierr; 1126 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1127 PetscInt info_pla=0; 1128 PetscScalar *array,one = 1.0; 1129 1130 PetscFunctionBegin; 1131 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1132 1133 /* Copy A into lu->A */ 1134 ierr = PLA_API_begin();CHKERRQ(ierr); 1135 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1137 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1138 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1139 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1140 ierr = 
PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1141 ierr = PLA_API_end();CHKERRQ(ierr); 1142 1143 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1144 info_pla = PLA_LU(lu->A,lu->pivots); 1145 if (info_pla != 0) 1146 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1147 1148 lu->CleanUpPlapack = PETSC_TRUE; 1149 lu->rstart = rstart; 1150 1151 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1157 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1158 { 1159 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1160 PetscErrorCode ierr; 1161 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1162 PetscInt info_pla=0; 1163 PetscScalar *array,one = 1.0; 1164 1165 PetscFunctionBegin; 1166 1167 /* Copy A into lu->A */ 1168 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1169 ierr = PLA_API_begin();CHKERRQ(ierr); 1170 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1171 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1172 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1173 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1174 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1175 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1176 ierr = PLA_API_end();CHKERRQ(ierr); 1177 1178 /* Factor P A -> Chol */ 1179 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1180 if (info_pla != 0) 1181 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1182 1183 lu->CleanUpPlapack = PETSC_TRUE; 1184 lu->rstart = rstart; 1185 1186 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private" 1192 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1193 { 1194 Mat B; 1195 Mat_Plapack *lu; 1196 PetscErrorCode ierr; 1197 PetscInt M=A->rmap.N,N=A->cmap.N; 1198 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1199 PetscMPIInt size; 1200 PetscInt ierror; 1201 1202 PetscFunctionBegin; 1203 /* Create the factorization matrix */ 1204 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1205 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 1206 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1207 1208 lu = (Mat_Plapack*)(B->spptr); 1209 1210 /* Set default Plapack parameters */ 1211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1212 lu->nprows = 1; lu->npcols = size; 1213 ierror = 0; 1214 lu->nb = M/size; 1215 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1216 1217 /* Set runtime options */ 1218 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1219 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1220 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1221 1222 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1223 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1224 if (ierror){ 1225 ierr = 
PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 1226 } else { 1227 ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 1228 } 1229 lu->ierror = ierror; 1230 1231 lu->nb_alg = 0; 1232 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1233 if (lu->nb_alg){ 1234 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1235 } 1236 PetscOptionsEnd(); 1237 1238 1239 /* Create a 2D communicator */ 1240 ierr = PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);CHKERRQ(ierr); 1241 lu->comm_2d = comm_2d; 1242 1243 /* Initialize PLAPACK */ 1244 ierr = PLA_Init(comm_2d);CHKERRQ(ierr); 1245 1246 /* Create object distribution template */ 1247 lu->templ = NULL; 1248 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1249 1250 /* Use suggested nb_alg if it is not provided by user */ 1251 if (lu->nb_alg == 0){ 1252 ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr); 1253 ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr); 1254 } 1255 1256 /* Set the datatype */ 1257 #if defined(PETSC_USE_COMPLEX) 1258 lu->datatype = MPI_DOUBLE_COMPLEX; 1259 #else 1260 lu->datatype = MPI_DOUBLE; 1261 #endif 1262 1263 ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1264 1265 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1266 lu->CleanUpPlapack = PETSC_TRUE; 1267 *F = B; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /* Note the Petsc r and c permutations are ignored */ 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1274 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1275 { 1276 PetscErrorCode ierr; 1277 PetscInt M = A->rmap.N; 1278 Mat_Plapack *lu; 1279 1280 PetscFunctionBegin; 1281 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1282 lu = (Mat_Plapack*)(*F)->spptr; 1283 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1284 (*F)->factor = FACTOR_LU; 1285 PetscFunctionReturn(0); 1286 } 1287 1288 /* Note the Petsc perm permutation is ignored */ 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1291 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1292 { 1293 PetscErrorCode ierr; 1294 PetscTruth issymmetric,set; 1295 1296 PetscFunctionBegin; 1297 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1298 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1299 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1300 (*F)->factor = FACTOR_CHOLESKY; 1301 PetscFunctionReturn(0); 1302 } 1303 #endif 1304 1305 /* -------------------------------------------------------------------*/ 1306 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1307 MatGetRow_MPIDense, 1308 MatRestoreRow_MPIDense, 1309 MatMult_MPIDense, 1310 /* 4*/ MatMultAdd_MPIDense, 1311 MatMultTranspose_MPIDense, 1312 MatMultTransposeAdd_MPIDense, 1313 #if defined(PETSC_HAVE_PLAPACK) 1314 MatSolve_MPIDense, 1315 #else 1316 0, 1317 #endif 1318 0, 1319 0, 1320 /*10*/ 0, 1321 0, 1322 0, 1323 0, 1324 MatTranspose_MPIDense, 1325 /*15*/ MatGetInfo_MPIDense, 1326 MatEqual_MPIDense, 1327 MatGetDiagonal_MPIDense, 1328 MatDiagonalScale_MPIDense, 1329 MatNorm_MPIDense, 1330 /*20*/ 
MatAssemblyBegin_MPIDense, 1331 MatAssemblyEnd_MPIDense, 1332 0, 1333 MatSetOption_MPIDense, 1334 MatZeroEntries_MPIDense, 1335 /*25*/ MatZeroRows_MPIDense, 1336 #if defined(PETSC_HAVE_PLAPACK) 1337 MatLUFactorSymbolic_MPIDense, 1338 MatLUFactorNumeric_MPIDense, 1339 MatCholeskyFactorSymbolic_MPIDense, 1340 MatCholeskyFactorNumeric_MPIDense, 1341 #else 1342 0, 1343 0, 1344 0, 1345 0, 1346 #endif 1347 /*30*/ MatSetUpPreallocation_MPIDense, 1348 0, 1349 0, 1350 MatGetArray_MPIDense, 1351 MatRestoreArray_MPIDense, 1352 /*35*/ MatDuplicate_MPIDense, 1353 0, 1354 0, 1355 0, 1356 0, 1357 /*40*/ 0, 1358 MatGetSubMatrices_MPIDense, 1359 0, 1360 MatGetValues_MPIDense, 1361 0, 1362 /*45*/ 0, 1363 MatScale_MPIDense, 1364 0, 1365 0, 1366 0, 1367 /*50*/ 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*55*/ 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 /*60*/ MatGetSubMatrix_MPIDense, 1378 MatDestroy_MPIDense, 1379 MatView_MPIDense, 1380 0, 1381 0, 1382 /*65*/ 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 /*70*/ 0, 1388 0, 1389 0, 1390 0, 1391 0, 1392 /*75*/ 0, 1393 0, 1394 0, 1395 0, 1396 0, 1397 /*80*/ 0, 1398 0, 1399 0, 1400 0, 1401 /*84*/ MatLoad_MPIDense, 1402 0, 1403 0, 1404 0, 1405 0, 1406 0, 1407 /*90*/ 0, 1408 MatMatMultSymbolic_MPIDense_MPIDense, 1409 0, 1410 0, 1411 0, 1412 /*95*/ 0, 1413 0, 1414 0, 1415 0}; 1416 1417 EXTERN_C_BEGIN 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1420 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1421 { 1422 Mat_MPIDense *a; 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 mat->preallocated = PETSC_TRUE; 1427 /* Note: For now, when data is specified above, this assumes the user correctly 1428 allocates the local dense storage space. We should add error checking. */ 1429 1430 a = (Mat_MPIDense*)mat->data; 1431 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1432 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1433 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1434 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1435 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 EXTERN_C_END 1439 1440 EXTERN_C_BEGIN 1441 #if defined(PETSC_HAVE_PLAPACK) 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1444 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1445 { 1446 PetscFunctionBegin; 1447 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1448 PetscFunctionReturn(0); 1449 } 1450 #endif 1451 EXTERN_C_END 1452 1453 /*MC 1454 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1455 1456 Options Database Keys: 1457 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1458 1459 Level: beginner 1460 1461 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1462 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1463 (run config/configure.py with the option --download-plapack) 1464 1465 1466 Options Database Keys: 1467 . -mat_plapack_nprows <n> - number of rows in processor partition 1468 . -mat_plapack_npcols <n> - number of columns in processor partition 1469 . -mat_plapack_nb <n> - block size of template vector 1470 . 
-mat_plapack_nb_alg <n> - algorithmic block size 1471 - -mat_plapack_ckerror <n> - error checking flag 1472 1473 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1474 M*/ 1475 1476 EXTERN_C_BEGIN 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatCreate_MPIDense" 1479 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1480 { 1481 Mat_MPIDense *a; 1482 PetscErrorCode ierr; 1483 #if defined(PETSC_HAVE_PLAPACK) 1484 Mat_Plapack *lu; 1485 #endif 1486 1487 PetscFunctionBegin; 1488 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1489 mat->data = (void*)a; 1490 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1491 mat->factor = 0; 1492 mat->mapping = 0; 1493 1494 mat->insertmode = NOT_SET_VALUES; 1495 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1496 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1497 1498 mat->rmap.bs = mat->cmap.bs = 1; 1499 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1500 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1501 a->nvec = mat->cmap.n; 1502 1503 /* build cache for off array entries formed */ 1504 a->donotstash = PETSC_FALSE; 1505 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1506 1507 /* stuff used for matrix vector multiply */ 1508 a->lvec = 0; 1509 a->Mvctx = 0; 1510 a->roworiented = PETSC_TRUE; 1511 1512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1513 "MatGetDiagonalBlock_MPIDense", 1514 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1515 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1516 "MatMPIDenseSetPreallocation_MPIDense", 1517 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1518 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1519 "MatMatMult_MPIAIJ_MPIDense", 1520 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1521 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1522 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1523 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1524 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1525 "MatMatMultNumeric_MPIAIJ_MPIDense", 1526 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1527 #if defined(PETSC_HAVE_PLAPACK) 1528 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1529 "MatSetSolverType_MPIDense_PLAPACK", 1530 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1531 #endif 1532 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1533 1534 #if defined(PETSC_HAVE_PLAPACK) 1535 ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr); 1536 lu->CleanUpPlapack = PETSC_FALSE; 1537 mat->spptr = (void*)lu; 1538 #endif 1539 PetscFunctionReturn(0); 1540 } 1541 EXTERN_C_END 1542 1543 /*MC 1544 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1545 1546 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1547 and MATMPIDENSE otherwise. 1548 1549 Options Database Keys: 1550 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1551 1552 Level: beginner 1553 1554 1555 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1556 M*/ 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatCreate_Dense" 1561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1562 { 1563 PetscErrorCode ierr; 1564 PetscMPIInt size; 1565 1566 PetscFunctionBegin; 1567 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1568 if (size == 1) { 1569 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 EXTERN_C_END 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1579 /*@C 1580 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1581 1582 Not collective 1583 1584 Input Parameters: 1585 . A - the matrix 1586 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1587 to control all matrix memory allocation. 1588 1589 Notes: 1590 The dense format is fully compatible with standard Fortran 77 1591 storage by columns. 1592 1593 The data input variable is intended primarily for Fortran programmers 1594 who wish to allocate their own matrix memory space. Most users should 1595 set data=PETSC_NULL. 1596 1597 Level: intermediate 1598 1599 .keywords: matrix,dense, parallel 1600 1601 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1602 @*/ 1603 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1604 { 1605 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1609 if (f) { 1610 ierr = (*f)(mat,data);CHKERRQ(ierr); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatCreateMPIDense" 1617 /*@C 1618 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1619 1620 Collective on MPI_Comm 1621 1622 Input Parameters: 1623 + comm - MPI communicator 1624 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1625 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1626 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1627 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1628 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1629 to control all matrix memory allocation. 1630 1631 Output Parameter: 1632 . A - the matrix 1633 1634 Notes: 1635 The dense format is fully compatible with standard Fortran 77 1636 storage by columns. 1637 1638 The data input variable is intended primarily for Fortran programmers 1639 who wish to allocate their own matrix memory space. Most users should 1640 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1641 1642 The user MUST specify either the local or global matrix dimensions 1643 (possibly both). 
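
   Example usage (a minimal sketch; error checking with CHKERRQ() omitted for brevity):
.vb
      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      /* ... set entries with MatSetValues(), then assemble ... */
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve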
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR
          for Fortran users) for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}
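/*
   A minimal usage sketch for MatCreateMPIDense() above, followed by MatDuplicate()
   to obtain an independent copy of the assembled matrix.  m and n are local sizes
   chosen by the caller; values would be set with MatSetValues() and the matrix
   assembled before duplicating.

     Mat A,B;
     MatCreateMPIDense(PETSC_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&A);
     ...  MatSetValues(A,...);  ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDuplicate(A,MAT_COPY_VALUES,&B);
     MatDestroy(B);
     MatDestroy(A);
*/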
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
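/*
   A minimal usage sketch: the loaders in this file are reached through the generic
   MatLoad() interface on a binary viewer.  "matrix.dat" is a placeholder for a file
   previously written with MatView() on a binary viewer.

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatLoad(viewer,MATMPIDENSE,&A);
     PetscViewerDestroy(viewer);
*/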
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
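/*
   A minimal usage sketch for the routine above: MatEqual() reduces the per-process
   comparison of the local blocks with MPI_LAND, so every process receives the same
   answer.

     PetscTruth equal;
     MatEqual(A,B,&equal);
     if (!equal) { ... the two matrices differ on at least one process ... }
*/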