/*$Id: mpidense.c,v 1.153 2001/03/23 23:21:49 balay Exp buschelm $*/

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  Scalar       *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
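  /* the local rows are stored in the sequential dense matrix a->A, so exposing
     that array gives the caller the local column-major storage directly */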
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  Scalar       *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
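  /* each expected message is received into its own block of nmax integers inside rvalues */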
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  Scalar       *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                  (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat    A;
    int    M = mat->M,N = mat->N,m,row,i,nz,*cols;
    Scalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatSetUpPreallocation_MPIDense,
       0,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
{
  Mat_MPIDense *a;
  int          ierr;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners,i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
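
/*
   The following is a minimal usage sketch, NOT part of the original library source.  It shows
   how the routines above are typically combined: MatCreateMPIDense() with data = PETSC_NULL,
   MatSetValues() on the locally owned rows, assembly, and a matrix-vector product that is
   dispatched to MatMult_MPIDense().  The matrix size, the entry formula, and the driver
   program itself are illustrative assumptions only.
*/
#if 0
#include "petscmat.h"

int main(int argc,char **argv)
{
  Mat    A;
  Vec    x,y;
  int    ierr,i,j,rstart,rend,M = 8,N = 8;
  Scalar v,one = 1.0;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);

  /* create a parallel dense matrix; PETSc allocates and owns the storage */
  ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);

  /* fill only the locally owned rows; off-processor entries would be stashed and
     communicated during assembly (see MatSetValues_MPIDense/MatAssemblyEnd_MPIDense) */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = 1.0/(i + j + 1);   /* illustrative entries */
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* y = A*x, handled by MatMult_MPIDense() */
  ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);

  ierr = MatDestroy(A);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  ierr = PetscFinalize();CHKERRQ(ierr);
  return 0;
}
#endif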