1 /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/ 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" 8 #include "src/vec/vecimpl.h" 9 10 EXTERN_C_BEGIN 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 13 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 14 { 15 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 16 int m = A->m,rstart = mdn->rstart,ierr; 17 PetscScalar *array; 18 MPI_Comm comm; 19 20 PetscFunctionBegin; 21 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 22 23 /* The reuse aspect is not implemented efficiently */ 24 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 25 26 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 27 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 28 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 29 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 30 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 31 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 32 33 *iscopy = PETSC_TRUE; 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN int MatSetUpMultiply_MPIDense(Mat); 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatSetValues_MPIDense" 42 int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv) 43 { 44 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 45 int ierr,i,j,rstart = A->rstart,rend = A->rend,row; 46 PetscTruth roworiented = A->roworiented; 47 48 PetscFunctionBegin; 49 for (i=0; i<m; i++) { 50 if (idxm[i] < 0) continue; 51 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 52 if (idxm[i] >= rstart && idxm[i] < rend) { 53 row = idxm[i] - rstart; 54 if (roworiented) { 55 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 56 } else { 57 for (j=0; j<n; j++) { 58 if (idxn[j] < 0) continue; 59 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 60 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 61 } 62 } 63 } else { 64 if (!A->donotstash) { 65 if (roworiented) { 66 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 67 } else { 68 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 69 } 70 } 71 } 72 } 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetValues_MPIDense" 78 int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 79 { 80 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 81 int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row; 82 83 PetscFunctionBegin; 84 for (i=0; i<m; i++) { 85 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 86 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 87 if (idxm[i] >= rstart && idxm[i] < rend) { 88 row = idxm[i] - rstart; 89 for (j=0; j<n; j++) { 90 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 91 if (idxn[j] >= mat->N) { 92 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 93 } 94 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 95 } 96 } else { 97 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 98 } 99 } 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetArray_MPIDense" 105 int MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 106 { 107 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 108 int ierr; 109 110 PetscFunctionBegin; 111 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 117 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 118 { 119 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 120 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 121 int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 122 PetscScalar *av,*bv,*v = lmat->v; 123 Mat newmat; 124 125 PetscFunctionBegin; 126 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 127 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 128 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 129 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 130 131 /* No parallel redistribution currently supported! Should really check each index set 132 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 133 original matrix! */ 134 135 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 137 138 /* Check submatrix call */ 139 if (scall == MAT_REUSE_MATRIX) { 140 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 141 /* Really need to test rows and column sizes! */ 142 newmat = *B; 143 } else { 144 /* Create and fill new matrix */ 145 ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr); 146 } 147 148 /* Now extract the data pointers and do the copy, column at a time */ 149 newmatd = (Mat_MPIDense*)newmat->data; 150 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 151 152 for (i=0; i<ncols; i++) { 153 av = v + nlrows*icol[i]; 154 for (j=0; j<nrows; j++) { 155 *bv++ = av[irow[j] - rstart]; 156 } 157 } 158 159 /* Assemble the matrices so that the correct flags are set */ 160 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 161 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 162 163 /* Free work space */ 164 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 165 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 166 *B = newmat; 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatRestoreArray_MPIDense" 172 int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 173 { 174 PetscFunctionBegin; 175 PetscFunctionReturn(0); 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 180 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 181 { 182 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 183 MPI_Comm comm = mat->comm; 184 int ierr,nstash,reallocs; 185 InsertMode addv; 186 187 PetscFunctionBegin; 188 /* make sure all processors are either in INSERTMODE or ADDMODE */ 189 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 190 if (addv == (ADD_VALUES|INSERT_VALUES)) { 191 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 192 } 193 mat->insertmode = addv; /* in case this processor had no cache */ 194 195 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 196 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 197 PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 203 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 204 { 205 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 206 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 207 PetscScalar *val; 208 InsertMode addv=mat->insertmode; 209 210 PetscFunctionBegin; 211 /* wait on receives */ 212 while (1) { 213 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 214 if (!flg) break; 215 216 for (i=0; i<n;) { 217 /* Now identify the consecutive vals belonging to the same row */ 218 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 219 if (j < n) ncols = j-i; 220 else ncols = n-i; 221 /* Now assemble all these values with a single function call */ 222 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 223 i = j; 224 } 225 } 226 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 227 228 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 229 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 230 231 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 232 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatZeroEntries_MPIDense" 239 int MatZeroEntries_MPIDense(Mat A) 240 { 241 int ierr; 242 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 243 244 PetscFunctionBegin; 245 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatGetBlockSize_MPIDense" 251 int MatGetBlockSize_MPIDense(Mat A,int *bs) 252 { 253 PetscFunctionBegin; 254 *bs = 1; 255 PetscFunctionReturn(0); 256 } 257 258 /* the code does not do the diagonal entries correctly unless the 259 matrix is square and the column and row owerships are identical. 260 This is a BUG. The only way to fix it seems to be to access 261 mdn->A and mdn->B directly and not through the MatZeroRows() 262 routine. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatZeroRows_MPIDense" 266 int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag) 267 { 268 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 269 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 270 int *nprocs,j,idx,nsends; 271 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 272 int *rvalues,tag = A->tag,count,base,slen,n,*source; 273 int *lens,imdex,*lrows,*values; 274 MPI_Comm comm = A->comm; 275 MPI_Request *send_waits,*recv_waits; 276 MPI_Status recv_status,*send_status; 277 IS istmp; 278 PetscTruth found; 279 280 PetscFunctionBegin; 281 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 283 284 /* first count number of contributors to each processor */ 285 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 286 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 287 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 288 for (i=0; i<N; i++) { 289 idx = rows[i]; 290 found = PETSC_FALSE; 291 for (j=0; j<size; j++) { 292 if (idx >= owners[j] && idx < owners[j+1]) { 293 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 294 } 295 } 296 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 297 } 298 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 299 300 /* inform other processors of number of messages and max length*/ 301 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 302 303 /* post receives: */ 304 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 305 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 306 for (i=0; i<nrecvs; i++) { 307 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 308 } 309 310 /* do sends: 311 1) starts[i] gives the starting index in svalues for stuff going to 312 the ith processor 313 */ 314 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 315 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 316 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 317 starts[0] = 0; 318 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 319 for (i=0; i<N; i++) { 320 svalues[starts[owner[i]]++] = rows[i]; 321 } 322 ISRestoreIndices(is,&rows); 323 324 starts[0] = 0; 325 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 326 count = 0; 327 for (i=0; i<size; i++) { 328 if (nprocs[2*i+1]) { 329 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscFree(starts);CHKERRQ(ierr); 333 334 base = owners[rank]; 335 336 /* wait on receives */ 337 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 338 source = lens + nrecvs; 339 count = nrecvs; slen = 0; 340 while (count) { 341 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 342 /* unpack receives into our local space */ 343 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 344 source[imdex] = recv_status.MPI_SOURCE; 345 lens[imdex] = n; 346 slen += n; 347 count--; 348 } 349 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 350 351 /* move the data into the send scatter */ 352 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 353 count = 0; 354 for (i=0; i<nrecvs; i++) { 355 values = rvalues + i*nmax; 356 for (j=0; j<lens[i]; j++) { 357 lrows[count++] = values[j] - base; 358 } 359 } 360 ierr = PetscFree(rvalues);CHKERRQ(ierr); 361 ierr = PetscFree(lens);CHKERRQ(ierr); 362 ierr = PetscFree(owner);CHKERRQ(ierr); 363 ierr = PetscFree(nprocs);CHKERRQ(ierr); 364 365 /* actually zap the local rows */ 366 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 367 PetscLogObjectParent(A,istmp); 368 ierr = PetscFree(lrows);CHKERRQ(ierr); 369 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 370 ierr = ISDestroy(istmp);CHKERRQ(ierr); 371 372 /* wait on sends */ 373 if (nsends) { 374 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 375 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 376 ierr = PetscFree(send_status);CHKERRQ(ierr); 377 } 378 ierr = PetscFree(send_waits);CHKERRQ(ierr); 379 ierr = PetscFree(svalues);CHKERRQ(ierr); 380 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatMult_MPIDense" 386 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 387 { 388 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 389 int ierr; 390 391 PetscFunctionBegin; 392 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 393 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 394 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 #undef __FUNCT__ 399 #define __FUNCT__ "MatMultAdd_MPIDense" 400 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 401 { 402 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 403 int ierr; 404 405 PetscFunctionBegin; 406 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 407 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 408 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatMultTranspose_MPIDense" 414 int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 415 { 416 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 417 int ierr; 418 PetscScalar zero = 0.0; 419 420 PetscFunctionBegin; 421 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 422 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 423 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 424 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 430 int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 431 { 432 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 433 int ierr; 434 435 PetscFunctionBegin; 436 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 437 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 438 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 439 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatGetDiagonal_MPIDense" 445 int MatGetDiagonal_MPIDense(Mat A,Vec v) 446 { 447 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 448 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 449 int ierr,len,i,n,m = A->m,radd; 450 PetscScalar *x,zero = 0.0; 451 452 PetscFunctionBegin; 453 ierr = VecSet(&zero,v);CHKERRQ(ierr); 454 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 455 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 456 if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 457 len = PetscMin(a->A->m,a->A->n); 458 radd = a->rstart*m; 459 for (i=0; i<len; i++) { 460 x[i] = aloc->v[radd + i*m + i]; 461 } 462 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatDestroy_MPIDense" 468 int MatDestroy_MPIDense(Mat mat) 469 { 470 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 471 int ierr; 472 473 PetscFunctionBegin; 474 475 #if defined(PETSC_USE_LOG) 476 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 477 #endif 478 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 479 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 480 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 481 if (mdn->lvec) VecDestroy(mdn->lvec); 482 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 483 if (mdn->factor) { 484 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 485 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 486 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 487 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 488 } 489 ierr = PetscFree(mdn);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatView_MPIDense_Binary" 495 static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 496 { 497 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 498 int ierr; 499 500 PetscFunctionBegin; 501 if (mdn->size == 1) { 502 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 503 } 504 else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported"); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 510 static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 513 int ierr,size = mdn->size,rank = mdn->rank; 514 PetscViewerType vtype; 515 PetscTruth isascii,isdraw; 516 PetscViewer sviewer; 517 PetscViewerFormat format; 518 519 PetscFunctionBegin; 520 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 521 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 522 if (isascii) { 523 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 524 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 525 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 526 MatInfo info; 527 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 528 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m, 529 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 530 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 531 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } else if (format == PETSC_VIEWER_ASCII_INFO) { 534 PetscFunctionReturn(0); 535 } 536 } else if (isdraw) { 537 PetscDraw draw; 538 PetscTruth isnull; 539 540 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 541 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 542 if (isnull) PetscFunctionReturn(0); 543 } 544 545 if (size == 1) { 546 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 547 } else { 548 /* assemble the entire matrix onto first processor. */ 549 Mat A; 550 int M = mat->M,N = mat->N,m,row,i,nz,*cols; 551 PetscScalar *vals; 552 553 if (!rank) { 554 ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 555 } else { 556 ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 557 } 558 PetscLogObjectParent(mat,A); 559 560 /* Copy the matrix ... This isn't the most efficient means, 561 but it's quick for now */ 562 row = mdn->rstart; m = mdn->A->m; 563 for (i=0; i<m; i++) { 564 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 565 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 566 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 567 row++; 568 } 569 570 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 572 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 573 if (!rank) { 574 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 575 } 576 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 578 ierr = MatDestroy(A);CHKERRQ(ierr); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatView_MPIDense" 585 int MatView_MPIDense(Mat mat,PetscViewer viewer) 586 { 587 int ierr; 588 PetscTruth isascii,isbinary,isdraw,issocket; 589 590 PetscFunctionBegin; 591 592 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 596 597 if (isascii || issocket || isdraw) { 598 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 599 } else if (isbinary) { 600 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 601 } else { 602 SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatGetInfo_MPIDense" 609 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 610 { 611 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 612 Mat mdn = mat->A; 613 int ierr; 614 PetscReal isend[5],irecv[5]; 615 616 PetscFunctionBegin; 617 info->rows_global = (double)A->M; 618 info->columns_global = (double)A->N; 619 info->rows_local = (double)A->m; 620 info->columns_local = (double)A->N; 621 info->block_size = 1.0; 622 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 623 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 624 isend[3] = info->memory; isend[4] = info->mallocs; 625 if (flag == MAT_LOCAL) { 626 info->nz_used = isend[0]; 627 info->nz_allocated = isend[1]; 628 info->nz_unneeded = isend[2]; 629 info->memory = isend[3]; 630 info->mallocs = isend[4]; 631 } else if (flag == MAT_GLOBAL_MAX) { 632 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 633 info->nz_used = irecv[0]; 634 info->nz_allocated = irecv[1]; 635 info->nz_unneeded = irecv[2]; 636 info->memory = irecv[3]; 637 info->mallocs = irecv[4]; 638 } else if (flag == MAT_GLOBAL_SUM) { 639 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 640 info->nz_used = irecv[0]; 641 info->nz_allocated = irecv[1]; 642 info->nz_unneeded = irecv[2]; 643 info->memory = irecv[3]; 644 info->mallocs = irecv[4]; 645 } 646 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 647 info->fill_ratio_needed = 0; 648 info->factor_mallocs = 0; 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatSetOption_MPIDense" 654 int MatSetOption_MPIDense(Mat A,MatOption op) 655 { 656 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 657 int ierr; 658 659 PetscFunctionBegin; 660 switch (op) { 661 case MAT_NO_NEW_NONZERO_LOCATIONS: 662 case MAT_YES_NEW_NONZERO_LOCATIONS: 663 case MAT_NEW_NONZERO_LOCATION_ERR: 664 case MAT_NEW_NONZERO_ALLOCATION_ERR: 665 case MAT_COLUMNS_SORTED: 666 case MAT_COLUMNS_UNSORTED: 667 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 668 break; 669 case MAT_ROW_ORIENTED: 670 a->roworiented = PETSC_TRUE; 671 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 672 break; 673 case MAT_ROWS_SORTED: 674 case MAT_ROWS_UNSORTED: 675 case MAT_YES_NEW_DIAGONALS: 676 case MAT_USE_HASH_TABLE: 677 PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 678 break; 679 case MAT_COLUMN_ORIENTED: 680 a->roworiented = PETSC_FALSE; 681 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 682 break; 683 case MAT_IGNORE_OFF_PROC_ENTRIES: 684 a->donotstash = PETSC_TRUE; 685 break; 686 case MAT_NO_NEW_DIAGONALS: 687 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 688 default: 689 SETERRQ(PETSC_ERR_SUP,"unknown option"); 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetRow_MPIDense" 696 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v) 697 { 698 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 699 int lrow,rstart = mat->rstart,rend = mat->rend,ierr; 700 701 PetscFunctionBegin; 702 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 703 lrow = row - rstart; 704 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatRestoreRow_MPIDense" 710 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 711 { 712 int ierr; 713 714 PetscFunctionBegin; 715 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 716 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatDiagonalScale_MPIDense" 722 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 723 { 724 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 725 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 726 PetscScalar *l,*r,x,*v; 727 int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n; 728 729 PetscFunctionBegin; 730 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 731 if (ll) { 732 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 733 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size"); 734 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 735 for (i=0; i<m; i++) { 736 x = l[i]; 737 v = mat->v + i; 738 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 739 } 740 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 741 PetscLogFlops(n*m); 742 } 743 if (rr) { 744 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 745 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size"); 746 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 747 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 748 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 749 for (i=0; i<n; i++) { 750 x = r[i]; 751 v = mat->v + i*m; 752 for (j=0; j<m; j++) { (*v++) *= x;} 753 } 754 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 755 PetscLogFlops(n*m); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatNorm_MPIDense" 762 int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 763 { 764 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 765 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 766 int ierr,i,j; 767 PetscReal sum = 0.0; 768 PetscScalar *v = mat->v; 769 770 PetscFunctionBegin; 771 if (mdn->size == 1) { 772 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 773 } else { 774 if (type == NORM_FROBENIUS) { 775 for (i=0; i<mdn->A->n*mdn->A->m; i++) { 776 #if defined(PETSC_USE_COMPLEX) 777 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 778 #else 779 sum += (*v)*(*v); v++; 780 #endif 781 } 782 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 783 *nrm = sqrt(*nrm); 784 PetscLogFlops(2*mdn->A->n*mdn->A->m); 785 } else if (type == NORM_1) { 786 PetscReal *tmp,*tmp2; 787 ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 788 tmp2 = tmp + A->N; 789 ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr); 790 *nrm = 0.0; 791 v = mat->v; 792 for (j=0; j<mdn->A->n; j++) { 793 for (i=0; i<mdn->A->m; i++) { 794 tmp[j] += PetscAbsScalar(*v); v++; 795 } 796 } 797 ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr); 798 for (j=0; j<A->N; j++) { 799 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 800 } 801 ierr = PetscFree(tmp);CHKERRQ(ierr); 802 PetscLogFlops(A->n*A->m); 803 } else if (type == NORM_INFINITY) { /* max row norm */ 804 PetscReal ntemp; 805 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 806 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr); 807 } else { 808 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 809 } 810 } 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatTranspose_MPIDense" 816 int MatTranspose_MPIDense(Mat A,Mat *matout) 817 { 818 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 819 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 820 Mat B; 821 int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart; 822 int j,i,ierr; 823 PetscScalar *v; 824 825 PetscFunctionBegin; 826 if (!matout && M != N) { 827 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 828 } 829 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 830 831 m = a->A->m; n = a->A->n; v = Aloc->v; 832 ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr); 833 for (j=0; j<n; j++) { 834 for (i=0; i<m; i++) rwork[i] = rstart + i; 835 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 836 v += m; 837 } 838 ierr = PetscFree(rwork);CHKERRQ(ierr); 839 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 840 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 841 if (matout) { 842 *matout = B; 843 } else { 844 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 845 } 846 PetscFunctionReturn(0); 847 } 848 849 #include "petscblaslapack.h" 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatScale_MPIDense" 852 int MatScale_MPIDense(const PetscScalar *alpha,Mat inA) 853 { 854 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 855 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 856 int one = 1,nz; 857 858 PetscFunctionBegin; 859 nz = inA->m*inA->N; 860 BLscal_(&nz,(PetscScalar*)alpha,a->v,&one); 861 PetscLogFlops(nz); 862 PetscFunctionReturn(0); 863 } 864 865 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 866 EXTERN int MatGetSubMatrices_MPIDense(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 870 int MatSetUpPreallocation_MPIDense(Mat A) 871 { 872 int ierr; 873 874 PetscFunctionBegin; 875 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 /* -------------------------------------------------------------------*/ 880 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 881 MatGetRow_MPIDense, 882 MatRestoreRow_MPIDense, 883 MatMult_MPIDense, 884 MatMultAdd_MPIDense, 885 MatMultTranspose_MPIDense, 886 MatMultTransposeAdd_MPIDense, 887 0, 888 0, 889 0, 890 0, 891 0, 892 0, 893 0, 894 MatTranspose_MPIDense, 895 MatGetInfo_MPIDense,0, 896 MatGetDiagonal_MPIDense, 897 MatDiagonalScale_MPIDense, 898 MatNorm_MPIDense, 899 MatAssemblyBegin_MPIDense, 900 MatAssemblyEnd_MPIDense, 901 0, 902 MatSetOption_MPIDense, 903 MatZeroEntries_MPIDense, 904 MatZeroRows_MPIDense, 905 0, 906 0, 907 0, 908 0, 909 MatSetUpPreallocation_MPIDense, 910 0, 911 0, 912 MatGetArray_MPIDense, 913 MatRestoreArray_MPIDense, 914 MatDuplicate_MPIDense, 915 0, 916 0, 917 0, 918 0, 919 0, 920 MatGetSubMatrices_MPIDense, 921 0, 922 MatGetValues_MPIDense, 923 0, 924 0, 925 MatScale_MPIDense, 926 0, 927 0, 928 0, 929 MatGetBlockSize_MPIDense, 930 0, 931 0, 932 0, 933 0, 934 0, 935 0, 936 0, 937 0, 938 0, 939 MatGetSubMatrix_MPIDense, 940 MatDestroy_MPIDense, 941 MatView_MPIDense, 942 MatGetPetscMaps_Petsc}; 943 944 EXTERN_C_BEGIN 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 947 int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 948 { 949 Mat_MPIDense *a; 950 int ierr; 951 952 PetscFunctionBegin; 953 mat->preallocated = PETSC_TRUE; 954 /* Note: For now, when data is specified above, this assumes the user correctly 955 allocates the local dense storage space. We should add error checking. */ 956 957 a = (Mat_MPIDense*)mat->data; 958 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);CHKERRQ(ierr); 959 PetscLogObjectParent(mat,a->A); 960 PetscFunctionReturn(0); 961 } 962 EXTERN_C_END 963 964 /*MC 965 MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 966 967 Options Database Keys: 968 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 969 970 Level: beginner 971 972 .seealso: MatCreateMPIDense 973 M*/ 974 975 EXTERN_C_BEGIN 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatCreate_MPIDense" 978 int MatCreate_MPIDense(Mat mat) 979 { 980 Mat_MPIDense *a; 981 int ierr,i; 982 983 PetscFunctionBegin; 984 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 985 mat->data = (void*)a; 986 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 987 mat->factor = 0; 988 mat->mapping = 0; 989 990 a->factor = 0; 991 mat->insertmode = NOT_SET_VALUES; 992 ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr); 993 ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr); 994 995 ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr); 996 ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr); 997 a->nvec = mat->n; 998 999 /* the information in the maps duplicates the information computed below, eventually 1000 we should remove the duplicate information that is not contained in the maps */ 1001 ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr); 1002 ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr); 1003 1004 /* build local table of row and column ownerships */ 1005 ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1006 a->cowners = a->rowners + a->size + 1; 1007 PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1008 ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1009 a->rowners[0] = 0; 1010 for (i=2; i<=a->size; i++) { 1011 a->rowners[i] += a->rowners[i-1]; 1012 } 1013 a->rstart = a->rowners[a->rank]; 1014 a->rend = a->rowners[a->rank+1]; 1015 ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr); 1016 a->cowners[0] = 0; 1017 for (i=2; i<=a->size; i++) { 1018 a->cowners[i] += a->cowners[i-1]; 1019 } 1020 1021 /* build cache for off array entries formed */ 1022 a->donotstash = PETSC_FALSE; 1023 ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr); 1024 1025 /* stuff used for matrix vector multiply */ 1026 a->lvec = 0; 1027 a->Mvctx = 0; 1028 a->roworiented = PETSC_TRUE; 1029 1030 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1031 "MatGetDiagonalBlock_MPIDense", 1032 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1033 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1034 "MatMPIDenseSetPreallocation_MPIDense", 1035 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 EXTERN_C_END 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1042 /*@C 1043 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1044 1045 Not collective 1046 1047 Input Parameters: 1048 . A - the matrix 1049 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1050 to control all matrix memory allocation. 1051 1052 Notes: 1053 The dense format is fully compatible with standard Fortran 77 1054 storage by columns. 1055 1056 The data input variable is intended primarily for Fortran programmers 1057 who wish to allocate their own matrix memory space. Most users should 1058 set data=PETSC_NULL. 1059 1060 Level: intermediate 1061 1062 .keywords: matrix,dense, parallel 1063 1064 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1065 @*/ 1066 int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1067 { 1068 int ierr,(*f)(Mat,PetscScalar *); 1069 1070 PetscFunctionBegin; 1071 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1072 if (f) { 1073 ierr = (*f)(mat,data);CHKERRQ(ierr); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatCreateMPIDense" 1080 /*@C 1081 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1082 1083 Collective on MPI_Comm 1084 1085 Input Parameters: 1086 + comm - MPI communicator 1087 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1088 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1089 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1090 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1091 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1092 to control all matrix memory allocation. 1093 1094 Output Parameter: 1095 . A - the matrix 1096 1097 Notes: 1098 The dense format is fully compatible with standard Fortran 77 1099 storage by columns. 1100 1101 The data input variable is intended primarily for Fortran programmers 1102 who wish to allocate their own matrix memory space. Most users should 1103 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1104 1105 The user MUST specify either the local or global matrix dimensions 1106 (possibly both). 1107 1108 Level: intermediate 1109 1110 .keywords: matrix,dense, parallel 1111 1112 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1113 @*/ 1114 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A) 1115 { 1116 int ierr,size; 1117 1118 PetscFunctionBegin; 1119 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 1120 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1121 if (size > 1) { 1122 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1123 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1124 } else { 1125 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1126 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatDuplicate_MPIDense" 1133 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1134 { 1135 Mat mat; 1136 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1137 int ierr; 1138 1139 PetscFunctionBegin; 1140 *newmat = 0; 1141 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr); 1142 ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr); 1143 ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr); 1144 mat->data = (void*)a; 1145 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1146 mat->factor = A->factor; 1147 mat->assembled = PETSC_TRUE; 1148 mat->preallocated = PETSC_TRUE; 1149 1150 a->rstart = oldmat->rstart; 1151 a->rend = oldmat->rend; 1152 a->size = oldmat->size; 1153 a->rank = oldmat->rank; 1154 mat->insertmode = NOT_SET_VALUES; 1155 a->nvec = oldmat->nvec; 1156 a->donotstash = oldmat->donotstash; 1157 ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr); 1158 PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1159 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1160 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1161 1162 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1163 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1164 PetscLogObjectParent(mat,a->A); 1165 *newmat = mat; 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #include "petscsys.h" 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1173 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat) 1174 { 1175 int *rowners,i,size,rank,m,ierr,nz,j; 1176 PetscScalar *array,*vals,*vals_ptr; 1177 MPI_Status status; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1182 1183 /* determine ownership of all rows */ 1184 m = M/size + ((M % size) > rank); 1185 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1186 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1187 rowners[0] = 0; 1188 for (i=2; i<=size; i++) { 1189 rowners[i] += rowners[i-1]; 1190 } 1191 1192 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1193 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1194 1195 if (!rank) { 1196 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1197 1198 /* read in my part of the matrix numerical values */ 1199 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1200 1201 /* insert into matrix-by row (this is why cannot directly read into array */ 1202 vals_ptr = vals; 1203 for (i=0; i<m; i++) { 1204 for (j=0; j<N; j++) { 1205 array[i + j*m] = *vals_ptr++; 1206 } 1207 } 1208 1209 /* read in other processors and ship out */ 1210 for (i=1; i<size; i++) { 1211 nz = (rowners[i+1] - rowners[i])*N; 1212 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1213 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1214 } 1215 } else { 1216 /* receive numeric values */ 1217 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1218 1219 /* receive message of values*/ 1220 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1221 1222 /* insert into matrix-by row (this is why cannot directly read into array */ 1223 vals_ptr = vals; 1224 for (i=0; i<m; i++) { 1225 for (j=0; j<N; j++) { 1226 array[i + j*m] = *vals_ptr++; 1227 } 1228 } 1229 } 1230 ierr = PetscFree(rowners);CHKERRQ(ierr); 1231 ierr = PetscFree(vals);CHKERRQ(ierr); 1232 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1233 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 EXTERN_C_BEGIN 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatLoad_MPIDense" 1240 int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat) 1241 { 1242 Mat A; 1243 PetscScalar *vals,*svals; 1244 MPI_Comm comm = ((PetscObject)viewer)->comm; 1245 MPI_Status status; 1246 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1247 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1248 int tag = ((PetscObject)viewer)->tag; 1249 int i,nz,ierr,j,rstart,rend,fd; 1250 1251 PetscFunctionBegin; 1252 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1254 if (!rank) { 1255 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1256 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1257 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1258 } 1259 1260 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1261 M = header[1]; N = header[2]; nz = header[3]; 1262 1263 /* 1264 Handle case where matrix is stored on disk as a dense matrix 1265 */ 1266 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1267 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /* determine ownership of all rows */ 1272 m = M/size + ((M % size) > rank); 1273 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1274 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1275 rowners[0] = 0; 1276 for (i=2; i<=size; i++) { 1277 rowners[i] += rowners[i-1]; 1278 } 1279 rstart = rowners[rank]; 1280 rend = rowners[rank+1]; 1281 1282 /* distribute row lengths to all processors */ 1283 ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr); 1284 offlens = ourlens + (rend-rstart); 1285 if (!rank) { 1286 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1287 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1288 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1289 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1290 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1291 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1292 } else { 1293 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1294 } 1295 1296 if (!rank) { 1297 /* calculate the number of nonzeros on each processor */ 1298 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1299 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1300 for (i=0; i<size; i++) { 1301 for (j=rowners[i]; j< rowners[i+1]; j++) { 1302 procsnz[i] += rowlengths[j]; 1303 } 1304 } 1305 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1306 1307 /* determine max buffer needed and allocate it */ 1308 maxnz = 0; 1309 for (i=0; i<size; i++) { 1310 maxnz = PetscMax(maxnz,procsnz[i]); 1311 } 1312 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1313 1314 /* read in my part of the matrix column indices */ 1315 nz = procsnz[0]; 1316 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1317 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1318 1319 /* read in every one elses and ship off */ 1320 for (i=1; i<size; i++) { 1321 nz = procsnz[i]; 1322 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1323 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1324 } 1325 ierr = PetscFree(cols);CHKERRQ(ierr); 1326 } else { 1327 /* determine buffer space needed for message */ 1328 nz = 0; 1329 for (i=0; i<m; i++) { 1330 nz += ourlens[i]; 1331 } 1332 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1333 1334 /* receive message of column indices*/ 1335 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1336 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1337 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1338 } 1339 1340 /* loop over local rows, determining number of off diagonal entries */ 1341 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1342 jj = 0; 1343 for (i=0; i<m; i++) { 1344 for (j=0; j<ourlens[i]; j++) { 1345 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1346 jj++; 1347 } 1348 } 1349 1350 /* create our matrix */ 1351 for (i=0; i<m; i++) { 1352 ourlens[i] -= offlens[i]; 1353 } 1354 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1355 A = *newmat; 1356 for (i=0; i<m; i++) { 1357 ourlens[i] += offlens[i]; 1358 } 1359 1360 if (!rank) { 1361 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1362 1363 /* read in my part of the matrix numerical values */ 1364 nz = procsnz[0]; 1365 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1366 1367 /* insert into matrix */ 1368 jj = rstart; 1369 smycols = mycols; 1370 svals = vals; 1371 for (i=0; i<m; i++) { 1372 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1373 smycols += ourlens[i]; 1374 svals += ourlens[i]; 1375 jj++; 1376 } 1377 1378 /* read in other processors and ship out */ 1379 for (i=1; i<size; i++) { 1380 nz = procsnz[i]; 1381 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1382 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1385 } else { 1386 /* receive numeric values */ 1387 ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1388 1389 /* receive message of values*/ 1390 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1391 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1392 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1393 1394 /* insert into matrix */ 1395 jj = rstart; 1396 smycols = mycols; 1397 svals = vals; 1398 for (i=0; i<m; i++) { 1399 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1400 smycols += ourlens[i]; 1401 svals += ourlens[i]; 1402 jj++; 1403 } 1404 } 1405 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1406 ierr = PetscFree(vals);CHKERRQ(ierr); 1407 ierr = PetscFree(mycols);CHKERRQ(ierr); 1408 ierr = PetscFree(rowners);CHKERRQ(ierr); 1409 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 EXTERN_C_END 1415 1416 1417 1418 1419 1420