1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpidense.c,v 1.127 1999/10/04 18:51:01 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Basic functions for basic parallel dense matrices. 7 */ 8 9 #include "src/mat/impls/dense/mpi/mpidense.h" 10 #include "src/vec/vecimpl.h" 11 12 EXTERN_C_BEGIN 13 #undef __FUNC__ 14 #define __FUNC__ "MatGetDiagonalBlock_MPIDense" 15 int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 16 { 17 Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 18 int m = mdn->m,rstart = mdn->rstart,rank,ierr; 19 Scalar *array; 20 MPI_Comm comm; 21 22 PetscFunctionBegin; 23 if (mdn->M != mdn->N) SETERRQ(PETSC_ERR_SUP,0,"Only square matrices supported."); 24 25 /* The reuse aspect is not implemented efficiently */ 26 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 27 28 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 29 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 30 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 31 ierr = MatCreateSeqDense(comm,m,m,array+m*rstart,B);CHKERRQ(ierr); 32 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 33 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 34 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 35 36 *iscopy = PETSC_TRUE; 37 PetscFunctionReturn(0); 38 } 39 EXTERN_C_END 40 41 extern int MatSetUpMultiply_MPIDense(Mat); 42 43 #undef __FUNC__ 44 #define __FUNC__ "MatSetValues_MPIDense" 45 int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv) 46 { 47 Mat_MPIDense *A = (Mat_MPIDense *) mat->data; 48 int ierr, i, j, rstart = A->rstart, rend = A->rend, row; 49 int roworiented = A->roworiented; 50 51 PetscFunctionBegin; 52 for ( i=0; i<m; i++ ) { 53 if (idxm[i] < 0) continue; 54 if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 55 if (idxm[i] >= rstart && idxm[i] < rend) { 56 row = idxm[i] - rstart; 57 if (roworiented) { 58 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 59 } else { 60 for ( j=0; j<n; j++ ) { 61 if (idxn[j] < 0) continue; 62 if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 63 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 64 } 65 } 66 } else { 67 if ( !A->donotstash) { 68 if (roworiented) { 69 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 70 } else { 71 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 72 } 73 } 74 } 75 } 76 PetscFunctionReturn(0); 77 } 78 79 #undef __FUNC__ 80 #define __FUNC__ "MatGetValues_MPIDense" 81 int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 82 { 83 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 84 int ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row; 85 86 PetscFunctionBegin; 87 for ( i=0; i<m; i++ ) { 88 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 89 if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 90 if (idxm[i] >= rstart && idxm[i] < rend) { 91 row = idxm[i] - rstart; 92 for ( j=0; j<n; j++ ) { 93 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 94 if (idxn[j] >= mdn->N) { 95 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 96 } 97 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 98 } 99 } else { 100 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 101 } 102 } 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNC__ 107 #define __FUNC__ "MatGetArray_MPIDense" 108 int MatGetArray_MPIDense(Mat A,Scalar **array) 109 { 110 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 111 int ierr; 112 113 PetscFunctionBegin; 114 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNC__ 119 #define __FUNC__ "MatGetSubMatrix_MPIDense" 120 static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 121 { 122 Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd; 123 Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data; 124 int i, j, ierr, *irow, *icol, rstart, rend, nrows, ncols, nlrows, nlcols, rank; 125 Scalar *av, *bv, *v = lmat->v; 126 Mat newmat; 127 128 PetscFunctionBegin; 129 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 130 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 131 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 132 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 133 ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr); 134 135 /* No parallel redistribution currently supported! Should really check each index set 136 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 137 original matrix! */ 138 139 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 140 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 141 142 /* Check submatrix call */ 143 if (scall == MAT_REUSE_MATRIX) { 144 /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */ 145 /* Really need to test rows and column sizes! */ 146 newmat = *B; 147 } else { 148 /* Create and fill new matrix */ 149 ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr); 150 } 151 152 /* Now extract the data pointers and do the copy, column at a time */ 153 newmatd = (Mat_MPIDense *) newmat->data; 154 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 155 156 for ( i=0; i<ncols; i++ ) { 157 av = v + nlrows*icol[i]; 158 for (j=0; j<nrows; j++ ) { 159 *bv++ = av[irow[j] - rstart]; 160 } 161 } 162 163 /* Assemble the matrices so that the correct flags are set */ 164 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 165 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166 167 /* Free work space */ 168 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 169 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 170 *B = newmat; 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNC__ 175 #define __FUNC__ "MatRestoreArray_MPIDense" 176 int MatRestoreArray_MPIDense(Mat A,Scalar **array) 177 { 178 PetscFunctionBegin; 179 PetscFunctionReturn(0); 180 } 181 182 #undef __FUNC__ 183 #define __FUNC__ "MatAssemblyBegin_MPIDense" 184 int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 185 { 186 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 187 MPI_Comm comm = mat->comm; 188 int ierr,nstash,reallocs; 189 InsertMode addv; 190 191 PetscFunctionBegin; 192 /* make sure all processors are either in INSERTMODE or ADDMODE */ 193 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 194 if (addv == (ADD_VALUES|INSERT_VALUES)) { 195 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs"); 196 } 197 mat->insertmode = addv; /* in case this processor had no cache */ 198 199 ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr); 200 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 201 PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n", 202 nstash,reallocs); 203 PetscFunctionReturn(0); 204 } 205 206 #undef __FUNC__ 207 #define __FUNC__ "MatAssemblyEnd_MPIDense" 208 int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 209 { 210 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 211 int i,n,ierr,*row,*col,flg,j,rstart,ncols; 212 Scalar *val; 213 InsertMode addv=mat->insertmode; 214 215 PetscFunctionBegin; 216 /* wait on receives */ 217 while (1) { 218 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 219 if (!flg) break; 220 221 for ( i=0; i<n; ) { 222 /* Now identify the consecutive vals belonging to the same row */ 223 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 224 if (j < n) ncols = j-i; 225 else ncols = n-i; 226 /* Now assemble all these values with a single function call */ 227 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 228 i = j; 229 } 230 } 231 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 232 233 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 234 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 235 236 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 237 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 238 } 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNC__ 243 #define __FUNC__ "MatZeroEntries_MPIDense" 244 int MatZeroEntries_MPIDense(Mat A) 245 { 246 int ierr; 247 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 248 249 PetscFunctionBegin; 250 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNC__ 255 #define __FUNC__ "MatGetBlockSize_MPIDense" 256 int MatGetBlockSize_MPIDense(Mat A,int *bs) 257 { 258 PetscFunctionBegin; 259 *bs = 1; 260 PetscFunctionReturn(0); 261 } 262 263 /* the code does not do the diagonal entries correctly unless the 264 matrix is square and the column and row owerships are identical. 265 This is a BUG. The only way to fix it seems to be to access 266 mdn->A and mdn->B directly and not through the MatZeroRows() 267 routine. 268 */ 269 #undef __FUNC__ 270 #define __FUNC__ "MatZeroRows_MPIDense" 271 int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag) 272 { 273 Mat_MPIDense *l = (Mat_MPIDense *) A->data; 274 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 275 int *procs,*nprocs,j,found,idx,nsends,*work; 276 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 277 int *rvalues,tag = A->tag,count,base,slen,n,*source; 278 int *lens,imdex,*lrows,*values; 279 MPI_Comm comm = A->comm; 280 MPI_Request *send_waits,*recv_waits; 281 MPI_Status recv_status,*send_status; 282 IS istmp; 283 284 PetscFunctionBegin; 285 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 286 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 287 288 /* first count number of contributors to each processor */ 289 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 290 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 291 procs = nprocs + size; 292 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 293 for ( i=0; i<N; i++ ) { 294 idx = rows[i]; 295 found = 0; 296 for ( j=0; j<size; j++ ) { 297 if (idx >= owners[j] && idx < owners[j+1]) { 298 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 299 } 300 } 301 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 302 } 303 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 304 305 /* inform other processors of number of messages and max length*/ 306 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 307 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 308 nrecvs = work[rank]; 309 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 310 nmax = work[rank]; 311 ierr = PetscFree(work);CHKERRQ(ierr); 312 313 /* post receives: */ 314 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 315 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 316 for ( i=0; i<nrecvs; i++ ) { 317 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 318 } 319 320 /* do sends: 321 1) starts[i] gives the starting index in svalues for stuff going to 322 the ith processor 323 */ 324 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 325 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 326 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 327 starts[0] = 0; 328 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 329 for ( i=0; i<N; i++ ) { 330 svalues[starts[owner[i]]++] = rows[i]; 331 } 332 ISRestoreIndices(is,&rows); 333 334 starts[0] = 0; 335 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 336 count = 0; 337 for ( i=0; i<size; i++ ) { 338 if (procs[i]) { 339 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 340 } 341 } 342 ierr = PetscFree(starts);CHKERRQ(ierr); 343 344 base = owners[rank]; 345 346 /* wait on receives */ 347 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 348 source = lens + nrecvs; 349 count = nrecvs; slen = 0; 350 while (count) { 351 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 352 /* unpack receives into our local space */ 353 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 354 source[imdex] = recv_status.MPI_SOURCE; 355 lens[imdex] = n; 356 slen += n; 357 count--; 358 } 359 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 360 361 /* move the data into the send scatter */ 362 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 363 count = 0; 364 for ( i=0; i<nrecvs; i++ ) { 365 values = rvalues + i*nmax; 366 for ( j=0; j<lens[i]; j++ ) { 367 lrows[count++] = values[j] - base; 368 } 369 } 370 ierr = PetscFree(rvalues);CHKERRQ(ierr); 371 ierr = PetscFree(lens);CHKERRQ(ierr); 372 ierr = PetscFree(owner);CHKERRQ(ierr); 373 ierr = PetscFree(nprocs);CHKERRQ(ierr); 374 375 /* actually zap the local rows */ 376 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 377 PLogObjectParent(A,istmp); 378 ierr = PetscFree(lrows);CHKERRQ(ierr); 379 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 380 ierr = ISDestroy(istmp);CHKERRQ(ierr); 381 382 /* wait on sends */ 383 if (nsends) { 384 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 385 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 386 ierr = PetscFree(send_status);CHKERRQ(ierr); 387 } 388 ierr = PetscFree(send_waits);CHKERRQ(ierr); 389 ierr = PetscFree(svalues);CHKERRQ(ierr); 390 391 PetscFunctionReturn(0); 392 } 393 394 #undef __FUNC__ 395 #define __FUNC__ "MatMult_MPIDense" 396 int MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 397 { 398 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 399 int ierr; 400 401 PetscFunctionBegin; 402 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 403 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 404 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNC__ 409 #define __FUNC__ "MatMultAdd_MPIDense" 410 int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 411 { 412 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 413 int ierr; 414 415 PetscFunctionBegin; 416 ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 417 ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 418 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNC__ 423 #define __FUNC__ "MatMultTrans_MPIDense" 424 int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy) 425 { 426 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 427 int ierr; 428 Scalar zero = 0.0; 429 430 PetscFunctionBegin; 431 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 432 ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 433 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 434 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNC__ 439 #define __FUNC__ "MatMultTransAdd_MPIDense" 440 int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 441 { 442 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 443 int ierr; 444 445 PetscFunctionBegin; 446 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 447 ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 448 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 449 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNC__ 454 #define __FUNC__ "MatGetDiagonal_MPIDense" 455 int MatGetDiagonal_MPIDense(Mat A,Vec v) 456 { 457 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 458 Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data; 459 int ierr, len, i, n, m = a->m, radd; 460 Scalar *x, zero = 0.0; 461 462 PetscFunctionBegin; 463 VecSet(&zero,v); 464 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 465 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 466 if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec"); 467 len = PetscMin(aloc->m,aloc->n); 468 radd = a->rstart*m; 469 for ( i=0; i<len; i++ ) { 470 x[i] = aloc->v[radd + i*m + i]; 471 } 472 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ "MatDestroy_MPIDense" 478 int MatDestroy_MPIDense(Mat mat) 479 { 480 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 481 int ierr; 482 483 PetscFunctionBegin; 484 485 if (mat->mapping) { 486 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 487 } 488 if (mat->bmapping) { 489 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 490 } 491 #if defined(PETSC_USE_LOG) 492 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N); 493 #endif 494 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 495 ierr = PetscFree(mdn->rowners);CHKERRQ(ierr); 496 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 497 if (mdn->lvec) VecDestroy(mdn->lvec); 498 if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx); 499 if (mdn->factor) { 500 if (mdn->factor->temp) {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);} 501 if (mdn->factor->tag) {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);} 502 if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);} 503 ierr = PetscFree(mdn->factor);CHKERRQ(ierr); 504 } 505 ierr = PetscFree(mdn);CHKERRQ(ierr); 506 if (mat->rmap) { 507 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 508 } 509 if (mat->cmap) { 510 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 511 } 512 PLogObjectDestroy(mat); 513 PetscHeaderDestroy(mat); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNC__ 518 #define __FUNC__ "MatView_MPIDense_Binary" 519 static int MatView_MPIDense_Binary(Mat mat,Viewer viewer) 520 { 521 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 522 int ierr; 523 524 PetscFunctionBegin; 525 if (mdn->size == 1) { 526 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 527 } 528 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 529 PetscFunctionReturn(0); 530 } 531 532 #undef __FUNC__ 533 #define __FUNC__ "MatView_MPIDense_ASCII" 534 static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer) 535 { 536 Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data; 537 int ierr, format, size = mdn->size, rank = mdn->rank; 538 FILE *fd; 539 ViewerType vtype; 540 541 PetscFunctionBegin; 542 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 543 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 544 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 545 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 546 MatInfo info; 547 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 548 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 549 fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m, 550 (int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 551 fflush(fd); 552 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 553 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 556 PetscFunctionReturn(0); 557 } 558 559 if (size == 1) { 560 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 561 } else { 562 /* assemble the entire matrix onto first processor. */ 563 Mat A; 564 int M = mdn->M, N = mdn->N,m,row,i, nz, *cols; 565 Scalar *vals; 566 Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data; 567 568 if (!rank) { 569 ierr = MatCreateMPIDense(mat->comm,M,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 570 } else { 571 ierr = MatCreateMPIDense(mat->comm,0,mdn->nvec,M,N,PETSC_NULL,&A);CHKERRQ(ierr); 572 } 573 PLogObjectParent(mat,A); 574 575 /* Copy the matrix ... This isn't the most efficient means, 576 but it's quick for now */ 577 row = mdn->rstart; m = Amdn->m; 578 for ( i=0; i<m; i++ ) { 579 ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 580 ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 581 ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 582 row++; 583 } 584 585 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 586 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 587 if (!rank) { 588 ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr); 589 } 590 ierr = MatDestroy(A);CHKERRQ(ierr); 591 } 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNC__ 596 #define __FUNC__ "MatView_MPIDense" 597 int MatView_MPIDense(Mat mat,Viewer viewer) 598 { 599 int ierr; 600 int isascii,isbinary; 601 602 PetscFunctionBegin; 603 604 isascii = PetscTypeCompare(viewer,ASCII_VIEWER); 605 isbinary = PetscTypeCompare(viewer,BINARY_VIEWER); 606 607 if (isascii) { 608 ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr); 609 } else if (isbinary) { 610 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 611 } else { 612 SETERRQ1(1,1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 613 } 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatGetInfo_MPIDense" 619 int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 620 { 621 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 622 Mat mdn = mat->A; 623 int ierr; 624 double isend[5], irecv[5]; 625 626 PetscFunctionBegin; 627 info->rows_global = (double)mat->M; 628 info->columns_global = (double)mat->N; 629 info->rows_local = (double)mat->m; 630 info->columns_local = (double)mat->N; 631 info->block_size = 1.0; 632 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 633 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 634 isend[3] = info->memory; isend[4] = info->mallocs; 635 if (flag == MAT_LOCAL) { 636 info->nz_used = isend[0]; 637 info->nz_allocated = isend[1]; 638 info->nz_unneeded = isend[2]; 639 info->memory = isend[3]; 640 info->mallocs = isend[4]; 641 } else if (flag == MAT_GLOBAL_MAX) { 642 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 643 info->nz_used = irecv[0]; 644 info->nz_allocated = irecv[1]; 645 info->nz_unneeded = irecv[2]; 646 info->memory = irecv[3]; 647 info->mallocs = irecv[4]; 648 } else if (flag == MAT_GLOBAL_SUM) { 649 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 650 info->nz_used = irecv[0]; 651 info->nz_allocated = irecv[1]; 652 info->nz_unneeded = irecv[2]; 653 info->memory = irecv[3]; 654 info->mallocs = irecv[4]; 655 } 656 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 657 info->fill_ratio_needed = 0; 658 info->factor_mallocs = 0; 659 PetscFunctionReturn(0); 660 } 661 662 /* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*); 663 extern int MatLUFactorNumeric_MPIDense(Mat,Mat*); 664 extern int MatLUFactor_MPIDense(Mat,IS,IS,double); 665 extern int MatSolve_MPIDense(Mat,Vec,Vec); 666 extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec); 667 extern int MatSolveTrans_MPIDense(Mat,Vec,Vec); 668 extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */ 669 670 #undef __FUNC__ 671 #define __FUNC__ "MatSetOption_MPIDense" 672 int MatSetOption_MPIDense(Mat A,MatOption op) 673 { 674 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 675 676 PetscFunctionBegin; 677 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 678 op == MAT_YES_NEW_NONZERO_LOCATIONS || 679 op == MAT_NEW_NONZERO_LOCATION_ERR || 680 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 681 op == MAT_COLUMNS_SORTED || 682 op == MAT_COLUMNS_UNSORTED) { 683 MatSetOption(a->A,op); 684 } else if (op == MAT_ROW_ORIENTED) { 685 a->roworiented = 1; 686 MatSetOption(a->A,op); 687 } else if (op == MAT_ROWS_SORTED || 688 op == MAT_ROWS_UNSORTED || 689 op == MAT_SYMMETRIC || 690 op == MAT_STRUCTURALLY_SYMMETRIC || 691 op == MAT_YES_NEW_DIAGONALS || 692 op == MAT_USE_HASH_TABLE) { 693 PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n"); 694 } else if (op == MAT_COLUMN_ORIENTED) { 695 a->roworiented = 0; MatSetOption(a->A,op); 696 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 697 a->donotstash = 1; 698 } else if (op == MAT_NO_NEW_DIAGONALS) { 699 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 700 } else { 701 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 702 } 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNC__ 707 #define __FUNC__ "MatGetSize_MPIDense" 708 int MatGetSize_MPIDense(Mat A,int *m,int *n) 709 { 710 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 711 712 PetscFunctionBegin; 713 *m = mat->M; *n = mat->N; 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNC__ 718 #define __FUNC__ "MatGetLocalSize_MPIDense" 719 int MatGetLocalSize_MPIDense(Mat A,int *m,int *n) 720 { 721 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 722 723 PetscFunctionBegin; 724 *m = mat->m; *n = mat->N; 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNC__ 729 #define __FUNC__ "MatGetOwnershipRange_MPIDense" 730 int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n) 731 { 732 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 733 734 PetscFunctionBegin; 735 *m = mat->rstart; *n = mat->rend; 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNC__ 740 #define __FUNC__ "MatGetRow_MPIDense" 741 int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v) 742 { 743 Mat_MPIDense *mat = (Mat_MPIDense *) A->data; 744 int lrow, rstart = mat->rstart, rend = mat->rend,ierr; 745 746 PetscFunctionBegin; 747 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows") 748 lrow = row - rstart; 749 ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNC__ 754 #define __FUNC__ "MatRestoreRow_MPIDense" 755 int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v) 756 { 757 int ierr; 758 759 PetscFunctionBegin; 760 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 761 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatDiagonalScale_MPIDense" 767 int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 768 { 769 Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 770 Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 771 Scalar *l,*r,x,*v; 772 int ierr,i,j,s2a,s3a,s2,s3,m=mat->m,n=mat->n; 773 774 PetscFunctionBegin; 775 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 776 if (ll) { 777 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 778 if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector non-conforming local size"); 779 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 780 for ( i=0; i<m; i++ ) { 781 x = l[i]; 782 v = mat->v + i; 783 for ( j=0; j<n; j++ ) { (*v) *= x; v+= m;} 784 } 785 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 786 PLogFlops(n*m); 787 } 788 if (rr) { 789 ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr); 790 if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec non-conforming local size"); 791 ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 792 ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr); 793 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 794 for ( i=0; i<n; i++ ) { 795 x = r[i]; 796 v = mat->v + i*m; 797 for ( j=0; j<m; j++ ) { (*v++) *= x;} 798 } 799 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 800 PLogFlops(n*m); 801 } 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNC__ 806 #define __FUNC__ "MatNorm_MPIDense" 807 int MatNorm_MPIDense(Mat A,NormType type,double *norm) 808 { 809 Mat_MPIDense *mdn = (Mat_MPIDense *) A->data; 810 Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data; 811 int ierr, i, j; 812 double sum = 0.0; 813 Scalar *v = mat->v; 814 815 PetscFunctionBegin; 816 if (mdn->size == 1) { 817 ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr); 818 } else { 819 if (type == NORM_FROBENIUS) { 820 for (i=0; i<mat->n*mat->m; i++ ) { 821 #if defined(PETSC_USE_COMPLEX) 822 sum += PetscReal(PetscConj(*v)*(*v)); v++; 823 #else 824 sum += (*v)*(*v); v++; 825 #endif 826 } 827 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 828 *norm = sqrt(*norm); 829 PLogFlops(2*mat->n*mat->m); 830 } else if (type == NORM_1) { 831 double *tmp, *tmp2; 832 tmp = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp); 833 tmp2 = tmp + mdn->N; 834 ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr); 835 *norm = 0.0; 836 v = mat->v; 837 for ( j=0; j<mat->n; j++ ) { 838 for ( i=0; i<mat->m; i++ ) { 839 tmp[j] += PetscAbsScalar(*v); v++; 840 } 841 } 842 ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr); 843 for ( j=0; j<mdn->N; j++ ) { 844 if (tmp2[j] > *norm) *norm = tmp2[j]; 845 } 846 ierr = PetscFree(tmp);CHKERRQ(ierr); 847 PLogFlops(mat->n*mat->m); 848 } else if (type == NORM_INFINITY) { /* max row norm */ 849 double ntemp; 850 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 851 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr); 852 } else { 853 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 854 } 855 } 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNC__ 860 #define __FUNC__ "MatTranspose_MPIDense" 861 int MatTranspose_MPIDense(Mat A,Mat *matout) 862 { 863 Mat_MPIDense *a = (Mat_MPIDense *) A->data; 864 Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data; 865 Mat B; 866 int M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart; 867 int j, i, ierr; 868 Scalar *v; 869 870 PetscFunctionBegin; 871 if (matout == PETSC_NULL && M != N) { 872 SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place"); 873 } 874 ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr); 875 876 m = Aloc->m; n = Aloc->n; v = Aloc->v; 877 rwork = (int *) PetscMalloc(n*sizeof(int));CHKPTRQ(rwork); 878 for ( j=0; j<n; j++ ) { 879 for (i=0; i<m; i++) rwork[i] = rstart + i; 880 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 881 v += m; 882 } 883 ierr = PetscFree(rwork);CHKERRQ(ierr); 884 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 if (matout != PETSC_NULL) { 887 *matout = B; 888 } else { 889 PetscOps *Abops; 890 MatOps Aops; 891 892 /* This isn't really an in-place transpose, but free data struct from a */ 893 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 894 ierr = MatDestroy(a->A);CHKERRQ(ierr); 895 if (a->lvec) VecDestroy(a->lvec); 896 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 897 ierr = PetscFree(a);CHKERRQ(ierr); 898 899 /* 900 This is horrible, horrible code. We need to keep the 901 A pointers for the bops and ops but copy everything 902 else from C. 903 */ 904 Abops = A->bops; 905 Aops = A->ops; 906 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 907 A->bops = Abops; 908 A->ops = Aops; 909 910 PetscHeaderDestroy(B); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #include "pinclude/blaslapack.h" 916 #undef __FUNC__ 917 #define __FUNC__ "MatScale_MPIDense" 918 int MatScale_MPIDense(Scalar *alpha,Mat inA) 919 { 920 Mat_MPIDense *A = (Mat_MPIDense *) inA->data; 921 Mat_SeqDense *a = (Mat_SeqDense *) A->A->data; 922 int one = 1, nz; 923 924 PetscFunctionBegin; 925 nz = a->m*a->n; 926 BLscal_( &nz, alpha, a->v, &one ); 927 PLogFlops(nz); 928 PetscFunctionReturn(0); 929 } 930 931 static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 932 extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **); 933 934 /* -------------------------------------------------------------------*/ 935 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 936 MatGetRow_MPIDense, 937 MatRestoreRow_MPIDense, 938 MatMult_MPIDense, 939 MatMultAdd_MPIDense, 940 MatMultTrans_MPIDense, 941 MatMultTransAdd_MPIDense, 942 0, 943 0, 944 0, 945 0, 946 0, 947 0, 948 0, 949 MatTranspose_MPIDense, 950 MatGetInfo_MPIDense,0, 951 MatGetDiagonal_MPIDense, 952 MatDiagonalScale_MPIDense, 953 MatNorm_MPIDense, 954 MatAssemblyBegin_MPIDense, 955 MatAssemblyEnd_MPIDense, 956 0, 957 MatSetOption_MPIDense, 958 MatZeroEntries_MPIDense, 959 MatZeroRows_MPIDense, 960 0, 961 0, 962 0, 963 0, 964 MatGetSize_MPIDense, 965 MatGetLocalSize_MPIDense, 966 MatGetOwnershipRange_MPIDense, 967 0, 968 0, 969 MatGetArray_MPIDense, 970 MatRestoreArray_MPIDense, 971 MatDuplicate_MPIDense, 972 0, 973 0, 974 0, 975 0, 976 0, 977 MatGetSubMatrices_MPIDense, 978 0, 979 MatGetValues_MPIDense, 980 0, 981 0, 982 MatScale_MPIDense, 983 0, 984 0, 985 0, 986 MatGetBlockSize_MPIDense, 987 0, 988 0, 989 0, 990 0, 991 0, 992 0, 993 0, 994 0, 995 0, 996 MatGetSubMatrix_MPIDense, 997 0, 998 0, 999 MatGetMaps_Petsc}; 1000 1001 #undef __FUNC__ 1002 #define __FUNC__ "MatCreateMPIDense" 1003 /*@C 1004 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1005 1006 Collective on MPI_Comm 1007 1008 Input Parameters: 1009 + comm - MPI communicator 1010 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1011 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1012 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1013 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1014 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1015 to control all matrix memory allocation. 1016 1017 Output Parameter: 1018 . A - the matrix 1019 1020 Notes: 1021 The dense format is fully compatible with standard Fortran 77 1022 storage by columns. 1023 1024 The data input variable is intended primarily for Fortran programmers 1025 who wish to allocate their own matrix memory space. Most users should 1026 set data=PETSC_NULL. 1027 1028 The user MUST specify either the local or global matrix dimensions 1029 (possibly both). 1030 1031 Level: intermediate 1032 1033 .keywords: matrix, dense, parallel 1034 1035 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1036 @*/ 1037 int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A) 1038 { 1039 Mat mat; 1040 Mat_MPIDense *a; 1041 int ierr, i,flg; 1042 1043 PetscFunctionBegin; 1044 /* Note: For now, when data is specified above, this assumes the user correctly 1045 allocates the local dense storage space. We should add error checking. */ 1046 1047 *A = 0; 1048 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView); 1049 PLogObjectCreate(mat); 1050 mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1051 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1052 mat->ops->destroy = MatDestroy_MPIDense; 1053 mat->ops->view = MatView_MPIDense; 1054 mat->factor = 0; 1055 mat->mapping = 0; 1056 1057 a->factor = 0; 1058 mat->insertmode = NOT_SET_VALUES; 1059 ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr); 1060 ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr); 1061 1062 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1063 1064 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1065 a->nvec = n; 1066 1067 /* each row stores all columns */ 1068 a->N = mat->N = N; 1069 a->M = mat->M = M; 1070 a->m = mat->m = m; 1071 a->n = mat->n = N; /* NOTE: n == N */ 1072 1073 /* the information in the maps duplicates the information computed below, eventually 1074 we should remove the duplicate information that is not contained in the maps */ 1075 ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr); 1076 ierr = MapCreateMPI(comm,n,N,&mat->cmap);CHKERRQ(ierr); 1077 1078 /* build local table of row and column ownerships */ 1079 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1080 a->cowners = a->rowners + a->size + 1; 1081 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1082 ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1083 a->rowners[0] = 0; 1084 for ( i=2; i<=a->size; i++ ) { 1085 a->rowners[i] += a->rowners[i-1]; 1086 } 1087 a->rstart = a->rowners[a->rank]; 1088 a->rend = a->rowners[a->rank+1]; 1089 ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1090 a->cowners[0] = 0; 1091 for ( i=2; i<=a->size; i++ ) { 1092 a->cowners[i] += a->cowners[i-1]; 1093 } 1094 1095 ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr); 1096 PLogObjectParent(mat,a->A); 1097 1098 /* build cache for off array entries formed */ 1099 a->donotstash = 0; 1100 ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr); 1101 1102 /* stuff used for matrix vector multiply */ 1103 a->lvec = 0; 1104 a->Mvctx = 0; 1105 a->roworiented = 1; 1106 1107 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C", 1108 "MatGetDiagonalBlock_MPIDense", 1109 (void*)MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1110 1111 *A = mat; 1112 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1113 if (flg) { 1114 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1115 } 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatDuplicate_MPIDense" 1121 static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1122 { 1123 Mat mat; 1124 Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data; 1125 int ierr; 1126 FactorCtx *factor; 1127 1128 PetscFunctionBegin; 1129 *newmat = 0; 1130 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView); 1131 PLogObjectCreate(mat); 1132 mat->data = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a); 1133 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1134 mat->ops->destroy = MatDestroy_MPIDense; 1135 mat->ops->view = MatView_MPIDense; 1136 mat->factor = A->factor; 1137 mat->assembled = PETSC_TRUE; 1138 1139 a->m = mat->m = oldmat->m; 1140 a->n = mat->n = oldmat->n; 1141 a->M = mat->M = oldmat->M; 1142 a->N = mat->N = oldmat->N; 1143 if (oldmat->factor) { 1144 a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor); 1145 /* copy factor contents ... add this code! */ 1146 } else a->factor = 0; 1147 1148 a->rstart = oldmat->rstart; 1149 a->rend = oldmat->rend; 1150 a->size = oldmat->size; 1151 a->rank = oldmat->rank; 1152 mat->insertmode = NOT_SET_VALUES; 1153 a->donotstash = oldmat->donotstash; 1154 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners); 1155 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense)); 1156 ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr); 1157 ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr); 1158 1159 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1160 PLogObjectParent(mat,a->lvec); 1161 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1162 PLogObjectParent(mat,a->Mvctx); 1163 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1164 PLogObjectParent(mat,a->A); 1165 *newmat = mat; 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #include "sys.h" 1170 1171 #undef __FUNC__ 1172 #define __FUNC__ "MatLoad_MPIDense_DenseInFile" 1173 int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat) 1174 { 1175 int *rowners, i,size,rank,m,ierr,nz,j; 1176 Scalar *array,*vals,*vals_ptr; 1177 MPI_Status status; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1182 1183 /* determine ownership of all rows */ 1184 m = M/size + ((M % size) > rank); 1185 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1186 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1187 rowners[0] = 0; 1188 for ( i=2; i<=size; i++ ) { 1189 rowners[i] += rowners[i-1]; 1190 } 1191 1192 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1193 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1194 1195 if (!rank) { 1196 vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 1197 1198 /* read in my part of the matrix numerical values */ 1199 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1200 1201 /* insert into matrix-by row (this is why cannot directly read into array */ 1202 vals_ptr = vals; 1203 for ( i=0; i<m; i++ ) { 1204 for ( j=0; j<N; j++ ) { 1205 array[i + j*m] = *vals_ptr++; 1206 } 1207 } 1208 1209 /* read in other processors and ship out */ 1210 for ( i=1; i<size; i++ ) { 1211 nz = (rowners[i+1] - rowners[i])*N; 1212 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1213 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr); 1214 } 1215 } else { 1216 /* receive numeric values */ 1217 vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals); 1218 1219 /* receive message of values*/ 1220 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr); 1221 1222 /* insert into matrix-by row (this is why cannot directly read into array */ 1223 vals_ptr = vals; 1224 for ( i=0; i<m; i++ ) { 1225 for ( j=0; j<N; j++ ) { 1226 array[i + j*m] = *vals_ptr++; 1227 } 1228 } 1229 } 1230 ierr = PetscFree(rowners);CHKERRQ(ierr); 1231 ierr = PetscFree(vals);CHKERRQ(ierr); 1232 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1233 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 1238 #undef __FUNC__ 1239 #define __FUNC__ "MatLoad_MPIDense" 1240 int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat) 1241 { 1242 Mat A; 1243 Scalar *vals,*svals; 1244 MPI_Comm comm = ((PetscObject)viewer)->comm; 1245 MPI_Status status; 1246 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1247 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1248 int tag = ((PetscObject)viewer)->tag; 1249 int i, nz, ierr, j,rstart, rend, fd; 1250 1251 PetscFunctionBegin; 1252 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1254 if (!rank) { 1255 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1256 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1257 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1258 } 1259 1260 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1261 M = header[1]; N = header[2]; nz = header[3]; 1262 1263 /* 1264 Handle case where matrix is stored on disk as a dense matrix 1265 */ 1266 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1267 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /* determine ownership of all rows */ 1272 m = M/size + ((M % size) > rank); 1273 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1274 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1275 rowners[0] = 0; 1276 for ( i=2; i<=size; i++ ) { 1277 rowners[i] += rowners[i-1]; 1278 } 1279 rstart = rowners[rank]; 1280 rend = rowners[rank+1]; 1281 1282 /* distribute row lengths to all processors */ 1283 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1284 offlens = ourlens + (rend-rstart); 1285 if (!rank) { 1286 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1287 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1288 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 1289 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1290 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1291 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1292 } else { 1293 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1294 } 1295 1296 if (!rank) { 1297 /* calculate the number of nonzeros on each processor */ 1298 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1299 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1300 for ( i=0; i<size; i++ ) { 1301 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1302 procsnz[i] += rowlengths[j]; 1303 } 1304 } 1305 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1306 1307 /* determine max buffer needed and allocate it */ 1308 maxnz = 0; 1309 for ( i=0; i<size; i++ ) { 1310 maxnz = PetscMax(maxnz,procsnz[i]); 1311 } 1312 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 1313 1314 /* read in my part of the matrix column indices */ 1315 nz = procsnz[0]; 1316 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 1317 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1318 1319 /* read in every one elses and ship off */ 1320 for ( i=1; i<size; i++ ) { 1321 nz = procsnz[i]; 1322 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1323 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1324 } 1325 ierr = PetscFree(cols);CHKERRQ(ierr); 1326 } else { 1327 /* determine buffer space needed for message */ 1328 nz = 0; 1329 for ( i=0; i<m; i++ ) { 1330 nz += ourlens[i]; 1331 } 1332 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 1333 1334 /* receive message of column indices*/ 1335 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1336 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1337 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1338 } 1339 1340 /* loop over local rows, determining number of off diagonal entries */ 1341 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1342 jj = 0; 1343 for ( i=0; i<m; i++ ) { 1344 for ( j=0; j<ourlens[i]; j++ ) { 1345 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1346 jj++; 1347 } 1348 } 1349 1350 /* create our matrix */ 1351 for ( i=0; i<m; i++ ) { 1352 ourlens[i] -= offlens[i]; 1353 } 1354 ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr); 1355 A = *newmat; 1356 for ( i=0; i<m; i++ ) { 1357 ourlens[i] += offlens[i]; 1358 } 1359 1360 if (!rank) { 1361 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 1362 1363 /* read in my part of the matrix numerical values */ 1364 nz = procsnz[0]; 1365 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1366 1367 /* insert into matrix */ 1368 jj = rstart; 1369 smycols = mycols; 1370 svals = vals; 1371 for ( i=0; i<m; i++ ) { 1372 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1373 smycols += ourlens[i]; 1374 svals += ourlens[i]; 1375 jj++; 1376 } 1377 1378 /* read in other processors and ship out */ 1379 for ( i=1; i<size; i++ ) { 1380 nz = procsnz[i]; 1381 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1382 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1385 } else { 1386 /* receive numeric values */ 1387 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 1388 1389 /* receive message of values*/ 1390 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1391 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1392 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1393 1394 /* insert into matrix */ 1395 jj = rstart; 1396 smycols = mycols; 1397 svals = vals; 1398 for ( i=0; i<m; i++ ) { 1399 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1400 smycols += ourlens[i]; 1401 svals += ourlens[i]; 1402 jj++; 1403 } 1404 } 1405 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1406 ierr = PetscFree(vals);CHKERRQ(ierr); 1407 ierr = PetscFree(mycols);CHKERRQ(ierr); 1408 ierr = PetscFree(rowners);CHKERRQ(ierr); 1409 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 1416 1417 1418 1419