#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.120 1999/07/27 22:18:47 curfman Exp curfman $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
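
/*
   Illustrative sketch (editorial, not part of the library): how the caller's
   value array v maps onto a 2x3 logical block inserted through MatSetValues().
   With the default MAT_ROW_ORIENTED storage v is read row by row (v+i*n is row i);
   after MatSetOption(mat,MAT_COLUMN_ORIENTED) it is read column by column, so
   entry (i,j) lives at v[i+j*m].  The names rows/cols/vrow/vcol are made up:

      int    rows[2] = {10,11}, cols[3] = {0,1,2};
      Scalar vrow[6] = {1,2,3, 4,5,6};       row-oriented layout of    [ 1 2 3 ; 4 5 6 ]
      Scalar vcol[6] = {1,4, 2,5, 3,6};      column-oriented layout of [ 1 2 3 ; 4 5 6 ]

      ierr = MatSetValues(mat,2,rows,3,cols,vrow,INSERT_VALUES);CHKERRQ(ierr);

   Rows owned by other processors are not inserted directly; they are stashed and
   shipped to their owners in MatAssemblyBegin/End_MPIDense() below, unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set.
*/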
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data, *newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense *) mat->A->data;
  int          i, j, ierr, *irow, *icol, n_lrows, n_lcols;
  int          rstart, rend, nrows, ncols, nlrows, nlcols, rank, ncols_all;
  Scalar       *av, *bv, *v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported!  Should really check each
     index set to confirm that it is OK. ... Currently supports only a submatrix
     with the same partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,0,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,ncols,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *) newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for ( i=0; i<ncols; i++ ) {
    av = v + nlrows*icol[i];
    for ( j=0; j<nrows; j++ ) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*) mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows() 
   routine. 
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
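
/*
   Worked example (editorial sketch) of the counting logic above, for two
   processors owning rows [0,5) and [5,10).  If processor 0 calls
   MatZeroRows() with rows {1,7,9}, then on processor 0:

       nprocs = {1,2}    one row stays local, two go to processor 1
       procs  = {1,1}    -> nsends = 2 (the local rows are also "sent" to itself via MPI)
       owner  = {0,1,1}

   The MPI_Allreduce with MPI_SUM over procs tells each processor how many
   messages to expect (nrecvs), and the one with MPI_MAX over nprocs bounds the
   longest message it can receive (nmax), which sizes the receive buffer rvalues.
*/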

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  /* gather all of xx into the local work vector, then multiply by the local block */
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  /* multiply by the transpose of the local block, then scatter-add the partial sums into yy */
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
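
/*
   Editorial note on the indexing above: the local block is stored column-major
   (Fortran order), so local entry (i,j) is aloc->v[i + j*m].  Assuming the
   diagonal entry of global row rstart+i lies in global column rstart+i (a
   square matrix with matching row/column ownership),

       aloc->v[i + (a->rstart + i)*m] = aloc->v[radd + i*m + i]

   with radd = a->rstart*m, which is exactly what the loop reads.
*/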

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}
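
/*
   Illustrative sketch (editorial, not part of the library): the range is the
   usual PETSc half-open interval [rstart,rend).  Since MatGetRow() and
   MatGetValues() on this matrix type support only locally owned rows, a caller
   typically restricts itself to that range (A is an assumed name):

      int    rstart, rend, row, nz, *cols;
      Scalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ... use the nz entries of this row ...
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/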

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend, ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  Scalar       *l,*r,x,*v;
  int          ierr,i,j,m = mat->m, n = mat->n;

  PetscFunctionBegin;
  if (ll) {
    ierr = VecGetSize(ll,&m);CHKERRQ(ierr);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    if (m != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vec wrong size");
    for ( i=0; i<m; i++ ) {
      x = l[i];
      v = mat->v + i;
      for ( j=0; j<n; j++ ) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&n);CHKERRQ(ierr);
    if (n != mat->n) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vec wrong size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for ( i=0; i<n; i++ ) {
      x = r[i];
      v = mat->v + i*m;
      for ( j=0; j<m; j++ ) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
    PLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for ( i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
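
/*
   Editorial note: in the parallel branch above the norms are assembled from
   per-process contributions over the local m x N blocks A_p:

      ||A||_F   = sqrt( sum_p sum_{i,j} |(A_p)_{ij}|^2 )   (MPI_SUM, then sqrt)
      ||A||_1   = max_j sum_p sum_i |(A_p)_{ij}|           (column sums MPI_SUMmed, then max over j)
      ||A||_inf = max_p ( max row sum of A_p )             (local norm, then MPI_MAX)

   The infinity norm reduces with MPI_MAX directly because each row is stored
   entirely on one process.
*/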

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc(m*sizeof(int));CHKPTRQ(rwork);  /* holds the m global row indices */
  for ( j=0; j<n; j++ ) {
    for ( i=0; i<m; i++ ) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but free data struct from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec)  VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code.  We need to keep the
       A pointers for the bops and ops but copy everything 
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrix_MPIDense,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong; n should represent the number of local
     rows in the right-hand (column) vector.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below; eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries formed during MatSetValues() */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
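
/*
   Illustrative usage sketch (editorial, not part of the library), assembling a
   small matrix with the routines defined in this file.  The dimensions and the
   cols/vals arrays are made-up names for the example:

      Mat    A;
      int    row, rstart, rend, cols[4] = {0,1,2,3};
      Scalar vals[4];

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,4,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for ( row=rstart; row<rend; row++ ) {
        ... fill vals[] for this row (off-process rows would be stashed) ...
        ierr = MatSetValues(A,1,&row,4,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ... MatMult(A,x,y), MatNorm(A,NORM_1,&nrm), etc. ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/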

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int        *rowners, i, size, rank, m, ierr, nz, j;
  Scalar     *array, *vals, *vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly
       into the column-major array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;
  int        i, nz, ierr, j, rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* 
     Handle case where matrix is stored on disk as a dense matrix 
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
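
/*
   Illustrative sketch (editorial, not part of the library): given a binary
   viewer opened elsewhere on a file written with a binary MatView(), this
   routine is reached through the generic interface as

      Mat A;
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);

   If the file stores the values as a dense block (nz == MATRIX_BINARY_FORMAT_DENSE
   in the header), MatLoad_MPIDense_DenseInFile() above handles it; otherwise the
   row-compressed path in MatLoad_MPIDense() distributes the rows by ownership.
*/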