#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.45 1996/08/08 14:42:42 bsmith Exp bsmith $";
#endif

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                                 int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  if (A->insertmode != NOT_SET_VALUES && A->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  A->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= A->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
          if (idxn[j] >= A->N) SETERRQ(1,"MatSetValues_MPIDense:Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&A->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
      }
      else { /* must stash each separately */
        row = idxm[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&A->stash,row,1,&idxn[j],v+i+j*m,addv);
          CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatGetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatGetValues_MPIDense:Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j); CHKERRQ(ierr);
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIDense:Only local values currently supported");
    }
  }
  return 0;
}

static int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = MatGetArray(a->A,array); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  return 0;
}

static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mdn->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row, global column, value); we store the global indices
          as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
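    /* each stashed entry was packed in MatAssemblyBegin_MPIDense() as three
       consecutive Scalars: global row, global column, value */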
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - mdn->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PetscFree(mdn->recv_waits); PetscFree(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PetscMalloc( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(mdn->send_waits); PetscFree(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
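  /* svalues holds the global row numbers to be zeroed, packed contiguously
     by destination processor */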
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  ierr = VecSet(&zero,yy); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
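  /* computes zz = yy + A' * xx: copy yy into zz, form the local transpose
     product into the work vector, then scatter-add it back into zz */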
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  ierr = VecCopy(yy,zz); CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  VecSet(&zero,v);
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  len = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec)  VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  FILE         *fd;
  ViewerType   vtype;

  ViewerGetType(viewer,&vtype);
  ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);
  if (format == ASCII_FORMAT_INFO_DETAILED) {
    int nz, nzalloc, mem, rank;
    MPI_Comm_rank(mat->comm,&rank);
    ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",
            rank,mdn->m,nz,nzalloc,mem);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
    return 0;
  }
  else if (format == ASCII_FORMAT_INFO) {
    return 0;
  }
  if (vtype == ASCII_FILE_VIEWER) {
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N, m, row, i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        ierr = MatCreateMPIDense(mat->comm,M,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,M,N,N,PETSC_NULL,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat A,MatInfoType flag,int *nz,
                               int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(mdn,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    if (nz)      *nz      = isend[0];
    if (nzalloc) *nzalloc = isend[1];
    if (mem)     *mem     = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,A->comm);
    if (nz)      *nz      = irecv[0];
    if (nzalloc) *nzalloc = irecv[1];
    if (mem)     *mem     = irecv[2];
  }
  return 0;
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED)
    { a->roworiented = 0; MatSetOption(a->A,op);}
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
  return 0;
}

static int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    }
    else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      PetscMemzero(tmp,2*mdn->N*sizeof(double));
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp); CHKERRQ(ierr);
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIDense:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIDense:In-place transpose supported only for square matrices");
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
  CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  rwork = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES); CHKERRQ(ierr);
    v += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
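  /* either hand back the new matrix, or (for the pseudo in-place case)
     replace A's data structures with B's */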
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data struct from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#include "pinclude/plapack.h"
static int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  return 0;
}

static int MatConvertSameType_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       /* MatSolve_MPIDense,0, */
       0,0,
       0,0,
       0,0,
       /* MatLUFactor_MPIDense,0, */
       0,MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,0,MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,0,0,
       /* 0,MatLUFactorSymbolic_MPIDense,MatLUFactorNumeric_MPIDense, */
       0,0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,0, MatGetArray_MPIDense, MatRestoreArray_MPIDense,
       0,MatConvertSameType_MPIDense,
       0,0,0,0,0,
       0,0,MatGetValues_MPIDense,0,0,MatScale_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i, flg;

  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking.
  */
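  /* A minimal calling sketch (hypothetical sizes and values, not part of this
     routine; it only illustrates the argument order documented above):

         Mat    B;
         Scalar one = 1.0;
         int    row = 0, col = 0, ierr;

         ierr = MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                  10,10,PETSC_NULL,&B); CHKERRQ(ierr);
         ierr = MatSetValues(B,1,&row,1,&col,&one,INSERT_VALUES); CHKERRQ(ierr);
         ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
         ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  */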

  *A = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;

  a->factor     = 0;
  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,data,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for entries destined for other (off-processor) rows */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  return 0;
}

static int MatConvertSameType_MPIDense(Mat A,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIDENSE,A->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIDense;
  mat->view      = MatView_MPIDense;
  mat->factor    = A->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx)); CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i, size, rank, m, rstart, rend, ierr, nz, j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array); CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' rows and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
    }
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}


int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
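    /* only the first process reads and checks the binary header; the header
       is broadcast to the other processes below */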
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDense:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    return MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
  }

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat); CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDense:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}