#ifndef lint
static char vcid[] = "$Id: mpidense.c,v 1.3 1995/10/22 04:19:52 bsmith Exp curfman $";
#endif

#include "mpidense.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

static int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,
                                 int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  if (mdn->insertmode != NOT_SET_VALUES && mdn->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIDense:Cannot mix inserts and adds");
  }
  mdn->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(1,"MatSetValues_MPIDense:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* row is owned locally; insert directly into the local dense matrix */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIDense:Negative column");
        if (idxn[j] >= mdn->N)
          SETERRQ(1,"MatSetValues_MPIDense:Column too large");
        ierr = MatSetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j,addv);
        CHKERRQ(ierr);
      }
    }
    else {
      /* row belongs to another processor; stash it until assembly */
      ierr = StashValues_Private(&mdn->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}
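/*
   Parallel assembly for the MPI dense format, as implemented by the two
   routines below:

     - MatAssemblyBegin_MPIDense() counts the stashed (off-processor) entries
       destined for each processor, posts the receives, and sends each entry
       as a triple of Scalars (global row, global column, value).  For
       example, a stashed entry a(7,3) = 2.5 bound for processor p occupies
       three consecutive locations 7.0, 3.0, 2.5 in the svalues buffer for p
       (the numbers here are illustrative only).
     - MatAssemblyEnd_MPIDense() waits on the receives, converts the first
       two Scalars of each triple back to integer indices, and inserts the
       values into the local sequential dense matrix.
*/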
static int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          size = mdn->size, *owners = mdn->rowners, rank = mdn->rank;
  int          *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int          tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode   addv;
  MPI_Request  *send_waits,*recv_waits;
  Scalar       *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce((void *) &mdn->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) { SETERRQ(1,
    "MatAssemblyBegin_MPIDense:Cannot mix adds/inserts on different procs");
  }
  mdn->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (mdn->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<mdn->stash.n; i++ ) {
    idx = mdn->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce((void *) nprocs,(void *) work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
       1) each message consists of ordered triples
          (global row, global column, value); the indices are stored as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(mdn->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<mdn->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) mdn->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) mdn->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = mdn->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&mdn->stash); CHKERRQ(ierr);

  mdn->svalues    = svalues;    mdn->rvalues    = rvalues;
  mdn->nsends     = nsends;     mdn->nrecvs     = nreceives;
  mdn->send_waits = send_waits; mdn->recv_waits = recv_waits;
  mdn->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIDense(Mat);

static int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Status   *send_status,recv_status;
  int          imdex, nrecvs = mdn->nrecvs, count = nrecvs, i, n, ierr, row, col;
  Scalar       *values,val;
  InsertMode   addv = mdn->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,mdn->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = mdn->rvalues + 3*imdex*mdn->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - mdn->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= 0 && col < mdn->N) {
        MatSetValues(mdn->A,1,&row,1,&col,&val,addv);
      }
      else {SETERRQ(1,"MatAssemblyEnd_MPIDense:Invalid column");}
    }
    count--;
  }
  PETSCFREE(mdn->recv_waits); PETSCFREE(mdn->rvalues);

  /* wait on sends */
  if (mdn->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( mdn->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mdn->nsends,mdn->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(mdn->send_waits); PETSCFREE(mdn->svalues);

  mdn->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(mdn->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode); CHKERRQ(ierr);

  if (!mdn->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat); CHKERRQ(ierr);
  }
  mdn->assembled = 1;
  return 0;
}
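/*
   Caller-side sketch of the insert/assemble cycle handled by the routines
   above (a minimal illustration, not taken verbatim from this file; error
   checking is omitted and row, col, value are assumed to be declared):

       MatSetValues(mat,1,&row,1,&col,&value,INSERT_VALUES);   -- any row, local or remote
       MatAssemblyBegin(mat,FINAL_ASSEMBLY);                   -- ships the stashed entries
       MatAssemblyEnd(mat,FINAL_ASSEMBLY);                     -- receives and inserts them
*/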
static int MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  return MatZeroEntries(l->A);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix l->A directly and not go through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIDense:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIDense:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}
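/*
   Matrix-vector products for the MPI dense format.  Because each processor
   stores all N columns of its local rows, MatMult first gathers the entire
   input vector into the sequential work vector lvec (using the Mvctx
   scatter context set up elsewhere, see MatSetUpMultiply_MPIDense()) and
   then performs a local dense multiply.  The transpose products go the
   other way: a local transpose multiply into lvec followed by a reverse
   scatter that adds the partial results into the distributed output vector.
*/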
static int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (!mdn->assembled)
    SETERRQ(1,"MatMult_MPIDense:Must assemble matrix first");
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(mdn->A,mdn->lvec,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (!mdn->assembled)
    SETERRQ(1,"MatMultAdd_MPIDense:Must assemble matrix first");
  /* gather xx with INSERT_VALUES; the addition of yy is handled by the
     local MatMultAdd() below, so accumulating into lvec would be wrong */
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_ALL,mdn->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(mdn->A,mdn->lvec,yy,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIDense:must assemble matrix");
  ierr = MatMultTrans(a->A,xx,a->lvec); CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIDense:must assemble matrix");
  ierr = MatMultTransAdd(a->A,xx,yy,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}
static int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, i, n, m = a->m, radd;
  Scalar       *x;

  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIDense:must assemble matrix");
  ierr = VecGetArray(v,&x); CHKERRQ(ierr);
  ierr = VecGetSize(v,&n); CHKERRQ(ierr);
  if (n != a->M) SETERRQ(1,"MatGetDiagonal_MPIDense:Nonconforming mat and vec");
  radd = a->rstart*m;  /* offset of column rstart in the column-major local array */
  for ( i=0; i<m; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  return 0;
}

static int MatDestroy_MPIDense(PetscObject obj)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  PETSCFREE(mdn->rowners);
  ierr = MatDestroy(mdn->A); CHKERRQ(ierr);
  if (mdn->lvec) VecDestroy(mdn->lvec);
  if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
  PETSCFREE(mdn);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

#include "pinclude/pviewer.h"

static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIDense_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format;
  PetscObject  vobj = (PetscObject) viewer;
  FILE         *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format); CHKERRQ(ierr);
    if (format == FILE_FORMAT_INFO) {
      int nz, nzalloc, mem, rank;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); CHKERRQ(ierr);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",
              rank,mdn->m,nz,nzalloc,mem);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(mdn->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d\n",
            mdn->rank,mdn->m,mdn->rstart,mdn->rend,mdn->n);
    ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = mdn->size, rank = mdn->rank;
    if (size == 1) {
      ierr = MatView(mdn->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto the first processor. */
      Mat          A;
      int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
      Scalar       *vals;
      Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

      if (!rank) {
        /* the first processor holds all rows of the gathered copy */
        ierr = MatCreateMPIDense(mat->comm,M,N,M,N,&A); CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIDense(mat->comm,0,N,M,N,&A); CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);
      /* Copy the matrix ... This isn't the most efficient means,
         but it's quick for now */
      row = mdn->rstart; m = Amdn->m;
      for ( i=0; i<m; i++ ) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
        row++;
      }

      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIDense(PetscObject obj,Viewer viewer)
{
  Mat          mat = (Mat) obj;
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  PetscObject  vobj = (PetscObject) viewer;
  int          ierr;

  if (!mdn->assembled) SETERRQ(1,"MatView_MPIDense:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIDense_ASCII(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIDense_Binary(mat,viewer);
  }
  return 0;
}

static int MatGetInfo_MPIDense(Mat matin,MatInfoType flag,int *nz,
                               int *nzalloc,int *mem)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  Mat          A = mat->A;
  int          ierr, isend[3], irecv[3];

  ierr = MatGetInfo(A,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

static int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIDense:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIDense:unknown option");}
  return 0;
}

static int MatGetSize_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIDense(Mat matin,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}
static int MatGetRow_MPIDense(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) matin->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIDense:only local rows");
  lrow = row - rstart;
  return MatGetRow(mat->A,lrow,nz,idx,v);
}

static int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v) PETSCFREE(*v);
  return 0;
}

static int MatNorm_MPIDense(Mat mat,MatNormType type,double *norm)
{
  *norm = 0.0;
  SETERRQ(1,"MatNorm_MPIDense:Not finished");
  return 0;
}

static int MatCopyPrivate_MPIDense(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,MatRestoreRow_MPIDense,
       MatMult_MPIDense,MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,MatMultTransAdd_MPIDense,
       0, 0,
       0, 0, 0,
       0, 0, 0,
       MatGetInfo_MPIDense, 0,
       MatGetDiagonal_MPIDense, 0, MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,MatZeroEntries_MPIDense,MatZeroRows_MPIDense,
       0,
       0, 0, 0, 0,
       MatGetSize_MPIDense,MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0, 0,
       0, 0, 0, 0, 0, MatCopyPrivate_MPIDense};

/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)

   Output Parameter:
.  newmat - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i;

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (M == PETSC_DECIDE) MPI_Allreduce(&m,&M,1,MPI_INT,MPI_SUM,comm);
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}

  /* each processor stores all columns of its local rows */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) n = N;
  if (n != N) SETERRQ(1,"MatCreateMPIDense:For now, only n=N is supported");
  a->N = N;
  a->M = M;
  a->m = m;
  a->n = n;

  /* build local table of row ownerships */
  a->rowners = (int *) PETSCMALLOC((a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIDense));
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];

  ierr = MatCreateSeqDense(MPI_COMM_SELF,m,N,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off-processor entries generated during MatSetValues() */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
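/*
   A minimal usage sketch for MatCreateMPIDense() (illustrative only, not
   taken from this file; error checking is omitted and the vectors x and y
   are assumed to have been created elsewhere with compatible sizes):

       Mat    A;
       int    row = 0, col = 0;
       Scalar value = 1.0;

       MatCreateMPIDense(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
       MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);
       MatAssemblyBegin(A,FINAL_ASSEMBLY);
       MatAssemblyEnd(A,FINAL_ASSEMBLY);
       MatMult(A,x,y);
       MatDestroy(A);
*/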
static int MatCopyPrivate_MPIDense(Mat matin,Mat *newmat,int cpvalues)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) matin->data;
  int          ierr;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIDense:Must assemble matrix");
  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIDENSE,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PETSCNEW(Mat_MPIDense)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIDense;
  mat->view    = MatView_MPIDense;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIDense));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  return 0;
}

#include "sysio.h"
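/*
   Note on the binary file layout, as read by MatLoad_MPIDense() below
   (a description inferred from the read calls; header[3] is read but not
   otherwise used by this loader):

       int    header[4]      -- MAT_COOKIE, M (global rows), N (global columns), header[3]
       int    rowlengths[M]  -- number of stored entries in each row
       int    column indices -- rowlengths[i] indices for each row i, in row order
       Scalar values         -- in the same order as the column indices

   Processor 0 reads the file and ships each other processor its portion of
   the indices and values.
*/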
int MatLoad_MPIDense(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];

  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIDENSE) {
    ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,newmat); CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIDenseorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}