1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.91 1995/10/19 22:22:37 curfman Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscZero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 40 int *idxn,Scalar *v,InsertMode addv) 41 { 42 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (idxm[i] >= rstart && idxm[i] < rend) { 56 row = idxm[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (idxn[j] >= cstart && idxn[j] < cend){ 61 col = idxn[j] - cstart; 62 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 63 } 64 else { 65 if (aij->assembled) { 66 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67 col = aij->colmap[idxn[j]] + shift; 68 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 69 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 70 col = idxn[j]; 71 } 72 } 73 else col = idxn[j]; 74 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 75 } 76 } 77 } 78 else { 79 ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 80 } 81 } 82 return 0; 83 } 84 85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 86 { 87 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88 MPI_Comm comm = mat->comm; 89 int size = aij->size, *owners = aij->rowners; 90 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 91 MPI_Request *send_waits,*recv_waits; 92 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 93 InsertMode addv; 94 Scalar *rvalues,*svalues; 95 96 /* make sure all processors are either in INSERTMODE or ADDMODE */ 97 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 98 if (addv == (ADD_VALUES|INSERT_VALUES)) { 99 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 100 } 101 aij->insertmode = addv; /* in case this processor had no cache */ 102 103 /* first count number of contributors to each processor */ 104 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 105 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 106 owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 107 for ( i=0; i<aij->stash.n; i++ ) { 108 idx = aij->stash.idx[i]; 109 for ( j=0; j<size; j++ ) { 110 if (idx >= owners[j] && idx < owners[j+1]) { 111 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 112 } 113 } 114 } 115 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 116 117 /* inform other processors of number of messages and max length*/ 118 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 119 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 120 nreceives = work[rank]; 121 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 122 nmax = work[rank]; 123 PETSCFREE(work); 124 125 /* post receives: 126 1) each message will consist of ordered pairs 127 (global index,value) we store the global index as a double 128 to simplify the message passing. 129 2) since we don't know how long each individual message is we 130 allocate the largest needed buffer for each receive. Potentially 131 this is a lot of wasted space. 132 133 134 This could be done better. 135 */ 136 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 137 CHKPTRQ(rvalues); 138 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 139 CHKPTRQ(recv_waits); 140 for ( i=0; i<nreceives; i++ ) { 141 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 142 comm,recv_waits+i); 143 } 144 145 /* do sends: 146 1) starts[i] gives the starting index in svalues for stuff going to 147 the ith processor 148 */ 149 svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 150 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 151 CHKPTRQ(send_waits); 152 starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts); 153 starts[0] = 0; 154 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 155 for ( i=0; i<aij->stash.n; i++ ) { 156 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 157 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 158 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 159 } 160 PETSCFREE(owner); 161 starts[0] = 0; 162 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 163 count = 0; 164 for ( i=0; i<size; i++ ) { 165 if (procs[i]) { 166 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 167 comm,send_waits+count++); 168 } 169 } 170 PETSCFREE(starts); PETSCFREE(nprocs); 171 172 /* Free cache space */ 173 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 174 175 aij->svalues = svalues; aij->rvalues = rvalues; 176 aij->nsends = nsends; aij->nrecvs = nreceives; 177 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 178 aij->rmax = nmax; 179 180 return 0; 181 } 182 extern int MatSetUpMultiply_MPIAIJ(Mat); 183 184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 187 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 188 MPI_Status *send_status,recv_status; 189 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 190 int row,col,other_disassembled,shift = C->indexshift; 191 Scalar *values,val; 192 InsertMode addv = aij->insertmode; 193 194 /* wait on receives */ 195 while (count) { 196 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 197 /* unpack receives into our local space */ 198 values = aij->rvalues + 3*imdex*aij->rmax; 199 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 200 n = n/3; 201 for ( i=0; i<n; i++ ) { 202 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 203 col = (int) PETSCREAL(values[3*i+1]); 204 val = values[3*i+2]; 205 if (col >= aij->cstart && col < aij->cend) { 206 col -= aij->cstart; 207 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 208 } 209 else { 210 if (aij->assembled) { 211 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 212 col = aij->colmap[col] + shift; 213 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 214 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 215 col = (int) PETSCREAL(values[3*i+1]); 216 } 217 } 218 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 219 } 220 } 221 count--; 222 } 223 PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 224 225 /* wait on sends */ 226 if (aij->nsends) { 227 send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status)); 228 CHKPTRQ(send_status); 229 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 230 PETSCFREE(send_status); 231 } 232 PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 233 234 aij->insertmode = NOT_SET_VALUES; 235 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 236 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 237 238 /* determine if any processor has disassembled, if so we must 239 also disassemble ourselfs, in order that we may reassemble. */ 240 MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 241 if (aij->assembled && !other_disassembled) { 242 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 243 } 244 245 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 246 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 247 aij->assembled = 1; 248 } 249 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 251 252 return 0; 253 } 254 255 static int MatZeroEntries_MPIAIJ(Mat A) 256 { 257 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 258 int ierr; 259 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 260 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 261 return 0; 262 } 263 264 /* the code does not do the diagonal entries correctly unless the 265 matrix is square and the column and row owerships are identical. 266 This is a BUG. The only way to fix it seems to be to access 267 aij->A and aij->B directly and not through the MatZeroRows() 268 routine. 269 */ 270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 271 { 272 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 273 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 274 int *procs,*nprocs,j,found,idx,nsends,*work; 275 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 276 int *rvalues,tag = A->tag,count,base,slen,n,*source; 277 int *lens,imdex,*lrows,*values; 278 MPI_Comm comm = A->comm; 279 MPI_Request *send_waits,*recv_waits; 280 MPI_Status recv_status,*send_status; 281 IS istmp; 282 283 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 284 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 285 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 286 287 /* first count number of contributors to each processor */ 288 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 289 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 290 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 291 for ( i=0; i<N; i++ ) { 292 idx = rows[i]; 293 found = 0; 294 for ( j=0; j<size; j++ ) { 295 if (idx >= owners[j] && idx < owners[j+1]) { 296 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 297 } 298 } 299 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 300 } 301 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 302 303 /* inform other processors of number of messages and max length*/ 304 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 305 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 306 nrecvs = work[rank]; 307 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 308 nmax = work[rank]; 309 PETSCFREE(work); 310 311 /* post receives: */ 312 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 313 CHKPTRQ(rvalues); 314 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 315 CHKPTRQ(recv_waits); 316 for ( i=0; i<nrecvs; i++ ) { 317 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 318 } 319 320 /* do sends: 321 1) starts[i] gives the starting index in svalues for stuff going to 322 the ith processor 323 */ 324 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 325 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 326 CHKPTRQ(send_waits); 327 starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts); 328 starts[0] = 0; 329 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 330 for ( i=0; i<N; i++ ) { 331 svalues[starts[owner[i]]++] = rows[i]; 332 } 333 ISRestoreIndices(is,&rows); 334 335 starts[0] = 0; 336 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 337 count = 0; 338 for ( i=0; i<size; i++ ) { 339 if (procs[i]) { 340 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 341 } 342 } 343 PETSCFREE(starts); 344 345 base = owners[rank]; 346 347 /* wait on receives */ 348 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 349 source = lens + nrecvs; 350 count = nrecvs; slen = 0; 351 while (count) { 352 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 353 /* unpack receives into our local space */ 354 MPI_Get_count(&recv_status,MPI_INT,&n); 355 source[imdex] = recv_status.MPI_SOURCE; 356 lens[imdex] = n; 357 slen += n; 358 count--; 359 } 360 PETSCFREE(recv_waits); 361 362 /* move the data into the send scatter */ 363 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 364 count = 0; 365 for ( i=0; i<nrecvs; i++ ) { 366 values = rvalues + i*nmax; 367 for ( j=0; j<lens[i]; j++ ) { 368 lrows[count++] = values[j] - base; 369 } 370 } 371 PETSCFREE(rvalues); PETSCFREE(lens); 372 PETSCFREE(owner); PETSCFREE(nprocs); 373 374 /* actually zap the local rows */ 375 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 376 PLogObjectParent(A,istmp); 377 PETSCFREE(lrows); 378 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 379 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 380 ierr = ISDestroy(istmp); CHKERRQ(ierr); 381 382 /* wait on sends */ 383 if (nsends) { 384 send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 385 CHKPTRQ(send_status); 386 MPI_Waitall(nsends,send_waits,send_status); 387 PETSCFREE(send_status); 388 } 389 PETSCFREE(send_waits); PETSCFREE(svalues); 390 391 return 0; 392 } 393 394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 395 { 396 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 397 int ierr; 398 399 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 400 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 401 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 402 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 403 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 404 return 0; 405 } 406 407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 408 { 409 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 410 int ierr; 411 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 412 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 413 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 414 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 415 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 416 return 0; 417 } 418 419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 420 { 421 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 422 int ierr; 423 424 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 425 /* do nondiagonal part */ 426 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 427 /* send it on its way */ 428 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 429 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 430 /* do local part */ 431 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 432 /* receive remote parts: note this assumes the values are not actually */ 433 /* inserted in yy until the next line, which is true for my implementation*/ 434 /* but is not perhaps always true. */ 435 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 436 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 437 return 0; 438 } 439 440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 441 { 442 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 443 int ierr; 444 445 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 446 /* do nondiagonal part */ 447 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 448 /* send it on its way */ 449 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 450 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 451 /* do local part */ 452 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453 /* receive remote parts: note this assumes the values are not actually */ 454 /* inserted in yy until the next line, which is true for my implementation*/ 455 /* but is not perhaps always true. */ 456 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 457 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 458 return 0; 459 } 460 461 /* 462 This only works correctly for square matrices where the subblock A->A is the 463 diagonal block 464 */ 465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 466 { 467 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 468 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 469 return MatGetDiagonal(a->A,v); 470 } 471 472 static int MatDestroy_MPIAIJ(PetscObject obj) 473 { 474 Mat mat = (Mat) obj; 475 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 476 int ierr; 477 #if defined(PETSC_LOG) 478 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 479 #endif 480 PETSCFREE(aij->rowners); 481 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 482 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 483 if (aij->colmap) PETSCFREE(aij->colmap); 484 if (aij->garray) PETSCFREE(aij->garray); 485 if (aij->lvec) VecDestroy(aij->lvec); 486 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 487 PETSCFREE(aij); 488 PLogObjectDestroy(mat); 489 PETSCHEADERDESTROY(mat); 490 return 0; 491 } 492 #include "draw.h" 493 #include "pinclude/pviewer.h" 494 495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 496 { 497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498 int ierr; 499 500 if (aij->size == 1) { 501 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 502 } 503 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 504 return 0; 505 } 506 507 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 508 { 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 510 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 511 int ierr, format,shift = C->indexshift; 512 PetscObject vobj = (PetscObject) viewer; 513 FILE *fd; 514 515 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 516 ierr = ViewerFileGetFormat_Private(viewer,&format); 517 if (format == FILE_FORMAT_INFO) { 518 return 0; 519 } 520 } 521 522 if (vobj->type == ASCII_FILE_VIEWER) { 523 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 524 MPIU_Seq_begin(mat->comm,1); 525 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 526 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 527 aij->cend); 528 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 529 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 530 fflush(fd); 531 MPIU_Seq_end(mat->comm,1); 532 } 533 else { 534 int size = aij->size, rank = aij->rank; 535 if (size == 1) { 536 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 537 } 538 else { 539 /* assemble the entire matrix onto first processor. */ 540 Mat A; 541 Mat_SeqAIJ *Aloc; 542 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 543 Scalar *a; 544 545 if (!rank) { 546 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 547 } 548 else { 549 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 550 } 551 PLogObjectParent(mat,A); 552 553 554 /* copy over the A part */ 555 Aloc = (Mat_SeqAIJ*) aij->A->data; 556 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 557 row = aij->rstart; 558 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 559 for ( i=0; i<m; i++ ) { 560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 561 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 562 } 563 aj = Aloc->j; 564 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 565 566 /* copy over the B part */ 567 Aloc = (Mat_SeqAIJ*) aij->B->data; 568 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 569 row = aij->rstart; 570 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 571 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 572 for ( i=0; i<m; i++ ) { 573 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 574 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 575 } 576 PETSCFREE(ct); 577 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 578 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 579 if (!rank) { 580 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 581 } 582 ierr = MatDestroy(A); CHKERRQ(ierr); 583 } 584 } 585 return 0; 586 } 587 588 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 589 { 590 Mat mat = (Mat) obj; 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 592 int ierr; 593 PetscObject vobj = (PetscObject) viewer; 594 595 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 596 if (!viewer) { 597 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 598 } 599 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 600 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 601 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 602 } 603 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 604 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 605 } 606 else if (vobj->cookie == DRAW_COOKIE) { 607 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 608 } 609 else if (vobj->type == BINARY_FILE_VIEWER) { 610 return MatView_MPIAIJ_Binary(mat,viewer); 611 } 612 return 0; 613 } 614 615 extern int MatMarkDiag_SeqAIJ(Mat); 616 /* 617 This has to provide several versions. 618 619 1) per sequential 620 2) a) use only local smoothing updating outer values only once. 621 b) local smoothing updating outer values each inner iteration 622 3) color updating out values betwen colors. 623 */ 624 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 625 double fshift,int its,Vec xx) 626 { 627 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 628 Mat AA = mat->A, BB = mat->B; 629 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 630 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 631 int ierr,*idx, *diag; 632 int n = mat->n, m = mat->m, i,shift = A->indexshift; 633 Vec tt; 634 635 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 636 637 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 638 xs = x + shift; /* shift by one for index start of 1 */ 639 ls = ls + shift; 640 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 641 diag = A->diag; 642 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 643 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 644 } 645 if (flag & SOR_EISENSTAT) { 646 /* Let A = L + U + D; where L is lower trianglar, 647 U is upper triangular, E is diagonal; This routine applies 648 649 (L + E)^{-1} A (U + E)^{-1} 650 651 to a vector efficiently using Eisenstat's trick. This is for 652 the case of SSOR preconditioner, so E is D/omega where omega 653 is the relaxation factor. 654 */ 655 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 656 PLogObjectParent(matin,tt); 657 VecGetArray(tt,&t); 658 scale = (2.0/omega) - 1.0; 659 /* x = (E + U)^{-1} b */ 660 VecSet(&zero,mat->lvec); 661 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 662 mat->Mvctx); CHKERRQ(ierr); 663 for ( i=m-1; i>-1; i-- ) { 664 n = A->i[i+1] - diag[i] - 1; 665 idx = A->j + diag[i] + !shift; 666 v = A->a + diag[i] + !shift; 667 sum = b[i]; 668 SPARSEDENSEMDOT(sum,xs,v,idx,n); 669 d = fshift + A->a[diag[i]+shift]; 670 n = B->i[i+1] - B->i[i]; 671 idx = B->j + B->i[i] + shift; 672 v = B->a + B->i[i] + shift; 673 SPARSEDENSEMDOT(sum,ls,v,idx,n); 674 x[i] = omega*(sum/d); 675 } 676 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 677 mat->Mvctx); CHKERRQ(ierr); 678 679 /* t = b - (2*E - D)x */ 680 v = A->a; 681 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 682 683 /* t = (E + L)^{-1}t */ 684 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 685 diag = A->diag; 686 VecSet(&zero,mat->lvec); 687 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 688 mat->Mvctx); CHKERRQ(ierr); 689 for ( i=0; i<m; i++ ) { 690 n = diag[i] - A->i[i]; 691 idx = A->j + A->i[i] + shift; 692 v = A->a + A->i[i] + shift; 693 sum = t[i]; 694 SPARSEDENSEMDOT(sum,ts,v,idx,n); 695 d = fshift + A->a[diag[i]+shift]; 696 n = B->i[i+1] - B->i[i]; 697 idx = B->j + B->i[i] + shift; 698 v = B->a + B->i[i] + shift; 699 SPARSEDENSEMDOT(sum,ls,v,idx,n); 700 t[i] = omega*(sum/d); 701 } 702 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 703 mat->Mvctx); CHKERRQ(ierr); 704 /* x = x + t */ 705 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 706 VecDestroy(tt); 707 return 0; 708 } 709 710 711 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 712 if (flag & SOR_ZERO_INITIAL_GUESS) { 713 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 714 } 715 else { 716 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 717 CHKERRQ(ierr); 718 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 719 CHKERRQ(ierr); 720 } 721 while (its--) { 722 /* go down through the rows */ 723 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 724 mat->Mvctx); CHKERRQ(ierr); 725 for ( i=0; i<m; i++ ) { 726 n = A->i[i+1] - A->i[i]; 727 idx = A->j + A->i[i] + shift; 728 v = A->a + A->i[i] + shift; 729 sum = b[i]; 730 SPARSEDENSEMDOT(sum,xs,v,idx,n); 731 d = fshift + A->a[diag[i]+shift]; 732 n = B->i[i+1] - B->i[i]; 733 idx = B->j + B->i[i] + shift; 734 v = B->a + B->i[i] + shift; 735 SPARSEDENSEMDOT(sum,ls,v,idx,n); 736 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 737 } 738 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 739 mat->Mvctx); CHKERRQ(ierr); 740 /* come up through the rows */ 741 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 742 mat->Mvctx); CHKERRQ(ierr); 743 for ( i=m-1; i>-1; i-- ) { 744 n = A->i[i+1] - A->i[i]; 745 idx = A->j + A->i[i] + shift; 746 v = A->a + A->i[i] + shift; 747 sum = b[i]; 748 SPARSEDENSEMDOT(sum,xs,v,idx,n); 749 d = fshift + A->a[diag[i]+shift]; 750 n = B->i[i+1] - B->i[i]; 751 idx = B->j + B->i[i] + shift; 752 v = B->a + B->i[i] + shift; 753 SPARSEDENSEMDOT(sum,ls,v,idx,n); 754 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 755 } 756 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 757 mat->Mvctx); CHKERRQ(ierr); 758 } 759 } 760 else if (flag & SOR_FORWARD_SWEEP){ 761 if (flag & SOR_ZERO_INITIAL_GUESS) { 762 VecSet(&zero,mat->lvec); 763 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 764 mat->Mvctx); CHKERRQ(ierr); 765 for ( i=0; i<m; i++ ) { 766 n = diag[i] - A->i[i]; 767 idx = A->j + A->i[i] + shift; 768 v = A->a + A->i[i] + shift; 769 sum = b[i]; 770 SPARSEDENSEMDOT(sum,xs,v,idx,n); 771 d = fshift + A->a[diag[i]+shift]; 772 n = B->i[i+1] - B->i[i]; 773 idx = B->j + B->i[i] + shift; 774 v = B->a + B->i[i] + shift; 775 SPARSEDENSEMDOT(sum,ls,v,idx,n); 776 x[i] = omega*(sum/d); 777 } 778 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 779 mat->Mvctx); CHKERRQ(ierr); 780 its--; 781 } 782 while (its--) { 783 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 784 CHKERRQ(ierr); 785 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 786 CHKERRQ(ierr); 787 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 788 mat->Mvctx); CHKERRQ(ierr); 789 for ( i=0; i<m; i++ ) { 790 n = A->i[i+1] - A->i[i]; 791 idx = A->j + A->i[i] + shift; 792 v = A->a + A->i[i] + shift; 793 sum = b[i]; 794 SPARSEDENSEMDOT(sum,xs,v,idx,n); 795 d = fshift + A->a[diag[i]+shift]; 796 n = B->i[i+1] - B->i[i]; 797 idx = B->j + B->i[i] + shift; 798 v = B->a + B->i[i] + shift; 799 SPARSEDENSEMDOT(sum,ls,v,idx,n); 800 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 801 } 802 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 803 mat->Mvctx); CHKERRQ(ierr); 804 } 805 } 806 else if (flag & SOR_BACKWARD_SWEEP){ 807 if (flag & SOR_ZERO_INITIAL_GUESS) { 808 VecSet(&zero,mat->lvec); 809 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 810 mat->Mvctx); CHKERRQ(ierr); 811 for ( i=m-1; i>-1; i-- ) { 812 n = A->i[i+1] - diag[i] - 1; 813 idx = A->j + diag[i] + !shift; 814 v = A->a + diag[i] + !shift; 815 sum = b[i]; 816 SPARSEDENSEMDOT(sum,xs,v,idx,n); 817 d = fshift + A->a[diag[i]+shift]; 818 n = B->i[i+1] - B->i[i]; 819 idx = B->j + B->i[i] + shift; 820 v = B->a + B->i[i] + shift; 821 SPARSEDENSEMDOT(sum,ls,v,idx,n); 822 x[i] = omega*(sum/d); 823 } 824 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 825 mat->Mvctx); CHKERRQ(ierr); 826 its--; 827 } 828 while (its--) { 829 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 830 mat->Mvctx); CHKERRQ(ierr); 831 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 832 mat->Mvctx); CHKERRQ(ierr); 833 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 834 mat->Mvctx); CHKERRQ(ierr); 835 for ( i=m-1; i>-1; i-- ) { 836 n = A->i[i+1] - A->i[i]; 837 idx = A->j + A->i[i] + shift; 838 v = A->a + A->i[i] + shift; 839 sum = b[i]; 840 SPARSEDENSEMDOT(sum,xs,v,idx,n); 841 d = fshift + A->a[diag[i]+shift]; 842 n = B->i[i+1] - B->i[i]; 843 idx = B->j + B->i[i] + shift; 844 v = B->a + B->i[i] + shift; 845 SPARSEDENSEMDOT(sum,ls,v,idx,n); 846 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 847 } 848 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 849 mat->Mvctx); CHKERRQ(ierr); 850 } 851 } 852 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 853 if (flag & SOR_ZERO_INITIAL_GUESS) { 854 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 855 } 856 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 857 CHKERRQ(ierr); 858 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 859 CHKERRQ(ierr); 860 while (its--) { 861 /* go down through the rows */ 862 for ( i=0; i<m; i++ ) { 863 n = A->i[i+1] - A->i[i]; 864 idx = A->j + A->i[i] + shift; 865 v = A->a + A->i[i] + shift; 866 sum = b[i]; 867 SPARSEDENSEMDOT(sum,xs,v,idx,n); 868 d = fshift + A->a[diag[i]+shift]; 869 n = B->i[i+1] - B->i[i]; 870 idx = B->j + B->i[i] + shift; 871 v = B->a + B->i[i] + shift; 872 SPARSEDENSEMDOT(sum,ls,v,idx,n); 873 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 874 } 875 /* come up through the rows */ 876 for ( i=m-1; i>-1; i-- ) { 877 n = A->i[i+1] - A->i[i]; 878 idx = A->j + A->i[i] + shift; 879 v = A->a + A->i[i] + shift; 880 sum = b[i]; 881 SPARSEDENSEMDOT(sum,xs,v,idx,n); 882 d = fshift + A->a[diag[i]+shift]; 883 n = B->i[i+1] - B->i[i]; 884 idx = B->j + B->i[i] + shift; 885 v = B->a + B->i[i] + shift; 886 SPARSEDENSEMDOT(sum,ls,v,idx,n); 887 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 888 } 889 } 890 } 891 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 892 if (flag & SOR_ZERO_INITIAL_GUESS) { 893 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 894 } 895 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 896 CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 898 CHKERRQ(ierr); 899 while (its--) { 900 for ( i=0; i<m; i++ ) { 901 n = A->i[i+1] - A->i[i]; 902 idx = A->j + A->i[i] + shift; 903 v = A->a + A->i[i] + shift; 904 sum = b[i]; 905 SPARSEDENSEMDOT(sum,xs,v,idx,n); 906 d = fshift + A->a[diag[i]+shift]; 907 n = B->i[i+1] - B->i[i]; 908 idx = B->j + B->i[i] + shift; 909 v = B->a + B->i[i] + shift; 910 SPARSEDENSEMDOT(sum,ls,v,idx,n); 911 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 912 } 913 } 914 } 915 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 916 if (flag & SOR_ZERO_INITIAL_GUESS) { 917 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 918 } 919 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 920 mat->Mvctx); CHKERRQ(ierr); 921 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 922 mat->Mvctx); CHKERRQ(ierr); 923 while (its--) { 924 for ( i=m-1; i>-1; i-- ) { 925 n = A->i[i+1] - A->i[i]; 926 idx = A->j + A->i[i] + shift; 927 v = A->a + A->i[i] + shift; 928 sum = b[i]; 929 SPARSEDENSEMDOT(sum,xs,v,idx,n); 930 d = fshift + A->a[diag[i]+shift]; 931 n = B->i[i+1] - B->i[i]; 932 idx = B->j + B->i[i] + shift; 933 v = B->a + B->i[i] + shift; 934 SPARSEDENSEMDOT(sum,ls,v,idx,n); 935 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 936 } 937 } 938 } 939 return 0; 940 } 941 942 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 943 int *nzalloc,int *mem) 944 { 945 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 946 Mat A = mat->A, B = mat->B; 947 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 948 949 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 950 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 951 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 952 if (flag == MAT_LOCAL) { 953 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 954 } else if (flag == MAT_GLOBAL_MAX) { 955 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 956 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 957 } else if (flag == MAT_GLOBAL_SUM) { 958 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 959 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 960 } 961 return 0; 962 } 963 964 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 965 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 966 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 967 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 968 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 969 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 970 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 971 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 972 973 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 976 977 if (op == NO_NEW_NONZERO_LOCATIONS || 978 op == YES_NEW_NONZERO_LOCATIONS || 979 op == COLUMNS_SORTED || 980 op == ROW_ORIENTED) { 981 MatSetOption(a->A,op); 982 MatSetOption(a->B,op); 983 } 984 else if (op == ROWS_SORTED || 985 op == SYMMETRIC_MATRIX || 986 op == STRUCTURALLY_SYMMETRIC_MATRIX || 987 op == YES_NEW_DIAGONALS) 988 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 989 else if (op == COLUMN_ORIENTED) 990 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 991 else if (op == NO_NEW_DIAGONALS) 992 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 993 else 994 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 995 return 0; 996 } 997 998 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 999 { 1000 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1001 *m = mat->M; *n = mat->N; 1002 return 0; 1003 } 1004 1005 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1006 { 1007 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1008 *m = mat->m; *n = mat->N; 1009 return 0; 1010 } 1011 1012 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1013 { 1014 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1015 *m = mat->rstart; *n = mat->rend; 1016 return 0; 1017 } 1018 1019 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1020 { 1021 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1022 Scalar *vworkA, *vworkB, **pvA, **pvB; 1023 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1024 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1025 1026 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 1027 lrow = row - rstart; 1028 1029 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1030 if (!v) {pvA = 0; pvB = 0;} 1031 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1032 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1033 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1034 nztot = nzA + nzB; 1035 1036 if (v || idx) { 1037 if (nztot) { 1038 /* Sort by increasing column numbers, assuming A and B already sorted */ 1039 int imark; 1040 if (mat->assembled) { 1041 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1042 } 1043 if (v) { 1044 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1045 for ( i=0; i<nzB; i++ ) { 1046 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1047 else break; 1048 } 1049 imark = i; 1050 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1051 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1052 } 1053 if (idx) { 1054 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1055 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1056 for ( i=0; i<nzB; i++ ) { 1057 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1058 else break; 1059 } 1060 imark = i; 1061 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1062 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1063 } 1064 } 1065 else {*idx = 0; *v=0;} 1066 } 1067 *nz = nztot; 1068 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1069 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1070 return 0; 1071 } 1072 1073 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1074 { 1075 if (idx) PETSCFREE(*idx); 1076 if (v) PETSCFREE(*v); 1077 return 0; 1078 } 1079 1080 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1081 { 1082 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1083 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1084 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1085 double sum = 0.0; 1086 Scalar *v; 1087 1088 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1089 if (aij->size == 1) { 1090 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1091 } else { 1092 if (type == NORM_FROBENIUS) { 1093 v = amat->a; 1094 for (i=0; i<amat->nz; i++ ) { 1095 #if defined(PETSC_COMPLEX) 1096 sum += real(conj(*v)*(*v)); v++; 1097 #else 1098 sum += (*v)*(*v); v++; 1099 #endif 1100 } 1101 v = bmat->a; 1102 for (i=0; i<bmat->nz; i++ ) { 1103 #if defined(PETSC_COMPLEX) 1104 sum += real(conj(*v)*(*v)); v++; 1105 #else 1106 sum += (*v)*(*v); v++; 1107 #endif 1108 } 1109 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1110 *norm = sqrt(*norm); 1111 } 1112 else if (type == NORM_1) { /* max column norm */ 1113 double *tmp, *tmp2; 1114 int *jj, *garray = aij->garray; 1115 tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1116 tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1117 PetscZero(tmp,aij->N*sizeof(double)); 1118 *norm = 0.0; 1119 v = amat->a; jj = amat->j; 1120 for ( j=0; j<amat->nz; j++ ) { 1121 #if defined(PETSC_COMPLEX) 1122 tmp[cstart + *jj++ + shift] += abs(*v++); 1123 #else 1124 tmp[cstart + *jj++ + shift] += fabs(*v++); 1125 #endif 1126 } 1127 v = bmat->a; jj = bmat->j; 1128 for ( j=0; j<bmat->nz; j++ ) { 1129 #if defined(PETSC_COMPLEX) 1130 tmp[garray[*jj++ + shift]] += abs(*v++); 1131 #else 1132 tmp[garray[*jj++ + shift]] += fabs(*v++); 1133 #endif 1134 } 1135 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1136 for ( j=0; j<aij->N; j++ ) { 1137 if (tmp2[j] > *norm) *norm = tmp2[j]; 1138 } 1139 PETSCFREE(tmp); PETSCFREE(tmp2); 1140 } 1141 else if (type == NORM_INFINITY) { /* max row norm */ 1142 double normtemp = 0.0; 1143 for ( j=0; j<amat->m; j++ ) { 1144 v = amat->a + amat->i[j] + shift; 1145 sum = 0.0; 1146 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1147 #if defined(PETSC_COMPLEX) 1148 sum += abs(*v); v++; 1149 #else 1150 sum += fabs(*v); v++; 1151 #endif 1152 } 1153 v = bmat->a + bmat->i[j] + shift; 1154 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1155 #if defined(PETSC_COMPLEX) 1156 sum += abs(*v); v++; 1157 #else 1158 sum += fabs(*v); v++; 1159 #endif 1160 } 1161 if (sum > normtemp) normtemp = sum; 1162 MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1163 } 1164 } 1165 else { 1166 SETERRQ(1,"MatNorm_MPIRow:No support for two norm"); 1167 } 1168 } 1169 return 0; 1170 } 1171 1172 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1175 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1176 int ierr,shift = Aloc->indexshift; 1177 Mat B; 1178 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1179 Scalar *array; 1180 1181 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1182 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1183 CHKERRQ(ierr); 1184 1185 /* copy over the A part */ 1186 Aloc = (Mat_SeqAIJ*) a->A->data; 1187 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1188 row = a->rstart; 1189 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1190 for ( i=0; i<m; i++ ) { 1191 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1192 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1193 } 1194 aj = Aloc->j; 1195 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1196 1197 /* copy over the B part */ 1198 Aloc = (Mat_SeqAIJ*) a->B->data; 1199 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1200 row = a->rstart; 1201 ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1202 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1203 for ( i=0; i<m; i++ ) { 1204 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1205 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1206 } 1207 PETSCFREE(ct); 1208 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1209 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1210 if (matout) { 1211 *matout = B; 1212 } else { 1213 /* This isn't really an in-place transpose .... but free data structures from a */ 1214 PETSCFREE(a->rowners); 1215 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1216 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1217 if (a->colmap) PETSCFREE(a->colmap); 1218 if (a->garray) PETSCFREE(a->garray); 1219 if (a->lvec) VecDestroy(a->lvec); 1220 if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx); 1221 PETSCFREE(a); 1222 PetscMemcpy(A,B,sizeof(struct _Mat)); 1223 PETSCHEADERDESTROY(B); 1224 } 1225 return 0; 1226 } 1227 1228 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1229 static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1230 1231 /* -------------------------------------------------------------------*/ 1232 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1233 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1234 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1235 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1236 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1237 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1238 MatLUFactor_MPIAIJ,0, 1239 MatRelax_MPIAIJ, 1240 MatTranspose_MPIAIJ, 1241 MatGetInfo_MPIAIJ,0, 1242 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1243 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1244 0, 1245 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1246 MatGetReordering_MPIAIJ, 1247 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1248 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1249 MatILUFactorSymbolic_MPIAIJ,0, 1250 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1251 1252 /*@C 1253 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1254 (the default parallel PETSc format). 1255 1256 Input Parameters: 1257 . comm - MPI communicator 1258 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1259 . n - number of local columns (or PETSC_DECIDE to have calculated 1260 if N is given) 1261 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1262 . N - number of global columns (or PETSC_DECIDE to have calculated 1263 if n is given) 1264 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1265 (same for all local rows) 1266 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1267 or null (possibly different for each row). You must leave room 1268 for the diagonal entry even if it is zero. 1269 . o_nz - number of nonzeros per row in off-diagonal portion of local 1270 submatrix (same for all local rows). 1271 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1272 submatrix or null (possibly different for each row). 1273 1274 Output Parameter: 1275 . newmat - the matrix 1276 1277 Notes: 1278 The AIJ format (also called the Yale sparse matrix format or 1279 compressed row storage), is fully compatible with standard Fortran 77 1280 storage. That is, the stored row and column indices can begin at 1281 either one (as in Fortran) or zero. See the users manual for details. 1282 1283 The user MUST specify either the local or global matrix dimensions 1284 (possibly both). 1285 1286 Storage Information: 1287 For a square global matrix we define each processor's diagonal portion 1288 to be its local rows and the corresponding columns (a square submatrix); 1289 each processor's off-diagonal portion encompasses the remainder of the 1290 local matrix (a rectangular submatrix). 1291 1292 The user can specify preallocated storage for the diagonal part of 1293 the local submatrix with either d_nz or d_nnz (not both). Set both 1294 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1295 Likewise, specify preallocated storage for the off-diagonal part of 1296 the local submatrix with o_nz or o_nnz (not both). 1297 1298 .keywords: matrix, aij, compressed row, sparse, parallel 1299 1300 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1301 @*/ 1302 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1303 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1304 { 1305 Mat mat; 1306 Mat_MPIAIJ *a; 1307 int ierr, i,sum[2],work[2]; 1308 1309 *newmat = 0; 1310 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1311 PLogObjectCreate(mat); 1312 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1313 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1314 mat->destroy = MatDestroy_MPIAIJ; 1315 mat->view = MatView_MPIAIJ; 1316 mat->factor = 0; 1317 1318 a->insertmode = NOT_SET_VALUES; 1319 MPI_Comm_rank(comm,&a->rank); 1320 MPI_Comm_size(comm,&a->size); 1321 1322 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1323 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1324 1325 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1326 work[0] = m; work[1] = n; 1327 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1328 if (M == PETSC_DECIDE) M = sum[0]; 1329 if (N == PETSC_DECIDE) N = sum[1]; 1330 } 1331 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1332 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1333 a->m = m; 1334 a->n = n; 1335 a->N = N; 1336 a->M = M; 1337 1338 /* build local table of row and column ownerships */ 1339 a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1340 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 1341 sizeof(Mat_MPIAIJ)); 1342 a->cowners = a->rowners + a->size + 1; 1343 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1344 a->rowners[0] = 0; 1345 for ( i=2; i<=a->size; i++ ) { 1346 a->rowners[i] += a->rowners[i-1]; 1347 } 1348 a->rstart = a->rowners[a->rank]; 1349 a->rend = a->rowners[a->rank+1]; 1350 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1351 a->cowners[0] = 0; 1352 for ( i=2; i<=a->size; i++ ) { 1353 a->cowners[i] += a->cowners[i-1]; 1354 } 1355 a->cstart = a->cowners[a->rank]; 1356 a->cend = a->cowners[a->rank+1]; 1357 1358 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1359 PLogObjectParent(mat,a->A); 1360 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1361 PLogObjectParent(mat,a->B); 1362 1363 /* build cache for off array entries formed */ 1364 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1365 a->colmap = 0; 1366 a->garray = 0; 1367 1368 /* stuff used for matrix vector multiply */ 1369 a->lvec = 0; 1370 a->Mvctx = 0; 1371 a->assembled = 0; 1372 1373 *newmat = mat; 1374 return 0; 1375 } 1376 1377 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1378 { 1379 Mat mat; 1380 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1381 int ierr, len; 1382 1383 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1384 *newmat = 0; 1385 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1386 PLogObjectCreate(mat); 1387 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1388 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1389 mat->destroy = MatDestroy_MPIAIJ; 1390 mat->view = MatView_MPIAIJ; 1391 mat->factor = matin->factor; 1392 1393 a->m = oldmat->m; 1394 a->n = oldmat->n; 1395 a->M = oldmat->M; 1396 a->N = oldmat->N; 1397 1398 a->assembled = 1; 1399 a->rstart = oldmat->rstart; 1400 a->rend = oldmat->rend; 1401 a->cstart = oldmat->cstart; 1402 a->cend = oldmat->cend; 1403 a->size = oldmat->size; 1404 a->rank = oldmat->rank; 1405 a->insertmode = NOT_SET_VALUES; 1406 1407 a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1408 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1409 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1410 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1411 if (oldmat->colmap) { 1412 a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1413 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1414 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1415 } else a->colmap = 0; 1416 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1417 a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1418 PLogObjectMemory(mat,len*sizeof(int)); 1419 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1420 } else a->garray = 0; 1421 1422 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1423 PLogObjectParent(mat,a->lvec); 1424 ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1425 PLogObjectParent(mat,a->Mvctx); 1426 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1427 PLogObjectParent(mat,a->A); 1428 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1429 PLogObjectParent(mat,a->B); 1430 *newmat = mat; 1431 return 0; 1432 } 1433 1434 #include "sysio.h" 1435 1436 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1437 { 1438 Mat A; 1439 int i, nz, ierr, j,rstart, rend, fd; 1440 Scalar *vals,*svals; 1441 PetscObject vobj = (PetscObject) bview; 1442 MPI_Comm comm = vobj->comm; 1443 MPI_Status status; 1444 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1445 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1446 int tag = ((PetscObject)bview)->tag; 1447 1448 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1449 if (!rank) { 1450 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1451 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1452 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1453 } 1454 1455 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1456 M = header[1]; N = header[2]; 1457 /* determine ownership of all rows */ 1458 m = M/size + ((M % size) > rank); 1459 rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1460 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1461 rowners[0] = 0; 1462 for ( i=2; i<=size; i++ ) { 1463 rowners[i] += rowners[i-1]; 1464 } 1465 rstart = rowners[rank]; 1466 rend = rowners[rank+1]; 1467 1468 /* distribute row lengths to all processors */ 1469 ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1470 offlens = ourlens + (rend-rstart); 1471 if (!rank) { 1472 rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1473 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1474 sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts); 1475 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1476 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1477 PETSCFREE(sndcounts); 1478 } 1479 else { 1480 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1481 } 1482 1483 if (!rank) { 1484 /* calculate the number of nonzeros on each processor */ 1485 procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz); 1486 PetscZero(procsnz,size*sizeof(int)); 1487 for ( i=0; i<size; i++ ) { 1488 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1489 procsnz[i] += rowlengths[j]; 1490 } 1491 } 1492 PETSCFREE(rowlengths); 1493 1494 /* determine max buffer needed and allocate it */ 1495 maxnz = 0; 1496 for ( i=0; i<size; i++ ) { 1497 maxnz = PETSCMAX(maxnz,procsnz[i]); 1498 } 1499 cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1500 1501 /* read in my part of the matrix column indices */ 1502 nz = procsnz[0]; 1503 mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1504 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1505 1506 /* read in every one elses and ship off */ 1507 for ( i=1; i<size; i++ ) { 1508 nz = procsnz[i]; 1509 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1510 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1511 } 1512 PETSCFREE(cols); 1513 } 1514 else { 1515 /* determine buffer space needed for message */ 1516 nz = 0; 1517 for ( i=0; i<m; i++ ) { 1518 nz += ourlens[i]; 1519 } 1520 mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1521 1522 /* receive message of column indices*/ 1523 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1524 MPI_Get_count(&status,MPI_INT,&maxnz); 1525 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1526 } 1527 1528 /* loop over local rows, determining number of off diagonal entries */ 1529 PetscZero(offlens,m*sizeof(int)); 1530 jj = 0; 1531 for ( i=0; i<m; i++ ) { 1532 for ( j=0; j<ourlens[i]; j++ ) { 1533 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1534 jj++; 1535 } 1536 } 1537 1538 /* create our matrix */ 1539 for ( i=0; i<m; i++ ) { 1540 ourlens[i] -= offlens[i]; 1541 } 1542 if (type == MATMPIAIJ) { 1543 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1544 } 1545 else if (type == MATMPIROW) { 1546 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1547 } 1548 A = *newmat; 1549 MatSetOption(A,COLUMNS_SORTED); 1550 for ( i=0; i<m; i++ ) { 1551 ourlens[i] += offlens[i]; 1552 } 1553 1554 if (!rank) { 1555 vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1556 1557 /* read in my part of the matrix numerical values */ 1558 nz = procsnz[0]; 1559 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1560 1561 /* insert into matrix */ 1562 jj = rstart; 1563 smycols = mycols; 1564 svals = vals; 1565 for ( i=0; i<m; i++ ) { 1566 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1567 smycols += ourlens[i]; 1568 svals += ourlens[i]; 1569 jj++; 1570 } 1571 1572 /* read in other processors and ship out */ 1573 for ( i=1; i<size; i++ ) { 1574 nz = procsnz[i]; 1575 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1576 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1577 } 1578 PETSCFREE(procsnz); 1579 } 1580 else { 1581 /* receive numeric values */ 1582 vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1583 1584 /* receive message of values*/ 1585 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1586 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1587 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1588 1589 /* insert into matrix */ 1590 jj = rstart; 1591 smycols = mycols; 1592 svals = vals; 1593 for ( i=0; i<m; i++ ) { 1594 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1595 smycols += ourlens[i]; 1596 svals += ourlens[i]; 1597 jj++; 1598 } 1599 } 1600 PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1601 1602 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1603 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1604 return 0; 1605 } 1606