1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.93 1995/10/23 17:21:45 bsmith Exp curfman $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscZero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 40 int *idxn,Scalar *v,InsertMode addv) 41 { 42 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 43 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (idxm[i] >= rstart && idxm[i] < rend) { 56 row = idxm[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (idxn[j] >= cstart && idxn[j] < cend){ 61 col = idxn[j] - cstart; 62 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 63 } 64 else { 65 if (aij->assembled) { 66 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 67 col = aij->colmap[idxn[j]] + shift; 68 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 69 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 70 col = idxn[j]; 71 } 72 } 73 else col = idxn[j]; 74 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 75 } 76 } 77 } 78 else { 79 ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 80 } 81 } 82 return 0; 83 } 84 85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 86 { 87 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 88 MPI_Comm comm = mat->comm; 89 int size = aij->size, *owners = aij->rowners; 90 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 91 MPI_Request *send_waits,*recv_waits; 92 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 93 InsertMode addv; 94 Scalar *rvalues,*svalues; 95 96 /* make sure all processors are either in INSERTMODE or ADDMODE */ 97 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 98 if (addv == (ADD_VALUES|INSERT_VALUES)) { 99 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 100 } 101 aij->insertmode = addv; /* in case this processor had no cache */ 102 103 /* first count number of contributors to each processor */ 104 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 105 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 106 owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 107 for ( i=0; i<aij->stash.n; i++ ) { 108 idx = aij->stash.idx[i]; 109 for ( j=0; j<size; j++ ) { 110 if (idx >= owners[j] && idx < owners[j+1]) { 111 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 112 } 113 } 114 } 115 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 116 117 /* inform other processors of number of messages and max length*/ 118 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 119 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 120 nreceives = work[rank]; 121 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 122 nmax = work[rank]; 123 PETSCFREE(work); 124 125 /* post receives: 126 1) each message will consist of ordered pairs 127 (global index,value) we store the global index as a double 128 to simplify the message passing. 129 2) since we don't know how long each individual message is we 130 allocate the largest needed buffer for each receive. Potentially 131 this is a lot of wasted space. 132 133 134 This could be done better. 135 */ 136 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 137 CHKPTRQ(rvalues); 138 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 139 CHKPTRQ(recv_waits); 140 for ( i=0; i<nreceives; i++ ) { 141 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 142 comm,recv_waits+i); 143 } 144 145 /* do sends: 146 1) starts[i] gives the starting index in svalues for stuff going to 147 the ith processor 148 */ 149 svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 150 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 151 CHKPTRQ(send_waits); 152 starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts); 153 starts[0] = 0; 154 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 155 for ( i=0; i<aij->stash.n; i++ ) { 156 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 157 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 158 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 159 } 160 PETSCFREE(owner); 161 starts[0] = 0; 162 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 163 count = 0; 164 for ( i=0; i<size; i++ ) { 165 if (procs[i]) { 166 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 167 comm,send_waits+count++); 168 } 169 } 170 PETSCFREE(starts); PETSCFREE(nprocs); 171 172 /* Free cache space */ 173 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 174 175 aij->svalues = svalues; aij->rvalues = rvalues; 176 aij->nsends = nsends; aij->nrecvs = nreceives; 177 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 178 aij->rmax = nmax; 179 180 return 0; 181 } 182 extern int MatSetUpMultiply_MPIAIJ(Mat); 183 184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 185 { 186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 187 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 188 MPI_Status *send_status,recv_status; 189 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 190 int row,col,other_disassembled,shift = C->indexshift; 191 Scalar *values,val; 192 InsertMode addv = aij->insertmode; 193 194 /* wait on receives */ 195 while (count) { 196 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 197 /* unpack receives into our local space */ 198 values = aij->rvalues + 3*imdex*aij->rmax; 199 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 200 n = n/3; 201 for ( i=0; i<n; i++ ) { 202 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 203 col = (int) PETSCREAL(values[3*i+1]); 204 val = values[3*i+2]; 205 if (col >= aij->cstart && col < aij->cend) { 206 col -= aij->cstart; 207 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 208 } 209 else { 210 if (aij->assembled) { 211 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 212 col = aij->colmap[col] + shift; 213 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 214 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 215 col = (int) PETSCREAL(values[3*i+1]); 216 } 217 } 218 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 219 } 220 } 221 count--; 222 } 223 PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 224 225 /* wait on sends */ 226 if (aij->nsends) { 227 send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status)); 228 CHKPTRQ(send_status); 229 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 230 PETSCFREE(send_status); 231 } 232 PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 233 234 aij->insertmode = NOT_SET_VALUES; 235 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 236 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 237 238 /* determine if any processor has disassembled, if so we must 239 also disassemble ourselfs, in order that we may reassemble. */ 240 MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 241 if (aij->assembled && !other_disassembled) { 242 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 243 } 244 245 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 246 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 247 aij->assembled = 1; 248 } 249 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 251 252 return 0; 253 } 254 255 static int MatZeroEntries_MPIAIJ(Mat A) 256 { 257 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 258 int ierr; 259 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 260 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 261 return 0; 262 } 263 264 /* the code does not do the diagonal entries correctly unless the 265 matrix is square and the column and row owerships are identical. 266 This is a BUG. The only way to fix it seems to be to access 267 aij->A and aij->B directly and not through the MatZeroRows() 268 routine. 269 */ 270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 271 { 272 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 273 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 274 int *procs,*nprocs,j,found,idx,nsends,*work; 275 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 276 int *rvalues,tag = A->tag,count,base,slen,n,*source; 277 int *lens,imdex,*lrows,*values; 278 MPI_Comm comm = A->comm; 279 MPI_Request *send_waits,*recv_waits; 280 MPI_Status recv_status,*send_status; 281 IS istmp; 282 283 if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 284 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 285 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 286 287 /* first count number of contributors to each processor */ 288 nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 289 PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 290 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 291 for ( i=0; i<N; i++ ) { 292 idx = rows[i]; 293 found = 0; 294 for ( j=0; j<size; j++ ) { 295 if (idx >= owners[j] && idx < owners[j+1]) { 296 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 297 } 298 } 299 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 300 } 301 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 302 303 /* inform other processors of number of messages and max length*/ 304 work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 305 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 306 nrecvs = work[rank]; 307 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 308 nmax = work[rank]; 309 PETSCFREE(work); 310 311 /* post receives: */ 312 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 313 CHKPTRQ(rvalues); 314 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 315 CHKPTRQ(recv_waits); 316 for ( i=0; i<nrecvs; i++ ) { 317 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 318 } 319 320 /* do sends: 321 1) starts[i] gives the starting index in svalues for stuff going to 322 the ith processor 323 */ 324 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 325 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 326 CHKPTRQ(send_waits); 327 starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts); 328 starts[0] = 0; 329 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 330 for ( i=0; i<N; i++ ) { 331 svalues[starts[owner[i]]++] = rows[i]; 332 } 333 ISRestoreIndices(is,&rows); 334 335 starts[0] = 0; 336 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 337 count = 0; 338 for ( i=0; i<size; i++ ) { 339 if (procs[i]) { 340 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 341 } 342 } 343 PETSCFREE(starts); 344 345 base = owners[rank]; 346 347 /* wait on receives */ 348 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 349 source = lens + nrecvs; 350 count = nrecvs; slen = 0; 351 while (count) { 352 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 353 /* unpack receives into our local space */ 354 MPI_Get_count(&recv_status,MPI_INT,&n); 355 source[imdex] = recv_status.MPI_SOURCE; 356 lens[imdex] = n; 357 slen += n; 358 count--; 359 } 360 PETSCFREE(recv_waits); 361 362 /* move the data into the send scatter */ 363 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 364 count = 0; 365 for ( i=0; i<nrecvs; i++ ) { 366 values = rvalues + i*nmax; 367 for ( j=0; j<lens[i]; j++ ) { 368 lrows[count++] = values[j] - base; 369 } 370 } 371 PETSCFREE(rvalues); PETSCFREE(lens); 372 PETSCFREE(owner); PETSCFREE(nprocs); 373 374 /* actually zap the local rows */ 375 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 376 PLogObjectParent(A,istmp); 377 PETSCFREE(lrows); 378 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 379 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 380 ierr = ISDestroy(istmp); CHKERRQ(ierr); 381 382 /* wait on sends */ 383 if (nsends) { 384 send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 385 CHKPTRQ(send_status); 386 MPI_Waitall(nsends,send_waits,send_status); 387 PETSCFREE(send_status); 388 } 389 PETSCFREE(send_waits); PETSCFREE(svalues); 390 391 return 0; 392 } 393 394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 395 { 396 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 397 int ierr; 398 399 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 400 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 401 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 402 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 403 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 404 return 0; 405 } 406 407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 408 { 409 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 410 int ierr; 411 if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 412 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 413 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 414 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 415 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 416 return 0; 417 } 418 419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 420 { 421 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 422 int ierr; 423 424 if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 425 /* do nondiagonal part */ 426 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 427 /* send it on its way */ 428 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 429 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 430 /* do local part */ 431 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 432 /* receive remote parts: note this assumes the values are not actually */ 433 /* inserted in yy until the next line, which is true for my implementation*/ 434 /* but is not perhaps always true. */ 435 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 436 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 437 return 0; 438 } 439 440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 441 { 442 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 443 int ierr; 444 445 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 446 /* do nondiagonal part */ 447 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 448 /* send it on its way */ 449 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 450 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 451 /* do local part */ 452 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453 /* receive remote parts: note this assumes the values are not actually */ 454 /* inserted in yy until the next line, which is true for my implementation*/ 455 /* but is not perhaps always true. */ 456 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 457 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 458 return 0; 459 } 460 461 /* 462 This only works correctly for square matrices where the subblock A->A is the 463 diagonal block 464 */ 465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 466 { 467 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 468 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 469 return MatGetDiagonal(a->A,v); 470 } 471 472 static int MatDestroy_MPIAIJ(PetscObject obj) 473 { 474 Mat mat = (Mat) obj; 475 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 476 int ierr; 477 #if defined(PETSC_LOG) 478 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 479 #endif 480 PETSCFREE(aij->rowners); 481 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 482 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 483 if (aij->colmap) PETSCFREE(aij->colmap); 484 if (aij->garray) PETSCFREE(aij->garray); 485 if (aij->lvec) VecDestroy(aij->lvec); 486 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 487 PETSCFREE(aij); 488 PLogObjectDestroy(mat); 489 PETSCHEADERDESTROY(mat); 490 return 0; 491 } 492 #include "draw.h" 493 #include "pinclude/pviewer.h" 494 495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 496 { 497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 498 int ierr; 499 500 if (aij->size == 1) { 501 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 502 } 503 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 504 return 0; 505 } 506 507 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 508 { 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 510 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 511 int ierr, format,shift = C->indexshift,rank; 512 PetscObject vobj = (PetscObject) viewer; 513 FILE *fd; 514 515 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 516 ierr = ViewerFileGetFormat_Private(viewer,&format); 517 if (format == FILE_FORMAT_INFO) { 518 int nz,nzalloc,mem; 519 MPI_Comm_rank(mat->comm,&rank); 520 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 521 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 522 MPIU_Seq_begin(mat->comm,1); 523 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 524 nzalloc,mem); 525 fflush(fd); 526 MPIU_Seq_end(mat->comm,1); 527 ierr = VecScatterView(aij->Mvctx,viewer); 528 return 0; 529 } 530 } 531 532 if (vobj->type == ASCII_FILE_VIEWER) { 533 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 534 MPIU_Seq_begin(mat->comm,1); 535 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 536 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 537 aij->cend); 538 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 539 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 540 fflush(fd); 541 MPIU_Seq_end(mat->comm,1); 542 } 543 else { 544 int size = aij->size; 545 rank = aij->rank; 546 if (size == 1) { 547 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 548 } 549 else { 550 /* assemble the entire matrix onto first processor. */ 551 Mat A; 552 Mat_SeqAIJ *Aloc; 553 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 554 Scalar *a; 555 556 if (!rank) { 557 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 558 } 559 else { 560 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 561 } 562 PLogObjectParent(mat,A); 563 564 565 /* copy over the A part */ 566 Aloc = (Mat_SeqAIJ*) aij->A->data; 567 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 568 row = aij->rstart; 569 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 570 for ( i=0; i<m; i++ ) { 571 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 572 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 573 } 574 aj = Aloc->j; 575 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 576 577 /* copy over the B part */ 578 Aloc = (Mat_SeqAIJ*) aij->B->data; 579 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 580 row = aij->rstart; 581 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 582 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 583 for ( i=0; i<m; i++ ) { 584 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 585 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 586 } 587 PETSCFREE(ct); 588 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 589 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 590 if (!rank) { 591 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 592 } 593 ierr = MatDestroy(A); CHKERRQ(ierr); 594 } 595 } 596 return 0; 597 } 598 599 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 600 { 601 Mat mat = (Mat) obj; 602 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 603 int ierr; 604 PetscObject vobj = (PetscObject) viewer; 605 606 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 607 if (!viewer) { 608 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 609 } 610 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 611 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 612 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 613 } 614 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 615 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 616 } 617 else if (vobj->cookie == DRAW_COOKIE) { 618 ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 619 } 620 else if (vobj->type == BINARY_FILE_VIEWER) { 621 return MatView_MPIAIJ_Binary(mat,viewer); 622 } 623 return 0; 624 } 625 626 extern int MatMarkDiag_SeqAIJ(Mat); 627 /* 628 This has to provide several versions. 629 630 1) per sequential 631 2) a) use only local smoothing updating outer values only once. 632 b) local smoothing updating outer values each inner iteration 633 3) color updating out values betwen colors. 634 */ 635 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 636 double fshift,int its,Vec xx) 637 { 638 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 639 Mat AA = mat->A, BB = mat->B; 640 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 641 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 642 int ierr,*idx, *diag; 643 int n = mat->n, m = mat->m, i,shift = A->indexshift; 644 Vec tt; 645 646 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 647 648 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 649 xs = x + shift; /* shift by one for index start of 1 */ 650 ls = ls + shift; 651 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 652 diag = A->diag; 653 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 654 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 655 } 656 if (flag & SOR_EISENSTAT) { 657 /* Let A = L + U + D; where L is lower trianglar, 658 U is upper triangular, E is diagonal; This routine applies 659 660 (L + E)^{-1} A (U + E)^{-1} 661 662 to a vector efficiently using Eisenstat's trick. This is for 663 the case of SSOR preconditioner, so E is D/omega where omega 664 is the relaxation factor. 665 */ 666 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 667 PLogObjectParent(matin,tt); 668 VecGetArray(tt,&t); 669 scale = (2.0/omega) - 1.0; 670 /* x = (E + U)^{-1} b */ 671 VecSet(&zero,mat->lvec); 672 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 673 mat->Mvctx); CHKERRQ(ierr); 674 for ( i=m-1; i>-1; i-- ) { 675 n = A->i[i+1] - diag[i] - 1; 676 idx = A->j + diag[i] + !shift; 677 v = A->a + diag[i] + !shift; 678 sum = b[i]; 679 SPARSEDENSEMDOT(sum,xs,v,idx,n); 680 d = fshift + A->a[diag[i]+shift]; 681 n = B->i[i+1] - B->i[i]; 682 idx = B->j + B->i[i] + shift; 683 v = B->a + B->i[i] + shift; 684 SPARSEDENSEMDOT(sum,ls,v,idx,n); 685 x[i] = omega*(sum/d); 686 } 687 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 688 mat->Mvctx); CHKERRQ(ierr); 689 690 /* t = b - (2*E - D)x */ 691 v = A->a; 692 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 693 694 /* t = (E + L)^{-1}t */ 695 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 696 diag = A->diag; 697 VecSet(&zero,mat->lvec); 698 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 699 mat->Mvctx); CHKERRQ(ierr); 700 for ( i=0; i<m; i++ ) { 701 n = diag[i] - A->i[i]; 702 idx = A->j + A->i[i] + shift; 703 v = A->a + A->i[i] + shift; 704 sum = t[i]; 705 SPARSEDENSEMDOT(sum,ts,v,idx,n); 706 d = fshift + A->a[diag[i]+shift]; 707 n = B->i[i+1] - B->i[i]; 708 idx = B->j + B->i[i] + shift; 709 v = B->a + B->i[i] + shift; 710 SPARSEDENSEMDOT(sum,ls,v,idx,n); 711 t[i] = omega*(sum/d); 712 } 713 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 714 mat->Mvctx); CHKERRQ(ierr); 715 /* x = x + t */ 716 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 717 VecDestroy(tt); 718 return 0; 719 } 720 721 722 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 723 if (flag & SOR_ZERO_INITIAL_GUESS) { 724 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 725 } 726 else { 727 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 728 CHKERRQ(ierr); 729 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 730 CHKERRQ(ierr); 731 } 732 while (its--) { 733 /* go down through the rows */ 734 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 735 mat->Mvctx); CHKERRQ(ierr); 736 for ( i=0; i<m; i++ ) { 737 n = A->i[i+1] - A->i[i]; 738 idx = A->j + A->i[i] + shift; 739 v = A->a + A->i[i] + shift; 740 sum = b[i]; 741 SPARSEDENSEMDOT(sum,xs,v,idx,n); 742 d = fshift + A->a[diag[i]+shift]; 743 n = B->i[i+1] - B->i[i]; 744 idx = B->j + B->i[i] + shift; 745 v = B->a + B->i[i] + shift; 746 SPARSEDENSEMDOT(sum,ls,v,idx,n); 747 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 748 } 749 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 750 mat->Mvctx); CHKERRQ(ierr); 751 /* come up through the rows */ 752 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 753 mat->Mvctx); CHKERRQ(ierr); 754 for ( i=m-1; i>-1; i-- ) { 755 n = A->i[i+1] - A->i[i]; 756 idx = A->j + A->i[i] + shift; 757 v = A->a + A->i[i] + shift; 758 sum = b[i]; 759 SPARSEDENSEMDOT(sum,xs,v,idx,n); 760 d = fshift + A->a[diag[i]+shift]; 761 n = B->i[i+1] - B->i[i]; 762 idx = B->j + B->i[i] + shift; 763 v = B->a + B->i[i] + shift; 764 SPARSEDENSEMDOT(sum,ls,v,idx,n); 765 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 766 } 767 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 768 mat->Mvctx); CHKERRQ(ierr); 769 } 770 } 771 else if (flag & SOR_FORWARD_SWEEP){ 772 if (flag & SOR_ZERO_INITIAL_GUESS) { 773 VecSet(&zero,mat->lvec); 774 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 775 mat->Mvctx); CHKERRQ(ierr); 776 for ( i=0; i<m; i++ ) { 777 n = diag[i] - A->i[i]; 778 idx = A->j + A->i[i] + shift; 779 v = A->a + A->i[i] + shift; 780 sum = b[i]; 781 SPARSEDENSEMDOT(sum,xs,v,idx,n); 782 d = fshift + A->a[diag[i]+shift]; 783 n = B->i[i+1] - B->i[i]; 784 idx = B->j + B->i[i] + shift; 785 v = B->a + B->i[i] + shift; 786 SPARSEDENSEMDOT(sum,ls,v,idx,n); 787 x[i] = omega*(sum/d); 788 } 789 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 790 mat->Mvctx); CHKERRQ(ierr); 791 its--; 792 } 793 while (its--) { 794 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 795 CHKERRQ(ierr); 796 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 797 CHKERRQ(ierr); 798 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 799 mat->Mvctx); CHKERRQ(ierr); 800 for ( i=0; i<m; i++ ) { 801 n = A->i[i+1] - A->i[i]; 802 idx = A->j + A->i[i] + shift; 803 v = A->a + A->i[i] + shift; 804 sum = b[i]; 805 SPARSEDENSEMDOT(sum,xs,v,idx,n); 806 d = fshift + A->a[diag[i]+shift]; 807 n = B->i[i+1] - B->i[i]; 808 idx = B->j + B->i[i] + shift; 809 v = B->a + B->i[i] + shift; 810 SPARSEDENSEMDOT(sum,ls,v,idx,n); 811 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 812 } 813 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 814 mat->Mvctx); CHKERRQ(ierr); 815 } 816 } 817 else if (flag & SOR_BACKWARD_SWEEP){ 818 if (flag & SOR_ZERO_INITIAL_GUESS) { 819 VecSet(&zero,mat->lvec); 820 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 821 mat->Mvctx); CHKERRQ(ierr); 822 for ( i=m-1; i>-1; i-- ) { 823 n = A->i[i+1] - diag[i] - 1; 824 idx = A->j + diag[i] + !shift; 825 v = A->a + diag[i] + !shift; 826 sum = b[i]; 827 SPARSEDENSEMDOT(sum,xs,v,idx,n); 828 d = fshift + A->a[diag[i]+shift]; 829 n = B->i[i+1] - B->i[i]; 830 idx = B->j + B->i[i] + shift; 831 v = B->a + B->i[i] + shift; 832 SPARSEDENSEMDOT(sum,ls,v,idx,n); 833 x[i] = omega*(sum/d); 834 } 835 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 836 mat->Mvctx); CHKERRQ(ierr); 837 its--; 838 } 839 while (its--) { 840 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 841 mat->Mvctx); CHKERRQ(ierr); 842 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 843 mat->Mvctx); CHKERRQ(ierr); 844 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 845 mat->Mvctx); CHKERRQ(ierr); 846 for ( i=m-1; i>-1; i-- ) { 847 n = A->i[i+1] - A->i[i]; 848 idx = A->j + A->i[i] + shift; 849 v = A->a + A->i[i] + shift; 850 sum = b[i]; 851 SPARSEDENSEMDOT(sum,xs,v,idx,n); 852 d = fshift + A->a[diag[i]+shift]; 853 n = B->i[i+1] - B->i[i]; 854 idx = B->j + B->i[i] + shift; 855 v = B->a + B->i[i] + shift; 856 SPARSEDENSEMDOT(sum,ls,v,idx,n); 857 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 858 } 859 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 860 mat->Mvctx); CHKERRQ(ierr); 861 } 862 } 863 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 864 if (flag & SOR_ZERO_INITIAL_GUESS) { 865 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 866 } 867 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 868 CHKERRQ(ierr); 869 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 870 CHKERRQ(ierr); 871 while (its--) { 872 /* go down through the rows */ 873 for ( i=0; i<m; i++ ) { 874 n = A->i[i+1] - A->i[i]; 875 idx = A->j + A->i[i] + shift; 876 v = A->a + A->i[i] + shift; 877 sum = b[i]; 878 SPARSEDENSEMDOT(sum,xs,v,idx,n); 879 d = fshift + A->a[diag[i]+shift]; 880 n = B->i[i+1] - B->i[i]; 881 idx = B->j + B->i[i] + shift; 882 v = B->a + B->i[i] + shift; 883 SPARSEDENSEMDOT(sum,ls,v,idx,n); 884 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 885 } 886 /* come up through the rows */ 887 for ( i=m-1; i>-1; i-- ) { 888 n = A->i[i+1] - A->i[i]; 889 idx = A->j + A->i[i] + shift; 890 v = A->a + A->i[i] + shift; 891 sum = b[i]; 892 SPARSEDENSEMDOT(sum,xs,v,idx,n); 893 d = fshift + A->a[diag[i]+shift]; 894 n = B->i[i+1] - B->i[i]; 895 idx = B->j + B->i[i] + shift; 896 v = B->a + B->i[i] + shift; 897 SPARSEDENSEMDOT(sum,ls,v,idx,n); 898 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 899 } 900 } 901 } 902 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 903 if (flag & SOR_ZERO_INITIAL_GUESS) { 904 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 905 } 906 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 907 CHKERRQ(ierr); 908 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 909 CHKERRQ(ierr); 910 while (its--) { 911 for ( i=0; i<m; i++ ) { 912 n = A->i[i+1] - A->i[i]; 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 923 } 924 } 925 } 926 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 927 if (flag & SOR_ZERO_INITIAL_GUESS) { 928 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 929 } 930 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 931 mat->Mvctx); CHKERRQ(ierr); 932 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 933 mat->Mvctx); CHKERRQ(ierr); 934 while (its--) { 935 for ( i=m-1; i>-1; i-- ) { 936 n = A->i[i+1] - A->i[i]; 937 idx = A->j + A->i[i] + shift; 938 v = A->a + A->i[i] + shift; 939 sum = b[i]; 940 SPARSEDENSEMDOT(sum,xs,v,idx,n); 941 d = fshift + A->a[diag[i]+shift]; 942 n = B->i[i+1] - B->i[i]; 943 idx = B->j + B->i[i] + shift; 944 v = B->a + B->i[i] + shift; 945 SPARSEDENSEMDOT(sum,ls,v,idx,n); 946 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 947 } 948 } 949 } 950 return 0; 951 } 952 953 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 954 int *nzalloc,int *mem) 955 { 956 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 957 Mat A = mat->A, B = mat->B; 958 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 959 960 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 961 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 962 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 963 if (flag == MAT_LOCAL) { 964 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 965 } else if (flag == MAT_GLOBAL_MAX) { 966 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 967 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 968 } else if (flag == MAT_GLOBAL_SUM) { 969 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 970 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 971 } 972 return 0; 973 } 974 975 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 976 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 977 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 978 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 979 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 980 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 981 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 982 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 983 984 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 985 { 986 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 987 988 if (op == NO_NEW_NONZERO_LOCATIONS || 989 op == YES_NEW_NONZERO_LOCATIONS || 990 op == COLUMNS_SORTED || 991 op == ROW_ORIENTED) { 992 MatSetOption(a->A,op); 993 MatSetOption(a->B,op); 994 } 995 else if (op == ROWS_SORTED || 996 op == SYMMETRIC_MATRIX || 997 op == STRUCTURALLY_SYMMETRIC_MATRIX || 998 op == YES_NEW_DIAGONALS) 999 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1000 else if (op == COLUMN_ORIENTED) 1001 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1002 else if (op == NO_NEW_DIAGONALS) 1003 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1004 else 1005 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1006 return 0; 1007 } 1008 1009 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1010 { 1011 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1012 *m = mat->M; *n = mat->N; 1013 return 0; 1014 } 1015 1016 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1019 *m = mat->m; *n = mat->N; 1020 return 0; 1021 } 1022 1023 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1024 { 1025 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1026 *m = mat->rstart; *n = mat->rend; 1027 return 0; 1028 } 1029 1030 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1031 { 1032 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1033 Scalar *vworkA, *vworkB, **pvA, **pvB; 1034 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1035 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1036 1037 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 1038 lrow = row - rstart; 1039 1040 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1041 if (!v) {pvA = 0; pvB = 0;} 1042 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1043 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1044 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1045 nztot = nzA + nzB; 1046 1047 if (v || idx) { 1048 if (nztot) { 1049 /* Sort by increasing column numbers, assuming A and B already sorted */ 1050 int imark; 1051 if (mat->assembled) { 1052 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1053 } 1054 if (v) { 1055 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1056 for ( i=0; i<nzB; i++ ) { 1057 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1058 else break; 1059 } 1060 imark = i; 1061 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1062 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1063 } 1064 if (idx) { 1065 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1066 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1067 for ( i=0; i<nzB; i++ ) { 1068 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1069 else break; 1070 } 1071 imark = i; 1072 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1073 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1074 } 1075 } 1076 else {*idx = 0; *v=0;} 1077 } 1078 *nz = nztot; 1079 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1080 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1081 return 0; 1082 } 1083 1084 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1085 { 1086 if (idx) PETSCFREE(*idx); 1087 if (v) PETSCFREE(*v); 1088 return 0; 1089 } 1090 1091 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1092 { 1093 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1094 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1095 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1096 double sum = 0.0; 1097 Scalar *v; 1098 1099 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1100 if (aij->size == 1) { 1101 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1102 } else { 1103 if (type == NORM_FROBENIUS) { 1104 v = amat->a; 1105 for (i=0; i<amat->nz; i++ ) { 1106 #if defined(PETSC_COMPLEX) 1107 sum += real(conj(*v)*(*v)); v++; 1108 #else 1109 sum += (*v)*(*v); v++; 1110 #endif 1111 } 1112 v = bmat->a; 1113 for (i=0; i<bmat->nz; i++ ) { 1114 #if defined(PETSC_COMPLEX) 1115 sum += real(conj(*v)*(*v)); v++; 1116 #else 1117 sum += (*v)*(*v); v++; 1118 #endif 1119 } 1120 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1121 *norm = sqrt(*norm); 1122 } 1123 else if (type == NORM_1) { /* max column norm */ 1124 double *tmp, *tmp2; 1125 int *jj, *garray = aij->garray; 1126 tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1127 tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1128 PetscZero(tmp,aij->N*sizeof(double)); 1129 *norm = 0.0; 1130 v = amat->a; jj = amat->j; 1131 for ( j=0; j<amat->nz; j++ ) { 1132 #if defined(PETSC_COMPLEX) 1133 tmp[cstart + *jj++ + shift] += abs(*v++); 1134 #else 1135 tmp[cstart + *jj++ + shift] += fabs(*v++); 1136 #endif 1137 } 1138 v = bmat->a; jj = bmat->j; 1139 for ( j=0; j<bmat->nz; j++ ) { 1140 #if defined(PETSC_COMPLEX) 1141 tmp[garray[*jj++ + shift]] += abs(*v++); 1142 #else 1143 tmp[garray[*jj++ + shift]] += fabs(*v++); 1144 #endif 1145 } 1146 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1147 for ( j=0; j<aij->N; j++ ) { 1148 if (tmp2[j] > *norm) *norm = tmp2[j]; 1149 } 1150 PETSCFREE(tmp); PETSCFREE(tmp2); 1151 } 1152 else if (type == NORM_INFINITY) { /* max row norm */ 1153 double ntemp = 0.0; 1154 for ( j=0; j<amat->m; j++ ) { 1155 v = amat->a + amat->i[j] + shift; 1156 sum = 0.0; 1157 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1158 #if defined(PETSC_COMPLEX) 1159 sum += abs(*v); v++; 1160 #else 1161 sum += fabs(*v); v++; 1162 #endif 1163 } 1164 v = bmat->a + bmat->i[j] + shift; 1165 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1166 #if defined(PETSC_COMPLEX) 1167 sum += abs(*v); v++; 1168 #else 1169 sum += fabs(*v); v++; 1170 #endif 1171 } 1172 if (sum > ntemp) ntemp = sum; 1173 } 1174 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1175 } 1176 else { 1177 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1178 } 1179 } 1180 return 0; 1181 } 1182 1183 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1184 { 1185 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1186 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1187 int ierr,shift = Aloc->indexshift; 1188 Mat B; 1189 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1190 Scalar *array; 1191 1192 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1193 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1194 CHKERRQ(ierr); 1195 1196 /* copy over the A part */ 1197 Aloc = (Mat_SeqAIJ*) a->A->data; 1198 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1199 row = a->rstart; 1200 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1201 for ( i=0; i<m; i++ ) { 1202 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1203 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1204 } 1205 aj = Aloc->j; 1206 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1207 1208 /* copy over the B part */ 1209 Aloc = (Mat_SeqAIJ*) a->B->data; 1210 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1211 row = a->rstart; 1212 ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1213 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1214 for ( i=0; i<m; i++ ) { 1215 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1216 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1217 } 1218 PETSCFREE(ct); 1219 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1220 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1221 if (matout) { 1222 *matout = B; 1223 } else { 1224 /* This isn't really an in-place transpose .... but free data structures from a */ 1225 PETSCFREE(a->rowners); 1226 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1227 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1228 if (a->colmap) PETSCFREE(a->colmap); 1229 if (a->garray) PETSCFREE(a->garray); 1230 if (a->lvec) VecDestroy(a->lvec); 1231 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1232 PETSCFREE(a); 1233 PetscMemcpy(A,B,sizeof(struct _Mat)); 1234 PETSCHEADERDESTROY(B); 1235 } 1236 return 0; 1237 } 1238 1239 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1240 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int); 1241 1242 /* -------------------------------------------------------------------*/ 1243 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1244 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1245 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1246 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1247 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1248 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1249 MatLUFactor_MPIAIJ,0, 1250 MatRelax_MPIAIJ, 1251 MatTranspose_MPIAIJ, 1252 MatGetInfo_MPIAIJ,0, 1253 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1254 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1255 0, 1256 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1257 MatGetReordering_MPIAIJ, 1258 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1259 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1260 MatILUFactorSymbolic_MPIAIJ,0, 1261 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1262 1263 /*@C 1264 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1265 (the default parallel PETSc format). 1266 1267 Input Parameters: 1268 . comm - MPI communicator 1269 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1270 . n - number of local columns (or PETSC_DECIDE to have calculated 1271 if N is given) 1272 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1273 . N - number of global columns (or PETSC_DECIDE to have calculated 1274 if n is given) 1275 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1276 (same for all local rows) 1277 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1278 or null (possibly different for each row). You must leave room 1279 for the diagonal entry even if it is zero. 1280 . o_nz - number of nonzeros per row in off-diagonal portion of local 1281 submatrix (same for all local rows). 1282 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1283 submatrix or null (possibly different for each row). 1284 1285 Output Parameter: 1286 . newmat - the matrix 1287 1288 Notes: 1289 The AIJ format (also called the Yale sparse matrix format or 1290 compressed row storage), is fully compatible with standard Fortran 77 1291 storage. That is, the stored row and column indices can begin at 1292 either one (as in Fortran) or zero. See the users manual for details. 1293 1294 The user MUST specify either the local or global matrix dimensions 1295 (possibly both). 1296 1297 Storage Information: 1298 For a square global matrix we define each processor's diagonal portion 1299 to be its local rows and the corresponding columns (a square submatrix); 1300 each processor's off-diagonal portion encompasses the remainder of the 1301 local matrix (a rectangular submatrix). 1302 1303 The user can specify preallocated storage for the diagonal part of 1304 the local submatrix with either d_nz or d_nnz (not both). Set both 1305 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1306 Likewise, specify preallocated storage for the off-diagonal part of 1307 the local submatrix with o_nz or o_nnz (not both). 1308 1309 .keywords: matrix, aij, compressed row, sparse, parallel 1310 1311 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1312 @*/ 1313 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1314 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1315 { 1316 Mat mat; 1317 Mat_MPIAIJ *a; 1318 int ierr, i,sum[2],work[2]; 1319 1320 *newmat = 0; 1321 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1322 PLogObjectCreate(mat); 1323 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1324 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1325 mat->destroy = MatDestroy_MPIAIJ; 1326 mat->view = MatView_MPIAIJ; 1327 mat->factor = 0; 1328 1329 a->insertmode = NOT_SET_VALUES; 1330 MPI_Comm_rank(comm,&a->rank); 1331 MPI_Comm_size(comm,&a->size); 1332 1333 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1334 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1335 1336 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1337 work[0] = m; work[1] = n; 1338 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1339 if (M == PETSC_DECIDE) M = sum[0]; 1340 if (N == PETSC_DECIDE) N = sum[1]; 1341 } 1342 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1343 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1344 a->m = m; 1345 a->n = n; 1346 a->N = N; 1347 a->M = M; 1348 1349 /* build local table of row and column ownerships */ 1350 a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1351 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 1352 sizeof(Mat_MPIAIJ)); 1353 a->cowners = a->rowners + a->size + 1; 1354 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1355 a->rowners[0] = 0; 1356 for ( i=2; i<=a->size; i++ ) { 1357 a->rowners[i] += a->rowners[i-1]; 1358 } 1359 a->rstart = a->rowners[a->rank]; 1360 a->rend = a->rowners[a->rank+1]; 1361 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1362 a->cowners[0] = 0; 1363 for ( i=2; i<=a->size; i++ ) { 1364 a->cowners[i] += a->cowners[i-1]; 1365 } 1366 a->cstart = a->cowners[a->rank]; 1367 a->cend = a->cowners[a->rank+1]; 1368 1369 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1370 PLogObjectParent(mat,a->A); 1371 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1372 PLogObjectParent(mat,a->B); 1373 1374 /* build cache for off array entries formed */ 1375 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1376 a->colmap = 0; 1377 a->garray = 0; 1378 1379 /* stuff used for matrix vector multiply */ 1380 a->lvec = 0; 1381 a->Mvctx = 0; 1382 a->assembled = 0; 1383 1384 *newmat = mat; 1385 return 0; 1386 } 1387 1388 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1389 { 1390 Mat mat; 1391 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1392 int ierr, len; 1393 1394 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1395 *newmat = 0; 1396 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1397 PLogObjectCreate(mat); 1398 mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1399 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1400 mat->destroy = MatDestroy_MPIAIJ; 1401 mat->view = MatView_MPIAIJ; 1402 mat->factor = matin->factor; 1403 1404 a->m = oldmat->m; 1405 a->n = oldmat->n; 1406 a->M = oldmat->M; 1407 a->N = oldmat->N; 1408 1409 a->assembled = 1; 1410 a->rstart = oldmat->rstart; 1411 a->rend = oldmat->rend; 1412 a->cstart = oldmat->cstart; 1413 a->cend = oldmat->cend; 1414 a->size = oldmat->size; 1415 a->rank = oldmat->rank; 1416 a->insertmode = NOT_SET_VALUES; 1417 1418 a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1419 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1420 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1421 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1422 if (oldmat->colmap) { 1423 a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1424 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1425 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1426 } else a->colmap = 0; 1427 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1428 a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1429 PLogObjectMemory(mat,len*sizeof(int)); 1430 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1431 } else a->garray = 0; 1432 1433 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1434 PLogObjectParent(mat,a->lvec); 1435 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1436 PLogObjectParent(mat,a->Mvctx); 1437 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1438 PLogObjectParent(mat,a->A); 1439 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1440 PLogObjectParent(mat,a->B); 1441 *newmat = mat; 1442 return 0; 1443 } 1444 1445 #include "sysio.h" 1446 1447 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1448 { 1449 Mat A; 1450 int i, nz, ierr, j,rstart, rend, fd; 1451 Scalar *vals,*svals; 1452 PetscObject vobj = (PetscObject) bview; 1453 MPI_Comm comm = vobj->comm; 1454 MPI_Status status; 1455 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1456 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1457 int tag = ((PetscObject)bview)->tag; 1458 1459 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1460 if (!rank) { 1461 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1462 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1463 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1464 } 1465 1466 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1467 M = header[1]; N = header[2]; 1468 /* determine ownership of all rows */ 1469 m = M/size + ((M % size) > rank); 1470 rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1471 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1472 rowners[0] = 0; 1473 for ( i=2; i<=size; i++ ) { 1474 rowners[i] += rowners[i-1]; 1475 } 1476 rstart = rowners[rank]; 1477 rend = rowners[rank+1]; 1478 1479 /* distribute row lengths to all processors */ 1480 ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1481 offlens = ourlens + (rend-rstart); 1482 if (!rank) { 1483 rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1484 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1485 sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts); 1486 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1487 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1488 PETSCFREE(sndcounts); 1489 } 1490 else { 1491 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1492 } 1493 1494 if (!rank) { 1495 /* calculate the number of nonzeros on each processor */ 1496 procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz); 1497 PetscZero(procsnz,size*sizeof(int)); 1498 for ( i=0; i<size; i++ ) { 1499 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1500 procsnz[i] += rowlengths[j]; 1501 } 1502 } 1503 PETSCFREE(rowlengths); 1504 1505 /* determine max buffer needed and allocate it */ 1506 maxnz = 0; 1507 for ( i=0; i<size; i++ ) { 1508 maxnz = PETSCMAX(maxnz,procsnz[i]); 1509 } 1510 cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1511 1512 /* read in my part of the matrix column indices */ 1513 nz = procsnz[0]; 1514 mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1515 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1516 1517 /* read in every one elses and ship off */ 1518 for ( i=1; i<size; i++ ) { 1519 nz = procsnz[i]; 1520 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1521 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1522 } 1523 PETSCFREE(cols); 1524 } 1525 else { 1526 /* determine buffer space needed for message */ 1527 nz = 0; 1528 for ( i=0; i<m; i++ ) { 1529 nz += ourlens[i]; 1530 } 1531 mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1532 1533 /* receive message of column indices*/ 1534 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1535 MPI_Get_count(&status,MPI_INT,&maxnz); 1536 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1537 } 1538 1539 /* loop over local rows, determining number of off diagonal entries */ 1540 PetscZero(offlens,m*sizeof(int)); 1541 jj = 0; 1542 for ( i=0; i<m; i++ ) { 1543 for ( j=0; j<ourlens[i]; j++ ) { 1544 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1545 jj++; 1546 } 1547 } 1548 1549 /* create our matrix */ 1550 for ( i=0; i<m; i++ ) { 1551 ourlens[i] -= offlens[i]; 1552 } 1553 if (type == MATMPIAIJ) { 1554 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1555 } 1556 else if (type == MATMPIROW) { 1557 ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1558 } 1559 A = *newmat; 1560 MatSetOption(A,COLUMNS_SORTED); 1561 for ( i=0; i<m; i++ ) { 1562 ourlens[i] += offlens[i]; 1563 } 1564 1565 if (!rank) { 1566 vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1567 1568 /* read in my part of the matrix numerical values */ 1569 nz = procsnz[0]; 1570 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1571 1572 /* insert into matrix */ 1573 jj = rstart; 1574 smycols = mycols; 1575 svals = vals; 1576 for ( i=0; i<m; i++ ) { 1577 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1578 smycols += ourlens[i]; 1579 svals += ourlens[i]; 1580 jj++; 1581 } 1582 1583 /* read in other processors and ship out */ 1584 for ( i=1; i<size; i++ ) { 1585 nz = procsnz[i]; 1586 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1587 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1588 } 1589 PETSCFREE(procsnz); 1590 } 1591 else { 1592 /* receive numeric values */ 1593 vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1594 1595 /* receive message of values*/ 1596 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1597 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1598 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1599 1600 /* insert into matrix */ 1601 jj = rstart; 1602 smycols = mycols; 1603 svals = vals; 1604 for ( i=0; i<m; i++ ) { 1605 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1606 smycols += ourlens[i]; 1607 svals += ourlens[i]; 1608 jj++; 1609 } 1610 } 1611 PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1612 1613 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1614 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1615 return 0; 1616 } 1617