1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.60 1995/07/13 16:15:46 curfman Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 18 int n = B->n,i; 19 aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 20 PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int)); 21 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 22 return 0; 23 } 24 25 extern int DisAssemble_MPIAIJ(Mat); 26 27 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 28 { 29 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 30 int ierr; 31 if (aij->numtids == 1) { 32 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 33 } else 34 SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel"); 35 return 0; 36 } 37 38 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 39 int *idxn,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 43 int cstart = aij->cstart, cend = aij->cend,row,col; 44 45 if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) { 46 SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds"); 47 } 48 aij->insertmode = addv; 49 for ( i=0; i<m; i++ ) { 50 if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 51 if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 52 if (idxm[i] >= rstart && idxm[i] < rend) { 53 row = idxm[i] - rstart; 54 for ( j=0; j<n; j++ ) { 55 if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 56 if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 57 if (idxn[j] >= cstart && idxn[j] < cend){ 58 col = idxn[j] - cstart; 59 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 60 } 61 else { 62 if (aij->assembled) { 63 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 64 col = aij->colmap[idxn[j]] - 1; 65 if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 66 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 67 col = idxn[j]; 68 } 69 } 70 else col = idxn[j]; 71 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 72 } 73 } 74 } 75 else { 76 ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); 77 CHKERRQ(ierr); 78 } 79 } 80 return 0; 81 } 82 83 /* 84 the assembly code is a lot like the code for vectors, we should 85 sometime derive a single assembly code that can be used for 86 either case. 87 */ 88 89 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 90 { 91 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 92 MPI_Comm comm = mat->comm; 93 int numtids = aij->numtids, *owners = aij->rowners; 94 int mytid = aij->mytid; 95 MPI_Request *send_waits,*recv_waits; 96 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 97 int tag = mat->tag, *owner,*starts,count,ierr; 98 InsertMode addv; 99 Scalar *rvalues,*svalues; 100 101 /* make sure all processors are either in INSERTMODE or ADDMODE */ 102 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 103 MPI_BOR,comm); 104 if (addv == (ADDVALUES|INSERTVALUES)) { 105 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 106 } 107 aij->insertmode = addv; /* in case this processor had no cache */ 108 109 /* first count number of contributors to each processor */ 110 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 111 PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 112 owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 113 for ( i=0; i<aij->stash.n; i++ ) { 114 idx = aij->stash.idx[i]; 115 for ( j=0; j<numtids; j++ ) { 116 if (idx >= owners[j] && idx < owners[j+1]) { 117 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 118 } 119 } 120 } 121 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 122 123 /* inform other processors of number of messages and max length*/ 124 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 125 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 126 nreceives = work[mytid]; 127 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 128 nmax = work[mytid]; 129 PETSCFREE(work); 130 131 /* post receives: 132 1) each message will consist of ordered pairs 133 (global index,value) we store the global index as a double 134 to simplify the message passing. 135 2) since we don't know how long each individual message is we 136 allocate the largest needed buffer for each receive. Potentially 137 this is a lot of wasted space. 138 139 140 This could be done better. 141 */ 142 rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 143 CHKPTRQ(rvalues); 144 recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 145 CHKPTRQ(recv_waits); 146 for ( i=0; i<nreceives; i++ ) { 147 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 148 comm,recv_waits+i); 149 } 150 151 /* do sends: 152 1) starts[i] gives the starting index in svalues for stuff going to 153 the ith processor 154 */ 155 svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 156 CHKPTRQ(svalues); 157 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 158 CHKPTRQ(send_waits); 159 starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 160 starts[0] = 0; 161 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 162 for ( i=0; i<aij->stash.n; i++ ) { 163 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 164 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 165 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 166 } 167 PETSCFREE(owner); 168 starts[0] = 0; 169 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 170 count = 0; 171 for ( i=0; i<numtids; i++ ) { 172 if (procs[i]) { 173 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 174 comm,send_waits+count++); 175 } 176 } 177 PETSCFREE(starts); PETSCFREE(nprocs); 178 179 /* Free cache space */ 180 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 181 182 aij->svalues = svalues; aij->rvalues = rvalues; 183 aij->nsends = nsends; aij->nrecvs = nreceives; 184 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 185 aij->rmax = nmax; 186 187 return 0; 188 } 189 extern int MatSetUpMultiply_MPIAIJ(Mat); 190 191 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 192 { 193 int ierr; 194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 195 MPI_Status *send_status,recv_status; 196 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 197 int row,col,other_disassembled; 198 Scalar *values,val; 199 InsertMode addv = aij->insertmode; 200 201 /* wait on receives */ 202 while (count) { 203 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 204 /* unpack receives into our local space */ 205 values = aij->rvalues + 3*imdex*aij->rmax; 206 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 207 n = n/3; 208 for ( i=0; i<n; i++ ) { 209 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 210 col = (int) PETSCREAL(values[3*i+1]); 211 val = values[3*i+2]; 212 if (col >= aij->cstart && col < aij->cend) { 213 col -= aij->cstart; 214 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 215 } 216 else { 217 if (aij->assembled) { 218 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 219 col = aij->colmap[col] - 1; 220 if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 221 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 222 col = (int) PETSCREAL(values[3*i+1]); 223 } 224 } 225 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 226 } 227 } 228 count--; 229 } 230 PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 231 232 /* wait on sends */ 233 if (aij->nsends) { 234 send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 235 CHKPTRQ(send_status); 236 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 237 PETSCFREE(send_status); 238 } 239 PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 240 241 aij->insertmode = NOTSETVALUES; 242 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 243 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 244 245 /* determine if any processor has disassembled, if so we must 246 also disassemble ourselfs, in order that we may reassemble. */ 247 MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 248 MPI_INT,MPI_PROD,mat->comm); 249 if (aij->assembled && !other_disassembled) { 250 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 251 } 252 253 if (!aij->assembled && mode == FINAL_ASSEMBLY) { 254 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 255 aij->assembled = 1; 256 } 257 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 258 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 259 260 return 0; 261 } 262 263 static int MatZeroEntries_MPIAIJ(Mat A) 264 { 265 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 266 int ierr; 267 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 268 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 269 return 0; 270 } 271 272 /* again this uses the same basic stratagy as in the assembly and 273 scatter create routines, we should try to do it systemamatically 274 if we can figure out the proper level of generality. */ 275 276 /* the code does not do the diagonal entries correctly unless the 277 matrix is square and the column and row owerships are identical. 278 This is a BUG. The only way to fix it seems to be to access 279 aij->A and aij->B directly and not through the MatZeroRows() 280 routine. 281 */ 282 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 283 { 284 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 285 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 286 int *procs,*nprocs,j,found,idx,nsends,*work; 287 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 288 int *rvalues,tag = A->tag,count,base,slen,n,*source; 289 int *lens,imdex,*lrows,*values; 290 MPI_Comm comm = A->comm; 291 MPI_Request *send_waits,*recv_waits; 292 MPI_Status recv_status,*send_status; 293 IS istmp; 294 295 if (!l->assembled) 296 SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first"); 297 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 298 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 299 300 /* first count number of contributors to each processor */ 301 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 302 PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 303 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 304 for ( i=0; i<N; i++ ) { 305 idx = rows[i]; 306 found = 0; 307 for ( j=0; j<numtids; j++ ) { 308 if (idx >= owners[j] && idx < owners[j+1]) { 309 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 310 } 311 } 312 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 313 } 314 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 315 316 /* inform other processors of number of messages and max length*/ 317 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 318 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 319 nrecvs = work[mytid]; 320 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 321 nmax = work[mytid]; 322 PETSCFREE(work); 323 324 /* post receives: */ 325 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 326 CHKPTRQ(rvalues); 327 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 328 CHKPTRQ(recv_waits); 329 for ( i=0; i<nrecvs; i++ ) { 330 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 331 comm,recv_waits+i); 332 } 333 334 /* do sends: 335 1) starts[i] gives the starting index in svalues for stuff going to 336 the ith processor 337 */ 338 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 339 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 340 CHKPTRQ(send_waits); 341 starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 342 starts[0] = 0; 343 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 344 for ( i=0; i<N; i++ ) { 345 svalues[starts[owner[i]]++] = rows[i]; 346 } 347 ISRestoreIndices(is,&rows); 348 349 starts[0] = 0; 350 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 351 count = 0; 352 for ( i=0; i<numtids; i++ ) { 353 if (procs[i]) { 354 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 355 comm,send_waits+count++); 356 } 357 } 358 PETSCFREE(starts); 359 360 base = owners[mytid]; 361 362 /* wait on receives */ 363 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 364 source = lens + nrecvs; 365 count = nrecvs; slen = 0; 366 while (count) { 367 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 368 /* unpack receives into our local space */ 369 MPI_Get_count(&recv_status,MPI_INT,&n); 370 source[imdex] = recv_status.MPI_SOURCE; 371 lens[imdex] = n; 372 slen += n; 373 count--; 374 } 375 PETSCFREE(recv_waits); 376 377 /* move the data into the send scatter */ 378 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 379 count = 0; 380 for ( i=0; i<nrecvs; i++ ) { 381 values = rvalues + i*nmax; 382 for ( j=0; j<lens[i]; j++ ) { 383 lrows[count++] = values[j] - base; 384 } 385 } 386 PETSCFREE(rvalues); PETSCFREE(lens); 387 PETSCFREE(owner); PETSCFREE(nprocs); 388 389 /* actually zap the local rows */ 390 ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 391 CHKERRQ(ierr); PETSCFREE(lrows); 392 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 393 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 394 ierr = ISDestroy(istmp); CHKERRQ(ierr); 395 396 /* wait on sends */ 397 if (nsends) { 398 send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 399 CHKPTRQ(send_status); 400 MPI_Waitall(nsends,send_waits,send_status); 401 PETSCFREE(send_status); 402 } 403 PETSCFREE(send_waits); PETSCFREE(svalues); 404 405 406 return 0; 407 } 408 409 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 412 int ierr; 413 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 414 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 415 CHKERRQ(ierr); 416 ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 417 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 418 CHKERRQ(ierr); 419 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 420 return 0; 421 } 422 423 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 426 int ierr; 427 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 428 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 429 CHKERRQ(ierr); 430 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 431 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 432 CHKERRQ(ierr); 433 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 434 return 0; 435 } 436 437 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 440 int ierr; 441 442 if (!aij->assembled) 443 SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first"); 444 /* do nondiagonal part */ 445 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 446 /* send it on its way */ 447 ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 448 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 449 /* do local part */ 450 ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 451 /* receive remote parts: note this assumes the values are not actually */ 452 /* inserted in yy until the next line, which is true for my implementation*/ 453 /* but is not perhaps always true. */ 454 ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 455 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 456 return 0; 457 } 458 459 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 462 int ierr; 463 464 if (!aij->assembled) 465 SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first"); 466 /* do nondiagonal part */ 467 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 468 /* send it on its way */ 469 ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 470 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 471 /* do local part */ 472 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 473 /* receive remote parts: note this assumes the values are not actually */ 474 /* inserted in yy until the next line, which is true for my implementation*/ 475 /* but is not perhaps always true. */ 476 ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 477 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 478 return 0; 479 } 480 481 /* 482 This only works correctly for square matrices where the subblock A->A is the 483 diagonal block 484 */ 485 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 486 { 487 Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 488 if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first"); 489 return MatGetDiagonal(A->A,v); 490 } 491 492 static int MatDestroy_MPIAIJ(PetscObject obj) 493 { 494 Mat mat = (Mat) obj; 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 496 int ierr; 497 #if defined(PETSC_LOG) 498 PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 499 #endif 500 PETSCFREE(aij->rowners); 501 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 502 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 503 if (aij->colmap) PETSCFREE(aij->colmap); 504 if (aij->garray) PETSCFREE(aij->garray); 505 if (aij->lvec) VecDestroy(aij->lvec); 506 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 507 PETSCFREE(aij); 508 PLogObjectDestroy(mat); 509 PETSCHEADERDESTROY(mat); 510 return 0; 511 } 512 #include "draw.h" 513 #include "pviewer.h" 514 515 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 516 { 517 Mat mat = (Mat) obj; 518 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 519 int ierr; 520 PetscObject vobj = (PetscObject) viewer; 521 522 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first"); 523 if (!viewer) { /* so that viewers may be used from debuggers */ 524 viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer; 525 } 526 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 527 if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 528 FILE *fd = ViewerFileGetPointer_Private(viewer); 529 MPIU_Seq_begin(mat->comm,1); 530 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 531 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 532 aij->cend); 533 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 534 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 535 fflush(fd); 536 MPIU_Seq_end(mat->comm,1); 537 } 538 else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 539 vobj->cookie == DRAW_COOKIE) { 540 int numtids = aij->numtids, mytid = aij->mytid; 541 if (numtids == 1) { 542 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 543 } 544 else { 545 /* assemble the entire matrix onto first processor. */ 546 Mat A; 547 Mat_AIJ *Aloc; 548 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 549 Scalar *a; 550 551 if (!mytid) { 552 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 553 } 554 else { 555 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 556 } 557 CHKERRQ(ierr); 558 559 /* copy over the A part */ 560 Aloc = (Mat_AIJ*) aij->A->data; 561 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 562 row = aij->rstart; 563 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 564 for ( i=0; i<m; i++ ) { 565 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 566 CHKERRQ(ierr); 567 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 568 } 569 aj = Aloc->j; 570 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 571 572 /* copy over the B part */ 573 Aloc = (Mat_AIJ*) aij->B->data; 574 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 575 row = aij->rstart; 576 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 577 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 578 for ( i=0; i<m; i++ ) { 579 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 580 CHKERRQ(ierr); 581 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 582 } 583 PETSCFREE(ct); 584 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 585 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 586 if (!mytid) { 587 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 588 } 589 ierr = MatDestroy(A); CHKERRQ(ierr); 590 } 591 } 592 return 0; 593 } 594 595 extern int MatMarkDiag_AIJ(Mat_AIJ *); 596 /* 597 This has to provide several versions. 598 599 1) per sequential 600 2) a) use only local smoothing updating outer values only once. 601 b) local smoothing updating outer values each inner iteration 602 3) color updating out values betwen colors. 603 */ 604 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 605 double shift,int its,Vec xx) 606 { 607 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 608 Mat AA = mat->A, BB = mat->B; 609 Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 610 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 611 int ierr,*idx, *diag; 612 int n = mat->n, m = mat->m, i; 613 Vec tt; 614 615 if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first"); 616 617 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 618 xs = x -1; /* shift by one for index start of 1 */ 619 ls--; 620 if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 621 diag = A->diag; 622 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 623 SETERRQ(1,"MatRelax_MPIAIJ:option not yet support"); 624 } 625 if (flag & SOR_EISENSTAT) { 626 /* Let A = L + U + D; where L is lower trianglar, 627 U is upper triangular, E is diagonal; This routine applies 628 629 (L + E)^{-1} A (U + E)^{-1} 630 631 to a vector efficiently using Eisenstat's trick. This is for 632 the case of SSOR preconditioner, so E is D/omega where omega 633 is the relaxation factor. 634 */ 635 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 636 VecGetArray(tt,&t); 637 scale = (2.0/omega) - 1.0; 638 /* x = (E + U)^{-1} b */ 639 VecSet(&zero,mat->lvec); 640 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 641 mat->Mvctx); CHKERRQ(ierr); 642 for ( i=m-1; i>-1; i-- ) { 643 n = A->i[i+1] - diag[i] - 1; 644 idx = A->j + diag[i]; 645 v = A->a + diag[i]; 646 sum = b[i]; 647 SPARSEDENSEMDOT(sum,xs,v,idx,n); 648 d = shift + A->a[diag[i]-1]; 649 n = B->i[i+1] - B->i[i]; 650 idx = B->j + B->i[i] - 1; 651 v = B->a + B->i[i] - 1; 652 SPARSEDENSEMDOT(sum,ls,v,idx,n); 653 x[i] = omega*(sum/d); 654 } 655 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 656 mat->Mvctx); CHKERRQ(ierr); 657 658 /* t = b - (2*E - D)x */ 659 v = A->a; 660 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 661 662 /* t = (E + L)^{-1}t */ 663 ts = t - 1; /* shifted by one for index start of a or mat->j*/ 664 diag = A->diag; 665 VecSet(&zero,mat->lvec); 666 ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 667 mat->Mvctx); CHKERRQ(ierr); 668 for ( i=0; i<m; i++ ) { 669 n = diag[i] - A->i[i]; 670 idx = A->j + A->i[i] - 1; 671 v = A->a + A->i[i] - 1; 672 sum = t[i]; 673 SPARSEDENSEMDOT(sum,ts,v,idx,n); 674 d = shift + A->a[diag[i]-1]; 675 n = B->i[i+1] - B->i[i]; 676 idx = B->j + B->i[i] - 1; 677 v = B->a + B->i[i] - 1; 678 SPARSEDENSEMDOT(sum,ls,v,idx,n); 679 t[i] = omega*(sum/d); 680 } 681 ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 682 mat->Mvctx); CHKERRQ(ierr); 683 /* x = x + t */ 684 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 685 VecDestroy(tt); 686 return 0; 687 } 688 689 690 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 691 if (flag & SOR_ZERO_INITIAL_GUESS) { 692 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 693 } 694 else { 695 ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 696 CHKERRQ(ierr); 697 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 698 CHKERRQ(ierr); 699 } 700 while (its--) { 701 /* go down through the rows */ 702 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 703 mat->Mvctx); CHKERRQ(ierr); 704 for ( i=0; i<m; i++ ) { 705 n = A->i[i+1] - A->i[i]; 706 idx = A->j + A->i[i] - 1; 707 v = A->a + A->i[i] - 1; 708 sum = b[i]; 709 SPARSEDENSEMDOT(sum,xs,v,idx,n); 710 d = shift + A->a[diag[i]-1]; 711 n = B->i[i+1] - B->i[i]; 712 idx = B->j + B->i[i] - 1; 713 v = B->a + B->i[i] - 1; 714 SPARSEDENSEMDOT(sum,ls,v,idx,n); 715 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 716 } 717 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 718 mat->Mvctx); CHKERRQ(ierr); 719 /* come up through the rows */ 720 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 721 mat->Mvctx); CHKERRQ(ierr); 722 for ( i=m-1; i>-1; i-- ) { 723 n = A->i[i+1] - A->i[i]; 724 idx = A->j + A->i[i] - 1; 725 v = A->a + A->i[i] - 1; 726 sum = b[i]; 727 SPARSEDENSEMDOT(sum,xs,v,idx,n); 728 d = shift + A->a[diag[i]-1]; 729 n = B->i[i+1] - B->i[i]; 730 idx = B->j + B->i[i] - 1; 731 v = B->a + B->i[i] - 1; 732 SPARSEDENSEMDOT(sum,ls,v,idx,n); 733 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 734 } 735 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 736 mat->Mvctx); CHKERRQ(ierr); 737 } 738 } 739 else if (flag & SOR_FORWARD_SWEEP){ 740 if (flag & SOR_ZERO_INITIAL_GUESS) { 741 VecSet(&zero,mat->lvec); 742 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 743 mat->Mvctx); CHKERRQ(ierr); 744 for ( i=0; i<m; i++ ) { 745 n = diag[i] - A->i[i]; 746 idx = A->j + A->i[i] - 1; 747 v = A->a + A->i[i] - 1; 748 sum = b[i]; 749 SPARSEDENSEMDOT(sum,xs,v,idx,n); 750 d = shift + A->a[diag[i]-1]; 751 n = B->i[i+1] - B->i[i]; 752 idx = B->j + B->i[i] - 1; 753 v = B->a + B->i[i] - 1; 754 SPARSEDENSEMDOT(sum,ls,v,idx,n); 755 x[i] = omega*(sum/d); 756 } 757 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 758 mat->Mvctx); CHKERRQ(ierr); 759 its--; 760 } 761 while (its--) { 762 ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 763 CHKERRQ(ierr); 764 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 765 CHKERRQ(ierr); 766 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 767 mat->Mvctx); CHKERRQ(ierr); 768 for ( i=0; i<m; i++ ) { 769 n = A->i[i+1] - A->i[i]; 770 idx = A->j + A->i[i] - 1; 771 v = A->a + A->i[i] - 1; 772 sum = b[i]; 773 SPARSEDENSEMDOT(sum,xs,v,idx,n); 774 d = shift + A->a[diag[i]-1]; 775 n = B->i[i+1] - B->i[i]; 776 idx = B->j + B->i[i] - 1; 777 v = B->a + B->i[i] - 1; 778 SPARSEDENSEMDOT(sum,ls,v,idx,n); 779 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 780 } 781 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 782 mat->Mvctx); CHKERRQ(ierr); 783 } 784 } 785 else if (flag & SOR_BACKWARD_SWEEP){ 786 if (flag & SOR_ZERO_INITIAL_GUESS) { 787 VecSet(&zero,mat->lvec); 788 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 789 mat->Mvctx); CHKERRQ(ierr); 790 for ( i=m-1; i>-1; i-- ) { 791 n = A->i[i+1] - diag[i] - 1; 792 idx = A->j + diag[i]; 793 v = A->a + diag[i]; 794 sum = b[i]; 795 SPARSEDENSEMDOT(sum,xs,v,idx,n); 796 d = shift + A->a[diag[i]-1]; 797 n = B->i[i+1] - B->i[i]; 798 idx = B->j + B->i[i] - 1; 799 v = B->a + B->i[i] - 1; 800 SPARSEDENSEMDOT(sum,ls,v,idx,n); 801 x[i] = omega*(sum/d); 802 } 803 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 804 mat->Mvctx); CHKERRQ(ierr); 805 its--; 806 } 807 while (its--) { 808 ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 809 mat->Mvctx); CHKERRQ(ierr); 810 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 811 mat->Mvctx); CHKERRQ(ierr); 812 ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 813 mat->Mvctx); CHKERRQ(ierr); 814 for ( i=m-1; i>-1; i-- ) { 815 n = A->i[i+1] - A->i[i]; 816 idx = A->j + A->i[i] - 1; 817 v = A->a + A->i[i] - 1; 818 sum = b[i]; 819 SPARSEDENSEMDOT(sum,xs,v,idx,n); 820 d = shift + A->a[diag[i]-1]; 821 n = B->i[i+1] - B->i[i]; 822 idx = B->j + B->i[i] - 1; 823 v = B->a + B->i[i] - 1; 824 SPARSEDENSEMDOT(sum,ls,v,idx,n); 825 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 826 } 827 ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 828 mat->Mvctx); CHKERRQ(ierr); 829 } 830 } 831 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 834 } 835 ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 836 CHKERRQ(ierr); 837 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 838 CHKERRQ(ierr); 839 while (its--) { 840 /* go down through the rows */ 841 for ( i=0; i<m; i++ ) { 842 n = A->i[i+1] - A->i[i]; 843 idx = A->j + A->i[i] - 1; 844 v = A->a + A->i[i] - 1; 845 sum = b[i]; 846 SPARSEDENSEMDOT(sum,xs,v,idx,n); 847 d = shift + A->a[diag[i]-1]; 848 n = B->i[i+1] - B->i[i]; 849 idx = B->j + B->i[i] - 1; 850 v = B->a + B->i[i] - 1; 851 SPARSEDENSEMDOT(sum,ls,v,idx,n); 852 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 853 } 854 /* come up through the rows */ 855 for ( i=m-1; i>-1; i-- ) { 856 n = A->i[i+1] - A->i[i]; 857 idx = A->j + A->i[i] - 1; 858 v = A->a + A->i[i] - 1; 859 sum = b[i]; 860 SPARSEDENSEMDOT(sum,xs,v,idx,n); 861 d = shift + A->a[diag[i]-1]; 862 n = B->i[i+1] - B->i[i]; 863 idx = B->j + B->i[i] - 1; 864 v = B->a + B->i[i] - 1; 865 SPARSEDENSEMDOT(sum,ls,v,idx,n); 866 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 867 } 868 } 869 } 870 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 873 } 874 ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 875 CHKERRQ(ierr); 876 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 877 CHKERRQ(ierr); 878 while (its--) { 879 for ( i=0; i<m; i++ ) { 880 n = A->i[i+1] - A->i[i]; 881 idx = A->j + A->i[i] - 1; 882 v = A->a + A->i[i] - 1; 883 sum = b[i]; 884 SPARSEDENSEMDOT(sum,xs,v,idx,n); 885 d = shift + A->a[diag[i]-1]; 886 n = B->i[i+1] - B->i[i]; 887 idx = B->j + B->i[i] - 1; 888 v = B->a + B->i[i] - 1; 889 SPARSEDENSEMDOT(sum,ls,v,idx,n); 890 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 891 } 892 } 893 } 894 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 895 if (flag & SOR_ZERO_INITIAL_GUESS) { 896 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 897 } 898 ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL, 899 mat->Mvctx); CHKERRQ(ierr); 900 ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL, 901 mat->Mvctx); CHKERRQ(ierr); 902 while (its--) { 903 for ( i=m-1; i>-1; i-- ) { 904 n = A->i[i+1] - A->i[i]; 905 idx = A->j + A->i[i] - 1; 906 v = A->a + A->i[i] - 1; 907 sum = b[i]; 908 SPARSEDENSEMDOT(sum,xs,v,idx,n); 909 d = shift + A->a[diag[i]-1]; 910 n = B->i[i+1] - B->i[i]; 911 idx = B->j + B->i[i] - 1; 912 v = B->a + B->i[i] - 1; 913 SPARSEDENSEMDOT(sum,ls,v,idx,n); 914 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 915 } 916 } 917 } 918 return 0; 919 } 920 921 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 922 int *nzalloc,int *mem) 923 { 924 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925 Mat A = mat->A, B = mat->B; 926 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 927 928 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 929 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 930 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 931 if (flag == MAT_LOCAL) { 932 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 933 } else if (flag == MAT_GLOBAL_MAX) { 934 MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 935 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 936 } else if (flag == MAT_GLOBAL_SUM) { 937 MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 938 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 939 } 940 return 0; 941 } 942 943 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 944 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 945 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 946 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 947 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 948 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 949 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 950 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 951 952 static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 953 { 954 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 955 956 if (op == NO_NEW_NONZERO_LOCATIONS) { 957 MatSetOption(aij->A,op); 958 MatSetOption(aij->B,op); 959 } 960 else if (op == YES_NEW_NONZERO_LOCATIONS) { 961 MatSetOption(aij->A,op); 962 MatSetOption(aij->B,op); 963 } 964 else if (op == COLUMN_ORIENTED) 965 SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported"); 966 return 0; 967 } 968 969 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 970 { 971 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 972 *m = mat->M; *n = mat->N; 973 return 0; 974 } 975 976 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 977 { 978 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 979 *m = mat->m; *n = mat->N; 980 return 0; 981 } 982 983 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 984 { 985 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 986 *m = mat->rstart; *n = mat->rend; 987 return 0; 988 } 989 990 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 991 { 992 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 993 Scalar *vworkA, *vworkB, **pvA, **pvB; 994 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 995 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 996 997 if (row < rstart || row >= rend) 998 SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows") 999 lrow = row - rstart; 1000 1001 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1002 if (!v) {pvA = 0; pvB = 0;} 1003 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1004 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1005 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1006 nztot = nzA + nzB; 1007 1008 if (v || idx) { 1009 if (nztot) { 1010 /* Sort by increasing column numbers, assuming A and B already sorted */ 1011 int imark; 1012 if (mat->assembled) { 1013 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1014 } 1015 if (v) { 1016 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1017 for ( i=0; i<nzB; i++ ) { 1018 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1019 else break; 1020 } 1021 imark = i; 1022 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1023 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1024 } 1025 if (idx) { 1026 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1027 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1028 for ( i=0; i<nzB; i++ ) { 1029 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1030 else break; 1031 } 1032 imark = i; 1033 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1034 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1035 } 1036 } 1037 else {*idx = 0; *v=0;} 1038 } 1039 *nz = nztot; 1040 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1041 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1042 return 0; 1043 } 1044 1045 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1046 { 1047 if (idx) PETSCFREE(*idx); 1048 if (v) PETSCFREE(*v); 1049 return 0; 1050 } 1051 1052 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1053 { 1054 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1055 int ierr; 1056 if (aij->numtids == 1) { 1057 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1058 } else 1059 SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel"); 1060 return 0; 1061 } 1062 1063 static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1064 { 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1066 int ierr; 1067 Mat B; 1068 Mat_AIJ *Aloc; 1069 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1070 Scalar *array; 1071 1072 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1073 CHKERRQ(ierr); 1074 1075 /* copy over the A part */ 1076 Aloc = (Mat_AIJ*) a->A->data; 1077 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1078 row = a->rstart; 1079 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1080 for ( i=0; i<m; i++ ) { 1081 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1082 CHKERRQ(ierr); 1083 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1084 } 1085 aj = Aloc->j; 1086 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1087 1088 /* copy over the B part */ 1089 Aloc = (Mat_AIJ*) a->B->data; 1090 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1091 row = a->rstart; 1092 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1093 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];} 1094 for ( i=0; i<m; i++ ) { 1095 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1096 CHKERRQ(ierr); 1097 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1098 } 1099 PETSCFREE(ct); 1100 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1101 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1102 *Bin = B; 1103 return 0; 1104 } 1105 1106 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1107 static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1108 1109 /* -------------------------------------------------------------------*/ 1110 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1111 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1112 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1113 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1114 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1115 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1116 MatLUFactor_MPIAIJ,0, 1117 MatRelax_MPIAIJ, 1118 MatTranspose_MPIAIJ, 1119 MatGetInfo_MPIAIJ,0, 1120 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1121 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1122 0, 1123 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1124 MatGetReordering_MPIAIJ, 1125 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1126 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1127 MatILUFactorSymbolic_MPIAIJ,0, 1128 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1129 1130 /*@ 1131 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1132 (the default parallel PETSc format). 1133 1134 Input Parameters: 1135 . comm - MPI communicator 1136 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1137 . n - number of local columns (or PETSC_DECIDE to have calculated 1138 if N is given) 1139 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1140 . N - number of global columns (or PETSC_DECIDE to have calculated 1141 if n is given) 1142 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1143 (same for all local rows) 1144 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1145 or null (possibly different for each row). You must leave room 1146 for the diagonal entry even if it is zero. 1147 . o_nz - number of nonzeros per row in off-diagonal portion of local 1148 submatrix (same for all local rows). 1149 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1150 submatrix or null (possibly different for each row). 1151 1152 Output Parameter: 1153 . newmat - the matrix 1154 1155 Notes: 1156 The AIJ format (also called the Yale sparse matrix format or 1157 compressed row storage), is fully compatible with standard Fortran 77 1158 storage. That is, the stored row and column indices begin at 1159 one, not zero. See the users manual for further details. 1160 1161 The user MUST specify either the local or global matrix dimensions 1162 (possibly both). 1163 1164 Storage Information: 1165 For a square global matrix we define each processor's diagonal portion 1166 to be its local rows and the corresponding columns (a square submatrix); 1167 each processor's off-diagonal portion encompasses the remainder of the 1168 local matrix (a rectangular submatrix). 1169 1170 The user can specify preallocated storage for the diagonal part of 1171 the local submatrix with either d_nz or d_nnz (not both). Set both 1172 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1173 Likewise, specify preallocated storage for the off-diagonal part of 1174 the local submatrix with o_nz or o_nnz (not both). 1175 1176 .keywords: matrix, aij, compressed row, sparse, parallel 1177 1178 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 1179 @*/ 1180 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1181 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1182 { 1183 Mat mat; 1184 Mat_MPIAIJ *aij; 1185 int ierr, i,sum[2],work[2]; 1186 *newmat = 0; 1187 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1188 PLogObjectCreate(mat); 1189 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1190 mat->ops = &MatOps; 1191 mat->destroy = MatDestroy_MPIAIJ; 1192 mat->view = MatView_MPIAIJ; 1193 mat->factor = 0; 1194 1195 aij->insertmode = NOTSETVALUES; 1196 MPI_Comm_rank(comm,&aij->mytid); 1197 MPI_Comm_size(comm,&aij->numtids); 1198 1199 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1200 work[0] = m; work[1] = n; 1201 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1202 if (M == PETSC_DECIDE) M = sum[0]; 1203 if (N == PETSC_DECIDE) N = sum[1]; 1204 } 1205 if (m == PETSC_DECIDE) 1206 {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1207 if (n == PETSC_DECIDE) 1208 {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 1209 aij->m = m; 1210 aij->n = n; 1211 aij->N = N; 1212 aij->M = M; 1213 1214 /* build local table of row and column ownerships */ 1215 aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 1216 CHKPTRQ(aij->rowners); 1217 aij->cowners = aij->rowners + aij->numtids + 1; 1218 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 1219 aij->rowners[0] = 0; 1220 for ( i=2; i<=aij->numtids; i++ ) { 1221 aij->rowners[i] += aij->rowners[i-1]; 1222 } 1223 aij->rstart = aij->rowners[aij->mytid]; 1224 aij->rend = aij->rowners[aij->mytid+1]; 1225 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 1226 aij->cowners[0] = 0; 1227 for ( i=2; i<=aij->numtids; i++ ) { 1228 aij->cowners[i] += aij->cowners[i-1]; 1229 } 1230 aij->cstart = aij->cowners[aij->mytid]; 1231 aij->cend = aij->cowners[aij->mytid+1]; 1232 1233 1234 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 1235 CHKERRQ(ierr); 1236 PLogObjectParent(mat,aij->A); 1237 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 1238 CHKERRQ(ierr); 1239 PLogObjectParent(mat,aij->B); 1240 1241 /* build cache for off array entries formed */ 1242 ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 1243 aij->colmap = 0; 1244 aij->garray = 0; 1245 1246 /* stuff used for matrix vector multiply */ 1247 aij->lvec = 0; 1248 aij->Mvctx = 0; 1249 aij->assembled = 0; 1250 1251 *newmat = mat; 1252 return 0; 1253 } 1254 1255 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1256 { 1257 Mat mat; 1258 Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 1259 int ierr, len; 1260 *newmat = 0; 1261 1262 if (!oldmat->assembled) 1263 SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix"); 1264 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1265 PLogObjectCreate(mat); 1266 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1267 mat->ops = &MatOps; 1268 mat->destroy = MatDestroy_MPIAIJ; 1269 mat->view = MatView_MPIAIJ; 1270 mat->factor = matin->factor; 1271 1272 aij->m = oldmat->m; 1273 aij->n = oldmat->n; 1274 aij->M = oldmat->M; 1275 aij->N = oldmat->N; 1276 1277 aij->assembled = 1; 1278 aij->rstart = oldmat->rstart; 1279 aij->rend = oldmat->rend; 1280 aij->cstart = oldmat->cstart; 1281 aij->cend = oldmat->cend; 1282 aij->numtids = oldmat->numtids; 1283 aij->mytid = oldmat->mytid; 1284 aij->insertmode = NOTSETVALUES; 1285 1286 aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 1287 CHKPTRQ(aij->rowners); 1288 PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1289 ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 1290 if (oldmat->colmap) { 1291 aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 1292 CHKPTRQ(aij->colmap); 1293 PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 1294 } else aij->colmap = 0; 1295 if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 1296 aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 1297 PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 1298 } else aij->garray = 0; 1299 1300 ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1301 PLogObjectParent(mat,aij->lvec); 1302 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1303 PLogObjectParent(mat,aij->Mvctx); 1304 ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1305 PLogObjectParent(mat,aij->A); 1306 ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1307 PLogObjectParent(mat,aij->B); 1308 *newmat = mat; 1309 return 0; 1310 } 1311