#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.67 1995/08/18 13:42:21 bsmith Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
  int        n = B->n,i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
   The assembly code is a lot like the code for vectors; we should
   eventually derive a single assembly routine that can be used for
   either case.
*/
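/*
   Caller-side sketch (added for illustration; the variable names and
   indices are arbitrary): entries are set with global indices from any
   processor and the matrix is then assembled.  Rows owned by another
   processor are cached in the stash by MatSetValues_MPIAIJ() above and
   shipped to their owners by the MatAssemblyBegin/End pair below.

       int    ierr, row = 0, col = 5;            (global indices)
       Scalar value = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&value,INSERTVALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY);   CHKERRQ(ierr);
*/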
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
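  /*
     Layout sketch (added for illustration; the numbers are arbitrary): if
     this processor has stashed the two remote entries (row 12, col 5,
     value 3.0) and (row 12, col 40, value -1.0), and row 12 is owned by
     one other processor, then the message sent to that owner is the six
     Scalars

          12.0  5.0  3.0   12.0  40.0  -1.0

     and the receiver recovers the integer indices with the (int) casts in
     MatAssemblyEnd_MPIAIJ() below.
  */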
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  PLogObjectParent(A,istmp);
  CHKERRQ(ierr); PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled)
    SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

#include "draw.h"
#include "pviewer.h"

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr, format;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  format = ViewerFileGetFormat_Private(viewer);
  if (vobj->cookie == VIEWER_COOKIE && format == FILE_FORMAT_INFO &&
      (vobj->type == FILE_VIEWER || vobj->type == FILES_VIEWER)) {
    /* do nothing for now */
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
            vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      PLogObjectParent(mat,A);
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat);
/*
    This has to provide several versions.

     1) as per the sequential case
     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
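/*
   Reference note (added comment, not from the original source): with a
   nonzero initial guess the sweeps below update row i as

       x[i] <- (1-omega)*x[i]
               + omega*( x[i] + (b[i] - sum_j a(i,j)*x[j])/(a(i,i) + shift) )

   where the sum runs over the diagonal block (through xs) and the
   off-diagonal block (through ls).  This reading assumes that
   SPARSEDENSEMDOT(sum,xs,v,idx,n) subtracts the sparse dot product from
   sum, as its use in the zero-initial-guess branches suggests.
*/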
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x - 1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is lower triangular,
       U is upper triangular, and D is diagonal.  This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick.  This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED)
    SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v)   PETSCFREE(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel");
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
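   Example:
   The call below is an illustrative sketch only; the global size 100 and
   the preallocation estimates 5 and 2 are arbitrary.  It lets PETSc choose
   the local sizes and preallocates roughly 5 nonzeros per row in the
   diagonal block and 2 per row in the off-diagonal block:

      Mat A;
      int ierr;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,0,2,0,&A); CHKERRQ(ierr);

   Entries are then set with MatSetValues() and the matrix assembled with
   MatAssemblyBegin()/MatAssemblyEnd().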
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIAIJ));
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap = 0;
  aij->garray = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;
  *newmat = 0;

  if (!oldmat->assembled)
    SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NOTSETVALUES;

  aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTRQ(aij->rowners);
  PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIAIJ));
  PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
    CHKPTRQ(aij->colmap);
    PLogObjectMemory(mat,(aij->N)*sizeof(int));
    PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
  } else aij->colmap = 0;
  if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
    aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
  } else aij->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}