#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.63 1995/08/15 20:28:20 bsmith Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;
  int        n = B->n,i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
   the assembly code is a lot like the code for vectors; we should
   someday derive a single assembly routine that can be used for
   either case.
*/
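/*
   Overview of the assembly protocol used below (a summary of the code in
   this file, not additional functionality):

     - MatSetValues_MPIAIJ() puts locally owned entries directly into the
       diagonal block aij->A or the off-diagonal block aij->B, and stashes
       entries destined for other processors in aij->stash.
     - MatAssemblyBegin_MPIAIJ() determines how many messages each
       processor will send and receive, posts nonblocking receives, and
       ships the stashed (row, column, value) triples with MPI_Isend().
     - MatAssemblyEnd_MPIAIJ() waits on the receives, feeds the incoming
       triples back through MatSetValues() on the local blocks, and then
       assembles aij->A and aij->B.

   A typical user-level calling sequence is therefore (a sketch):

       MatSetValues(mat,...,INSERTVALUES);    -- any number of times
       MatAssemblyBegin(mat,FINAL_ASSEMBLY);
       MatAssemblyEnd(mat,FINAL_ASSEMBLY);
*/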

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); the indices are
          stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

       This could be done better.
  */
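  /* Receive buffer layout (describing the allocation below): there are
     nreceives incoming messages, each containing at most nmax triples of
     Scalars, so receive i unpacks into rvalues + 3*nmax*i.  The "+1"s in
     the mallocs appear to be there only to avoid zero-length allocations. */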
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}

extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
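
  /* Note on the reduction below: each processor contributes its 0/1
     aij->assembled flag and MPI_PROD forms the product, so
     other_disassembled is nonzero only if every processor is still
     assembled; if any processor has disassembled, the product is 0 and
     we must disassemble here as well. */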

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
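  /* This is the same counting/send pattern as in MatAssemblyBegin_MPIAIJ()
     above, except that only the global row indices (ints) are shipped,
     not (row, column, value) triples. */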
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  PLogObjectParent(A,istmp);
  CHKERRQ(ierr); PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled)
    SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}
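/*
   Structure of the multiply routines above (a description of the existing
   code, not a change in behavior): with x split into its locally owned
   values and the ghost values gathered into aij->lvec, the product is

       y = aij->A * x_local + aij->B * lvec

   The scatter that fills aij->lvec is started before the local (diagonal
   block) multiply and completed afterwards, so the communication overlaps
   the local computation.  The transpose routines below run the same
   pattern in reverse, using an ADDVALUES scatter to accumulate the
   off-processor contributions.
*/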

static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
                 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but may not always be true. */
  ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
                 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
                 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but may not always be true. */
  ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
                 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

#include "draw.h"
#include "pviewer.h"

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
            vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      PLogObjectParent(mat,A);
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat);
/*
   This has to provide several versions.

    1) as per the sequential case
    2) a) use only local smoothing, updating outer values only once.
       b) local smoothing, updating outer values each inner iteration
    3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x - 1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular,
       U is strictly upper triangular, and D is diagonal. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of an SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
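
    /* The algebra behind the three steps below (a worked-out note on the
       existing code, ignoring the diagonal shift): since

         A = (L+E) + (U+E) + (D - 2E),

       applying the preconditioned operator to a vector b gives

         (L+E)^{-1} A (U+E)^{-1} b
            = (U+E)^{-1} b + (L+E)^{-1} [ b + (D - 2E) (U+E)^{-1} b ],

       which is exactly the sequence coded below:
            x = (E+U)^{-1} b,   t = b - (2E - D) x,
            t = (E+L)^{-1} t,   x = x + t.                                  */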
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i];
      v   = A->a + diag[i];
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] - 1;
      v   = A->a + A->i[i] - 1;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i];
        v   = A->a + diag[i];
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED)
    SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported");
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v)   PETSCFREE(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel");
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).
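
   Example:
   A minimal sketch of the calling sequence (error checking omitted and
   the global size and preallocation figures purely illustrative):

      Mat    A;
      int    i, col, rstart, rend;
      Scalar one = 1.0;

      MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                      5,0,2,0,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for ( i=rstart; i<rend; i++ ) {
        col = i;
        MatSetValues(A,1,&i,1,&col,&one,INSERTVALUES);
      }
      MatAssemblyBegin(A,FINAL_ASSEMBLY);
      MatAssemblyEnd(A,FINAL_ASSEMBLY);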

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIAIJ));
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap = 0;
  aij->garray = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  *newmat = 0;

  if (!oldmat->assembled)
    SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NOTSETVALUES;

  aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTRQ(aij->rowners);
  PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+
                   sizeof(Mat_MPIAIJ));
  PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
    CHKPTRQ(aij->colmap);
    PLogObjectMemory(mat,(aij->N)*sizeof(int));
    PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
  } else aij->colmap = 0;
  if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
    aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
  } else aij->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}