1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.149 1996/07/02 18:06:22 bsmith Exp balay $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 117 col = aij->colmap[idxn[j]] + shift; 118 if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0; 119 else { 120 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 121 } 122 } 123 } 124 } 125 else { 126 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 127 } 128 } 129 return 0; 130 } 131 132 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 133 { 134 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 135 MPI_Comm comm = mat->comm; 136 int size = aij->size, *owners = aij->rowners; 137 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 138 MPI_Request *send_waits,*recv_waits; 139 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 140 InsertMode addv; 141 Scalar *rvalues,*svalues; 142 143 /* make sure all processors are either in INSERTMODE or ADDMODE */ 144 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 145 if (addv == (ADD_VALUES|INSERT_VALUES)) { 146 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 147 } 148 aij->insertmode = addv; /* in case this processor had no cache */ 149 150 /* first count number of contributors to each processor */ 151 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 152 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 153 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 154 for ( i=0; i<aij->stash.n; i++ ) { 155 idx = aij->stash.idx[i]; 156 for ( j=0; j<size; j++ ) { 157 if (idx >= owners[j] && idx < owners[j+1]) { 158 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 159 } 160 } 161 } 162 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 163 164 /* inform other processors of number of messages and max length*/ 165 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 166 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 167 nreceives = work[rank]; 168 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 169 nmax = work[rank]; 170 PetscFree(work); 171 172 /* post receives: 173 1) each message will consist of ordered pairs 174 (global index,value) we store the global index as a double 175 to simplify the message passing. 176 2) since we don't know how long each individual message is we 177 allocate the largest needed buffer for each receive. Potentially 178 this is a lot of wasted space. 179 180 181 This could be done better. 182 */ 183 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 184 CHKPTRQ(rvalues); 185 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 186 CHKPTRQ(recv_waits); 187 for ( i=0; i<nreceives; i++ ) { 188 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 189 comm,recv_waits+i); 190 } 191 192 /* do sends: 193 1) starts[i] gives the starting index in svalues for stuff going to 194 the ith processor 195 */ 196 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 197 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 198 CHKPTRQ(send_waits); 199 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 200 starts[0] = 0; 201 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 202 for ( i=0; i<aij->stash.n; i++ ) { 203 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 204 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 205 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 206 } 207 PetscFree(owner); 208 starts[0] = 0; 209 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 210 count = 0; 211 for ( i=0; i<size; i++ ) { 212 if (procs[i]) { 213 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 214 comm,send_waits+count++); 215 } 216 } 217 PetscFree(starts); PetscFree(nprocs); 218 219 /* Free cache space */ 220 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 221 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 222 223 aij->svalues = svalues; aij->rvalues = rvalues; 224 aij->nsends = nsends; aij->nrecvs = nreceives; 225 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 226 aij->rmax = nmax; 227 228 return 0; 229 } 230 extern int MatSetUpMultiply_MPIAIJ(Mat); 231 232 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 233 { 234 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 235 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 236 MPI_Status *send_status,recv_status; 237 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 238 int row,col,other_disassembled,shift = C->indexshift; 239 Scalar *values,val; 240 InsertMode addv = aij->insertmode; 241 242 /* wait on receives */ 243 while (count) { 244 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 245 /* unpack receives into our local space */ 246 values = aij->rvalues + 3*imdex*aij->rmax; 247 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 248 n = n/3; 249 for ( i=0; i<n; i++ ) { 250 row = (int) PetscReal(values[3*i]) - aij->rstart; 251 col = (int) PetscReal(values[3*i+1]); 252 val = values[3*i+2]; 253 if (col >= aij->cstart && col < aij->cend) { 254 col -= aij->cstart; 255 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 256 } 257 else { 258 if (mat->was_assembled) { 259 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 260 col = aij->colmap[col] + shift; 261 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 262 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 263 col = (int) PetscReal(values[3*i+1]); 264 } 265 } 266 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 267 } 268 } 269 count--; 270 } 271 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 272 273 /* wait on sends */ 274 if (aij->nsends) { 275 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 276 CHKPTRQ(send_status); 277 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 278 PetscFree(send_status); 279 } 280 PetscFree(aij->send_waits); PetscFree(aij->svalues); 281 282 aij->insertmode = NOT_SET_VALUES; 283 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 284 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 285 286 /* determine if any processor has disassembled, if so we must 287 also disassemble ourselfs, in order that we may reassemble. */ 288 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 289 if (mat->was_assembled && !other_disassembled) { 290 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 293 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 294 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 295 } 296 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 297 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 298 299 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 300 return 0; 301 } 302 303 static int MatZeroEntries_MPIAIJ(Mat A) 304 { 305 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 306 int ierr; 307 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 308 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 309 return 0; 310 } 311 312 /* the code does not do the diagonal entries correctly unless the 313 matrix is square and the column and row owerships are identical. 314 This is a BUG. The only way to fix it seems to be to access 315 aij->A and aij->B directly and not through the MatZeroRows() 316 routine. 317 */ 318 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 319 { 320 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 321 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 322 int *procs,*nprocs,j,found,idx,nsends,*work; 323 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 324 int *rvalues,tag = A->tag,count,base,slen,n,*source; 325 int *lens,imdex,*lrows,*values; 326 MPI_Comm comm = A->comm; 327 MPI_Request *send_waits,*recv_waits; 328 MPI_Status recv_status,*send_status; 329 IS istmp; 330 331 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 332 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 333 334 /* first count number of contributors to each processor */ 335 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 336 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 337 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 338 for ( i=0; i<N; i++ ) { 339 idx = rows[i]; 340 found = 0; 341 for ( j=0; j<size; j++ ) { 342 if (idx >= owners[j] && idx < owners[j+1]) { 343 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 344 } 345 } 346 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 347 } 348 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 349 350 /* inform other processors of number of messages and max length*/ 351 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 352 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 353 nrecvs = work[rank]; 354 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 355 nmax = work[rank]; 356 PetscFree(work); 357 358 /* post receives: */ 359 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 360 CHKPTRQ(rvalues); 361 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 362 CHKPTRQ(recv_waits); 363 for ( i=0; i<nrecvs; i++ ) { 364 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 365 } 366 367 /* do sends: 368 1) starts[i] gives the starting index in svalues for stuff going to 369 the ith processor 370 */ 371 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 372 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 373 CHKPTRQ(send_waits); 374 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 375 starts[0] = 0; 376 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 377 for ( i=0; i<N; i++ ) { 378 svalues[starts[owner[i]]++] = rows[i]; 379 } 380 ISRestoreIndices(is,&rows); 381 382 starts[0] = 0; 383 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 384 count = 0; 385 for ( i=0; i<size; i++ ) { 386 if (procs[i]) { 387 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 388 } 389 } 390 PetscFree(starts); 391 392 base = owners[rank]; 393 394 /* wait on receives */ 395 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 396 source = lens + nrecvs; 397 count = nrecvs; slen = 0; 398 while (count) { 399 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 400 /* unpack receives into our local space */ 401 MPI_Get_count(&recv_status,MPI_INT,&n); 402 source[imdex] = recv_status.MPI_SOURCE; 403 lens[imdex] = n; 404 slen += n; 405 count--; 406 } 407 PetscFree(recv_waits); 408 409 /* move the data into the send scatter */ 410 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 411 count = 0; 412 for ( i=0; i<nrecvs; i++ ) { 413 values = rvalues + i*nmax; 414 for ( j=0; j<lens[i]; j++ ) { 415 lrows[count++] = values[j] - base; 416 } 417 } 418 PetscFree(rvalues); PetscFree(lens); 419 PetscFree(owner); PetscFree(nprocs); 420 421 /* actually zap the local rows */ 422 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 423 PLogObjectParent(A,istmp); 424 PetscFree(lrows); 425 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 426 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 427 ierr = ISDestroy(istmp); CHKERRQ(ierr); 428 429 /* wait on sends */ 430 if (nsends) { 431 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 432 CHKPTRQ(send_status); 433 MPI_Waitall(nsends,send_waits,send_status); 434 PetscFree(send_status); 435 } 436 PetscFree(send_waits); PetscFree(svalues); 437 438 return 0; 439 } 440 441 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 442 { 443 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 444 int ierr,nt; 445 446 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 447 if (nt != a->n) { 448 SETERRQ(1,"MatMult_MPIAIJ:Incompatiable parition of A and xx"); 449 } 450 ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 451 if (nt != a->m) { 452 SETERRQ(1,"MatMult_MPIAIJ:Incompatiable parition of A and yy"); 453 } 454 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 455 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 456 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 457 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 458 return 0; 459 } 460 461 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 462 { 463 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 464 int ierr; 465 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 466 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 467 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 468 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 469 return 0; 470 } 471 472 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 473 { 474 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 475 int ierr; 476 477 /* do nondiagonal part */ 478 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 479 /* send it on its way */ 480 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 481 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 482 /* do local part */ 483 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 484 /* receive remote parts: note this assumes the values are not actually */ 485 /* inserted in yy until the next line, which is true for my implementation*/ 486 /* but is not perhaps always true. */ 487 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 488 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489 return 0; 490 } 491 492 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 493 { 494 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 495 int ierr; 496 497 /* do nondiagonal part */ 498 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 499 /* send it on its way */ 500 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 501 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 502 /* do local part */ 503 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 504 /* receive remote parts: note this assumes the values are not actually */ 505 /* inserted in yy until the next line, which is true for my implementation*/ 506 /* but is not perhaps always true. */ 507 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 508 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 509 return 0; 510 } 511 512 /* 513 This only works correctly for square matrices where the subblock A->A is the 514 diagonal block 515 */ 516 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 517 { 518 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 519 if (a->M != a->N) 520 SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 521 return MatGetDiagonal(a->A,v); 522 } 523 524 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 525 { 526 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 527 int ierr; 528 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 529 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 530 return 0; 531 } 532 533 static int MatDestroy_MPIAIJ(PetscObject obj) 534 { 535 Mat mat = (Mat) obj; 536 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 537 int ierr; 538 #if defined(PETSC_LOG) 539 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 540 #endif 541 PetscFree(aij->rowners); 542 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 543 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 544 if (aij->colmap) PetscFree(aij->colmap); 545 if (aij->garray) PetscFree(aij->garray); 546 if (aij->lvec) VecDestroy(aij->lvec); 547 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 548 if (aij->rowvalues) PetscFree(aij->rowvalues); 549 PetscFree(aij); 550 PLogObjectDestroy(mat); 551 PetscHeaderDestroy(mat); 552 return 0; 553 } 554 #include "draw.h" 555 #include "pinclude/pviewer.h" 556 557 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 558 { 559 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 560 int ierr; 561 562 if (aij->size == 1) { 563 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 564 } 565 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 566 return 0; 567 } 568 569 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 570 { 571 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 572 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 573 int ierr, format,shift = C->indexshift,rank; 574 FILE *fd; 575 ViewerType vtype; 576 577 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 578 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 579 ierr = ViewerGetFormat(viewer,&format); 580 if (format == ASCII_FORMAT_INFO_DETAILED) { 581 int nz, nzalloc, mem, flg; 582 MPI_Comm_rank(mat->comm,&rank); 583 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 584 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 585 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 586 PetscSequentialPhaseBegin(mat->comm,1); 587 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 588 rank,aij->m,nz,nzalloc,mem); 589 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 590 rank,aij->m,nz,nzalloc,mem); 591 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 592 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 593 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 594 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 595 fflush(fd); 596 PetscSequentialPhaseEnd(mat->comm,1); 597 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 598 return 0; 599 } 600 else if (format == ASCII_FORMAT_INFO) { 601 return 0; 602 } 603 } 604 605 if (vtype == DRAW_VIEWER) { 606 Draw draw; 607 PetscTruth isnull; 608 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 609 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 610 } 611 612 if (vtype == ASCII_FILE_VIEWER) { 613 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 614 PetscSequentialPhaseBegin(mat->comm,1); 615 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 616 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 617 aij->cend); 618 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 619 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 620 fflush(fd); 621 PetscSequentialPhaseEnd(mat->comm,1); 622 } 623 else { 624 int size = aij->size; 625 rank = aij->rank; 626 if (size == 1) { 627 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 628 } 629 else { 630 /* assemble the entire matrix onto first processor. */ 631 Mat A; 632 Mat_SeqAIJ *Aloc; 633 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 634 Scalar *a; 635 636 if (!rank) { 637 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 638 CHKERRQ(ierr); 639 } 640 else { 641 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 642 CHKERRQ(ierr); 643 } 644 PLogObjectParent(mat,A); 645 646 /* copy over the A part */ 647 Aloc = (Mat_SeqAIJ*) aij->A->data; 648 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 649 row = aij->rstart; 650 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 651 for ( i=0; i<m; i++ ) { 652 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 653 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 654 } 655 aj = Aloc->j; 656 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 657 658 /* copy over the B part */ 659 Aloc = (Mat_SeqAIJ*) aij->B->data; 660 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 661 row = aij->rstart; 662 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 663 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 664 for ( i=0; i<m; i++ ) { 665 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 666 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 667 } 668 PetscFree(ct); 669 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 670 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 671 if (!rank) { 672 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 673 } 674 ierr = MatDestroy(A); CHKERRQ(ierr); 675 } 676 } 677 return 0; 678 } 679 680 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 681 { 682 Mat mat = (Mat) obj; 683 int ierr; 684 ViewerType vtype; 685 686 if (!viewer) { 687 viewer = STDOUT_VIEWER_SELF; 688 } 689 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 690 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 691 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 692 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 693 } 694 else if (vtype == BINARY_FILE_VIEWER) { 695 return MatView_MPIAIJ_Binary(mat,viewer); 696 } 697 return 0; 698 } 699 700 /* 701 This has to provide several versions. 702 703 1) per sequential 704 2) a) use only local smoothing updating outer values only once. 705 b) local smoothing updating outer values each inner iteration 706 3) color updating out values betwen colors. 707 */ 708 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 709 double fshift,int its,Vec xx) 710 { 711 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 712 Mat AA = mat->A, BB = mat->B; 713 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 714 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 715 int ierr,*idx, *diag; 716 int n = mat->n, m = mat->m, i,shift = A->indexshift; 717 Vec tt; 718 719 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 720 xs = x + shift; /* shift by one for index start of 1 */ 721 ls = ls + shift; 722 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 723 diag = A->diag; 724 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 725 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 726 } 727 if (flag & SOR_EISENSTAT) { 728 /* Let A = L + U + D; where L is lower trianglar, 729 U is upper triangular, E is diagonal; This routine applies 730 731 (L + E)^{-1} A (U + E)^{-1} 732 733 to a vector efficiently using Eisenstat's trick. This is for 734 the case of SSOR preconditioner, so E is D/omega where omega 735 is the relaxation factor. 736 */ 737 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 738 PLogObjectParent(matin,tt); 739 VecGetArray(tt,&t); 740 scale = (2.0/omega) - 1.0; 741 /* x = (E + U)^{-1} b */ 742 VecSet(&zero,mat->lvec); 743 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 744 mat->Mvctx); CHKERRQ(ierr); 745 for ( i=m-1; i>-1; i-- ) { 746 n = A->i[i+1] - diag[i] - 1; 747 idx = A->j + diag[i] + !shift; 748 v = A->a + diag[i] + !shift; 749 sum = b[i]; 750 SPARSEDENSEMDOT(sum,xs,v,idx,n); 751 d = fshift + A->a[diag[i]+shift]; 752 n = B->i[i+1] - B->i[i]; 753 idx = B->j + B->i[i] + shift; 754 v = B->a + B->i[i] + shift; 755 SPARSEDENSEMDOT(sum,ls,v,idx,n); 756 x[i] = omega*(sum/d); 757 } 758 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 759 mat->Mvctx); CHKERRQ(ierr); 760 761 /* t = b - (2*E - D)x */ 762 v = A->a; 763 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 764 765 /* t = (E + L)^{-1}t */ 766 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 767 diag = A->diag; 768 VecSet(&zero,mat->lvec); 769 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 770 mat->Mvctx); CHKERRQ(ierr); 771 for ( i=0; i<m; i++ ) { 772 n = diag[i] - A->i[i]; 773 idx = A->j + A->i[i] + shift; 774 v = A->a + A->i[i] + shift; 775 sum = t[i]; 776 SPARSEDENSEMDOT(sum,ts,v,idx,n); 777 d = fshift + A->a[diag[i]+shift]; 778 n = B->i[i+1] - B->i[i]; 779 idx = B->j + B->i[i] + shift; 780 v = B->a + B->i[i] + shift; 781 SPARSEDENSEMDOT(sum,ls,v,idx,n); 782 t[i] = omega*(sum/d); 783 } 784 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 785 mat->Mvctx); CHKERRQ(ierr); 786 /* x = x + t */ 787 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 788 VecDestroy(tt); 789 return 0; 790 } 791 792 793 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 794 if (flag & SOR_ZERO_INITIAL_GUESS) { 795 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 796 } 797 else { 798 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 799 CHKERRQ(ierr); 800 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 801 CHKERRQ(ierr); 802 } 803 while (its--) { 804 /* go down through the rows */ 805 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 806 mat->Mvctx); CHKERRQ(ierr); 807 for ( i=0; i<m; i++ ) { 808 n = A->i[i+1] - A->i[i]; 809 idx = A->j + A->i[i] + shift; 810 v = A->a + A->i[i] + shift; 811 sum = b[i]; 812 SPARSEDENSEMDOT(sum,xs,v,idx,n); 813 d = fshift + A->a[diag[i]+shift]; 814 n = B->i[i+1] - B->i[i]; 815 idx = B->j + B->i[i] + shift; 816 v = B->a + B->i[i] + shift; 817 SPARSEDENSEMDOT(sum,ls,v,idx,n); 818 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 819 } 820 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 821 mat->Mvctx); CHKERRQ(ierr); 822 /* come up through the rows */ 823 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 824 mat->Mvctx); CHKERRQ(ierr); 825 for ( i=m-1; i>-1; i-- ) { 826 n = A->i[i+1] - A->i[i]; 827 idx = A->j + A->i[i] + shift; 828 v = A->a + A->i[i] + shift; 829 sum = b[i]; 830 SPARSEDENSEMDOT(sum,xs,v,idx,n); 831 d = fshift + A->a[diag[i]+shift]; 832 n = B->i[i+1] - B->i[i]; 833 idx = B->j + B->i[i] + shift; 834 v = B->a + B->i[i] + shift; 835 SPARSEDENSEMDOT(sum,ls,v,idx,n); 836 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 837 } 838 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 839 mat->Mvctx); CHKERRQ(ierr); 840 } 841 } 842 else if (flag & SOR_FORWARD_SWEEP){ 843 if (flag & SOR_ZERO_INITIAL_GUESS) { 844 VecSet(&zero,mat->lvec); 845 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 846 mat->Mvctx); CHKERRQ(ierr); 847 for ( i=0; i<m; i++ ) { 848 n = diag[i] - A->i[i]; 849 idx = A->j + A->i[i] + shift; 850 v = A->a + A->i[i] + shift; 851 sum = b[i]; 852 SPARSEDENSEMDOT(sum,xs,v,idx,n); 853 d = fshift + A->a[diag[i]+shift]; 854 n = B->i[i+1] - B->i[i]; 855 idx = B->j + B->i[i] + shift; 856 v = B->a + B->i[i] + shift; 857 SPARSEDENSEMDOT(sum,ls,v,idx,n); 858 x[i] = omega*(sum/d); 859 } 860 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 861 mat->Mvctx); CHKERRQ(ierr); 862 its--; 863 } 864 while (its--) { 865 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 866 CHKERRQ(ierr); 867 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 868 CHKERRQ(ierr); 869 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 870 mat->Mvctx); CHKERRQ(ierr); 871 for ( i=0; i<m; i++ ) { 872 n = A->i[i+1] - A->i[i]; 873 idx = A->j + A->i[i] + shift; 874 v = A->a + A->i[i] + shift; 875 sum = b[i]; 876 SPARSEDENSEMDOT(sum,xs,v,idx,n); 877 d = fshift + A->a[diag[i]+shift]; 878 n = B->i[i+1] - B->i[i]; 879 idx = B->j + B->i[i] + shift; 880 v = B->a + B->i[i] + shift; 881 SPARSEDENSEMDOT(sum,ls,v,idx,n); 882 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 883 } 884 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 885 mat->Mvctx); CHKERRQ(ierr); 886 } 887 } 888 else if (flag & SOR_BACKWARD_SWEEP){ 889 if (flag & SOR_ZERO_INITIAL_GUESS) { 890 VecSet(&zero,mat->lvec); 891 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 892 mat->Mvctx); CHKERRQ(ierr); 893 for ( i=m-1; i>-1; i-- ) { 894 n = A->i[i+1] - diag[i] - 1; 895 idx = A->j + diag[i] + !shift; 896 v = A->a + diag[i] + !shift; 897 sum = b[i]; 898 SPARSEDENSEMDOT(sum,xs,v,idx,n); 899 d = fshift + A->a[diag[i]+shift]; 900 n = B->i[i+1] - B->i[i]; 901 idx = B->j + B->i[i] + shift; 902 v = B->a + B->i[i] + shift; 903 SPARSEDENSEMDOT(sum,ls,v,idx,n); 904 x[i] = omega*(sum/d); 905 } 906 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 907 mat->Mvctx); CHKERRQ(ierr); 908 its--; 909 } 910 while (its--) { 911 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 912 mat->Mvctx); CHKERRQ(ierr); 913 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 914 mat->Mvctx); CHKERRQ(ierr); 915 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 916 mat->Mvctx); CHKERRQ(ierr); 917 for ( i=m-1; i>-1; i-- ) { 918 n = A->i[i+1] - A->i[i]; 919 idx = A->j + A->i[i] + shift; 920 v = A->a + A->i[i] + shift; 921 sum = b[i]; 922 SPARSEDENSEMDOT(sum,xs,v,idx,n); 923 d = fshift + A->a[diag[i]+shift]; 924 n = B->i[i+1] - B->i[i]; 925 idx = B->j + B->i[i] + shift; 926 v = B->a + B->i[i] + shift; 927 SPARSEDENSEMDOT(sum,ls,v,idx,n); 928 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 929 } 930 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 931 mat->Mvctx); CHKERRQ(ierr); 932 } 933 } 934 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 935 if (flag & SOR_ZERO_INITIAL_GUESS) { 936 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 937 } 938 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 939 CHKERRQ(ierr); 940 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 941 CHKERRQ(ierr); 942 while (its--) { 943 /* go down through the rows */ 944 for ( i=0; i<m; i++ ) { 945 n = A->i[i+1] - A->i[i]; 946 idx = A->j + A->i[i] + shift; 947 v = A->a + A->i[i] + shift; 948 sum = b[i]; 949 SPARSEDENSEMDOT(sum,xs,v,idx,n); 950 d = fshift + A->a[diag[i]+shift]; 951 n = B->i[i+1] - B->i[i]; 952 idx = B->j + B->i[i] + shift; 953 v = B->a + B->i[i] + shift; 954 SPARSEDENSEMDOT(sum,ls,v,idx,n); 955 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 956 } 957 /* come up through the rows */ 958 for ( i=m-1; i>-1; i-- ) { 959 n = A->i[i+1] - A->i[i]; 960 idx = A->j + A->i[i] + shift; 961 v = A->a + A->i[i] + shift; 962 sum = b[i]; 963 SPARSEDENSEMDOT(sum,xs,v,idx,n); 964 d = fshift + A->a[diag[i]+shift]; 965 n = B->i[i+1] - B->i[i]; 966 idx = B->j + B->i[i] + shift; 967 v = B->a + B->i[i] + shift; 968 SPARSEDENSEMDOT(sum,ls,v,idx,n); 969 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 970 } 971 } 972 } 973 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 974 if (flag & SOR_ZERO_INITIAL_GUESS) { 975 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 976 } 977 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 978 CHKERRQ(ierr); 979 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 980 CHKERRQ(ierr); 981 while (its--) { 982 for ( i=0; i<m; i++ ) { 983 n = A->i[i+1] - A->i[i]; 984 idx = A->j + A->i[i] + shift; 985 v = A->a + A->i[i] + shift; 986 sum = b[i]; 987 SPARSEDENSEMDOT(sum,xs,v,idx,n); 988 d = fshift + A->a[diag[i]+shift]; 989 n = B->i[i+1] - B->i[i]; 990 idx = B->j + B->i[i] + shift; 991 v = B->a + B->i[i] + shift; 992 SPARSEDENSEMDOT(sum,ls,v,idx,n); 993 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 994 } 995 } 996 } 997 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 998 if (flag & SOR_ZERO_INITIAL_GUESS) { 999 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 1000 } 1001 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 1002 mat->Mvctx); CHKERRQ(ierr); 1003 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 1004 mat->Mvctx); CHKERRQ(ierr); 1005 while (its--) { 1006 for ( i=m-1; i>-1; i-- ) { 1007 n = A->i[i+1] - A->i[i]; 1008 idx = A->j + A->i[i] + shift; 1009 v = A->a + A->i[i] + shift; 1010 sum = b[i]; 1011 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1012 d = fshift + A->a[diag[i]+shift]; 1013 n = B->i[i+1] - B->i[i]; 1014 idx = B->j + B->i[i] + shift; 1015 v = B->a + B->i[i] + shift; 1016 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1017 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1018 } 1019 } 1020 } 1021 return 0; 1022 } 1023 1024 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1025 int *nzalloc,int *mem) 1026 { 1027 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1028 Mat A = mat->A, B = mat->B; 1029 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1030 1031 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1032 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1033 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1034 if (flag == MAT_LOCAL) { 1035 if (nz) *nz = isend[0]; 1036 if (nzalloc) *nzalloc = isend[1]; 1037 if (mem) *mem = isend[2]; 1038 } else if (flag == MAT_GLOBAL_MAX) { 1039 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1040 if (nz) *nz = irecv[0]; 1041 if (nzalloc) *nzalloc = irecv[1]; 1042 if (mem) *mem = irecv[2]; 1043 } else if (flag == MAT_GLOBAL_SUM) { 1044 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1045 if (nz) *nz = irecv[0]; 1046 if (nzalloc) *nzalloc = irecv[1]; 1047 if (mem) *mem = irecv[2]; 1048 } 1049 return 0; 1050 } 1051 1052 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1053 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1054 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1055 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1056 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1057 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1058 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1059 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1060 1061 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1062 { 1063 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1064 1065 if (op == NO_NEW_NONZERO_LOCATIONS || 1066 op == YES_NEW_NONZERO_LOCATIONS || 1067 op == COLUMNS_SORTED || 1068 op == ROW_ORIENTED) { 1069 MatSetOption(a->A,op); 1070 MatSetOption(a->B,op); 1071 } 1072 else if (op == ROWS_SORTED || 1073 op == SYMMETRIC_MATRIX || 1074 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1075 op == YES_NEW_DIAGONALS) 1076 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1077 else if (op == COLUMN_ORIENTED) { 1078 a->roworiented = 0; 1079 MatSetOption(a->A,op); 1080 MatSetOption(a->B,op); 1081 } 1082 else if (op == NO_NEW_DIAGONALS) 1083 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1084 else 1085 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1086 return 0; 1087 } 1088 1089 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1090 { 1091 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1092 *m = mat->M; *n = mat->N; 1093 return 0; 1094 } 1095 1096 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1097 { 1098 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1099 *m = mat->m; *n = mat->N; 1100 return 0; 1101 } 1102 1103 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1104 { 1105 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1106 *m = mat->rstart; *n = mat->rend; 1107 return 0; 1108 } 1109 1110 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1111 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1112 1113 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1114 { 1115 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1116 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1117 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1118 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1119 int *cmap, *idx_p; 1120 1121 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1122 mat->getrowactive = PETSC_TRUE; 1123 1124 if (!mat->rowvalues && (idx || v)) { 1125 /* 1126 allocate enough space to hold information from the longest row. 1127 */ 1128 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1129 int max = 1,n = mat->n,tmp; 1130 for ( i=0; i<n; i++ ) { 1131 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1132 if (max < tmp) { max = tmp; } 1133 } 1134 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1135 CHKPTRQ(mat->rowvalues); 1136 mat->rowindices = (int *) (mat->rowvalues + max); 1137 } 1138 1139 1140 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1141 lrow = row - rstart; 1142 1143 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1144 if (!v) {pvA = 0; pvB = 0;} 1145 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1146 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1147 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1148 nztot = nzA + nzB; 1149 1150 cmap = mat->garray; 1151 if (v || idx) { 1152 if (nztot) { 1153 /* Sort by increasing column numbers, assuming A and B already sorted */ 1154 int imark = -1; 1155 if (v) { 1156 *v = v_p = mat->rowvalues; 1157 for ( i=0; i<nzB; i++ ) { 1158 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1159 else break; 1160 } 1161 imark = i; 1162 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1163 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1164 } 1165 if (idx) { 1166 *idx = idx_p = mat->rowindices; 1167 if (imark > -1) { 1168 for ( i=0; i<imark; i++ ) { 1169 idx_p[i] = cmap[cworkB[i]]; 1170 } 1171 } else { 1172 for ( i=0; i<nzB; i++ ) { 1173 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1174 else break; 1175 } 1176 imark = i; 1177 } 1178 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1179 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1180 } 1181 } 1182 else { 1183 if (idx) *idx = 0; 1184 if (v) *v = 0; 1185 } 1186 } 1187 *nz = nztot; 1188 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1189 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1190 return 0; 1191 } 1192 1193 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1194 { 1195 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1196 if (aij->getrowactive == PETSC_FALSE) { 1197 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1198 } 1199 aij->getrowactive = PETSC_FALSE; 1200 return 0; 1201 } 1202 1203 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1204 { 1205 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1206 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1207 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1208 double sum = 0.0; 1209 Scalar *v; 1210 1211 if (aij->size == 1) { 1212 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1213 } else { 1214 if (type == NORM_FROBENIUS) { 1215 v = amat->a; 1216 for (i=0; i<amat->nz; i++ ) { 1217 #if defined(PETSC_COMPLEX) 1218 sum += real(conj(*v)*(*v)); v++; 1219 #else 1220 sum += (*v)*(*v); v++; 1221 #endif 1222 } 1223 v = bmat->a; 1224 for (i=0; i<bmat->nz; i++ ) { 1225 #if defined(PETSC_COMPLEX) 1226 sum += real(conj(*v)*(*v)); v++; 1227 #else 1228 sum += (*v)*(*v); v++; 1229 #endif 1230 } 1231 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1232 *norm = sqrt(*norm); 1233 } 1234 else if (type == NORM_1) { /* max column norm */ 1235 double *tmp, *tmp2; 1236 int *jj, *garray = aij->garray; 1237 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1238 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1239 PetscMemzero(tmp,aij->N*sizeof(double)); 1240 *norm = 0.0; 1241 v = amat->a; jj = amat->j; 1242 for ( j=0; j<amat->nz; j++ ) { 1243 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1244 } 1245 v = bmat->a; jj = bmat->j; 1246 for ( j=0; j<bmat->nz; j++ ) { 1247 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1248 } 1249 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1250 for ( j=0; j<aij->N; j++ ) { 1251 if (tmp2[j] > *norm) *norm = tmp2[j]; 1252 } 1253 PetscFree(tmp); PetscFree(tmp2); 1254 } 1255 else if (type == NORM_INFINITY) { /* max row norm */ 1256 double ntemp = 0.0; 1257 for ( j=0; j<amat->m; j++ ) { 1258 v = amat->a + amat->i[j] + shift; 1259 sum = 0.0; 1260 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1261 sum += PetscAbsScalar(*v); v++; 1262 } 1263 v = bmat->a + bmat->i[j] + shift; 1264 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1265 sum += PetscAbsScalar(*v); v++; 1266 } 1267 if (sum > ntemp) ntemp = sum; 1268 } 1269 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1270 } 1271 else { 1272 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1273 } 1274 } 1275 return 0; 1276 } 1277 1278 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1279 { 1280 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1281 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1282 int ierr,shift = Aloc->indexshift; 1283 Mat B; 1284 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1285 Scalar *array; 1286 1287 if (matout == PETSC_NULL && M != N) 1288 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1289 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1290 PETSC_NULL,&B); CHKERRQ(ierr); 1291 1292 /* copy over the A part */ 1293 Aloc = (Mat_SeqAIJ*) a->A->data; 1294 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1295 row = a->rstart; 1296 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1297 for ( i=0; i<m; i++ ) { 1298 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1299 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1300 } 1301 aj = Aloc->j; 1302 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1303 1304 /* copy over the B part */ 1305 Aloc = (Mat_SeqAIJ*) a->B->data; 1306 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1307 row = a->rstart; 1308 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1309 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1310 for ( i=0; i<m; i++ ) { 1311 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1312 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1313 } 1314 PetscFree(ct); 1315 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1316 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1317 if (matout != PETSC_NULL) { 1318 *matout = B; 1319 } else { 1320 /* This isn't really an in-place transpose .... but free data structures from a */ 1321 PetscFree(a->rowners); 1322 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1323 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1324 if (a->colmap) PetscFree(a->colmap); 1325 if (a->garray) PetscFree(a->garray); 1326 if (a->lvec) VecDestroy(a->lvec); 1327 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1328 PetscFree(a); 1329 PetscMemcpy(A,B,sizeof(struct _Mat)); 1330 PetscHeaderDestroy(B); 1331 } 1332 return 0; 1333 } 1334 1335 extern int MatPrintHelp_SeqAIJ(Mat); 1336 static int MatPrintHelp_MPIAIJ(Mat A) 1337 { 1338 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1339 1340 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1341 else return 0; 1342 } 1343 1344 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1345 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1346 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1347 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1348 /* -------------------------------------------------------------------*/ 1349 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1350 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1351 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1352 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1353 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1354 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1355 MatLUFactor_MPIAIJ,0, 1356 MatRelax_MPIAIJ, 1357 MatTranspose_MPIAIJ, 1358 MatGetInfo_MPIAIJ,0, 1359 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1360 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1361 0, 1362 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1363 MatGetReordering_MPIAIJ, 1364 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1365 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1366 MatILUFactorSymbolic_MPIAIJ,0, 1367 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1368 0,0,0, 1369 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1370 MatPrintHelp_MPIAIJ, 1371 MatScale_MPIAIJ}; 1372 1373 /*@C 1374 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1375 (the default parallel PETSc format). For good matrix assembly performance 1376 the user should preallocate the matrix storage by setting the parameters 1377 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1378 performance can be increased by more than a factor of 50. 1379 1380 Input Parameters: 1381 . comm - MPI communicator 1382 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1383 . This should be the same as the local size used in creating the 1384 . y vector in y = Ax 1385 . n - number of local columns (or PETSC_DECIDE to have calculated 1386 if N is given) 1387 . This should be the same as the local size used in creating the 1388 . x vector in y = Ax 1389 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1390 . N - number of global columns (or PETSC_DECIDE to have calculated 1391 if n is given) 1392 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1393 (same for all local rows) 1394 . d_nzz - array containing the number of nonzeros in the various rows of the 1395 diagonal portion of local submatrix (possibly different for each row) 1396 or PETSC_NULL. You must leave room for the diagonal entry even if 1397 it is zero. 1398 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1399 submatrix (same for all local rows). 1400 . o_nzz - array containing the number of nonzeros in the various rows of the 1401 off-diagonal portion of the local submatrix (possibly different for 1402 each row) or PETSC_NULL. 1403 1404 Output Parameter: 1405 . A - the matrix 1406 1407 Notes: 1408 The AIJ format (also called the Yale sparse matrix format or 1409 compressed row storage), is fully compatible with standard Fortran 77 1410 storage. That is, the stored row and column indices can begin at 1411 either one (as in Fortran) or zero. See the users manual for details. 1412 1413 The user MUST specify either the local or global matrix dimensions 1414 (possibly both). 1415 1416 By default, this format uses inodes (identical nodes) when possible. 1417 We search for consecutive rows with the same nonzero structure, thereby 1418 reusing matrix information to achieve increased efficiency. 1419 1420 Options Database Keys: 1421 $ -mat_aij_no_inode - Do not use inodes 1422 $ -mat_aij_inode_limit <limit> - Set inode limit. 1423 $ (max limit=5) 1424 $ -mat_aij_oneindex - Internally use indexing starting at 1 1425 $ rather than 0. Note: When calling MatSetValues(), 1426 $ the user still MUST index entries starting at 0! 1427 1428 Storage Information: 1429 For a square global matrix we define each processor's diagonal portion 1430 to be its local rows and the corresponding columns (a square submatrix); 1431 each processor's off-diagonal portion encompasses the remainder of the 1432 local matrix (a rectangular submatrix). 1433 1434 The user can specify preallocated storage for the diagonal part of 1435 the local submatrix with either d_nz or d_nnz (not both). Set 1436 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1437 memory allocation. Likewise, specify preallocated storage for the 1438 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1439 1440 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1441 the figure below we depict these three local rows and all columns (0-11). 1442 1443 $ 0 1 2 3 4 5 6 7 8 9 10 11 1444 $ ------------------- 1445 $ row 3 | o o o d d d o o o o o o 1446 $ row 4 | o o o d d d o o o o o o 1447 $ row 5 | o o o d d d o o o o o o 1448 $ ------------------- 1449 $ 1450 1451 Thus, any entries in the d locations are stored in the d (diagonal) 1452 submatrix, and any entries in the o locations are stored in the 1453 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1454 stored simply in the MATSEQAIJ format for compressed row storage. 1455 1456 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1457 and o_nz should indicate the number of nonzeros per row in the o matrix. 1458 In general, for PDE problems in which most nonzeros are near the diagonal, 1459 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1460 or you will get TERRIBLE performance, see the users' manual chapter on 1461 matrices and the file $(PETSC_DIR)/Performance. 1462 1463 .keywords: matrix, aij, compressed row, sparse, parallel 1464 1465 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1466 @*/ 1467 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1468 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1469 { 1470 Mat B; 1471 Mat_MPIAIJ *b; 1472 int ierr, i,sum[2],work[2]; 1473 1474 *A = 0; 1475 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1476 PLogObjectCreate(B); 1477 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1478 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1479 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1480 B->destroy = MatDestroy_MPIAIJ; 1481 B->view = MatView_MPIAIJ; 1482 B->factor = 0; 1483 B->assembled = PETSC_FALSE; 1484 1485 b->insertmode = NOT_SET_VALUES; 1486 MPI_Comm_rank(comm,&b->rank); 1487 MPI_Comm_size(comm,&b->size); 1488 1489 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1490 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1491 1492 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1493 work[0] = m; work[1] = n; 1494 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1495 if (M == PETSC_DECIDE) M = sum[0]; 1496 if (N == PETSC_DECIDE) N = sum[1]; 1497 } 1498 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1499 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1500 b->m = m; B->m = m; 1501 b->n = n; B->n = n; 1502 b->N = N; B->N = N; 1503 b->M = M; B->M = M; 1504 1505 /* build local table of row and column ownerships */ 1506 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1507 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1508 b->cowners = b->rowners + b->size + 1; 1509 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1510 b->rowners[0] = 0; 1511 for ( i=2; i<=b->size; i++ ) { 1512 b->rowners[i] += b->rowners[i-1]; 1513 } 1514 b->rstart = b->rowners[b->rank]; 1515 b->rend = b->rowners[b->rank+1]; 1516 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1517 b->cowners[0] = 0; 1518 for ( i=2; i<=b->size; i++ ) { 1519 b->cowners[i] += b->cowners[i-1]; 1520 } 1521 b->cstart = b->cowners[b->rank]; 1522 b->cend = b->cowners[b->rank+1]; 1523 1524 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1525 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1526 PLogObjectParent(B,b->A); 1527 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1528 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1529 PLogObjectParent(B,b->B); 1530 1531 /* build cache for off array entries formed */ 1532 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1533 b->colmap = 0; 1534 b->garray = 0; 1535 b->roworiented = 1; 1536 1537 /* stuff used for matrix vector multiply */ 1538 b->lvec = 0; 1539 b->Mvctx = 0; 1540 1541 /* stuff for MatGetRow() */ 1542 b->rowindices = 0; 1543 b->rowvalues = 0; 1544 b->getrowactive = PETSC_FALSE; 1545 1546 *A = B; 1547 return 0; 1548 } 1549 1550 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1551 { 1552 Mat mat; 1553 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1554 int ierr, len=0, flg; 1555 1556 *newmat = 0; 1557 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1558 PLogObjectCreate(mat); 1559 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1560 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1561 mat->destroy = MatDestroy_MPIAIJ; 1562 mat->view = MatView_MPIAIJ; 1563 mat->factor = matin->factor; 1564 mat->assembled = PETSC_TRUE; 1565 1566 a->m = mat->m = oldmat->m; 1567 a->n = mat->n = oldmat->n; 1568 a->M = mat->M = oldmat->M; 1569 a->N = mat->N = oldmat->N; 1570 1571 a->rstart = oldmat->rstart; 1572 a->rend = oldmat->rend; 1573 a->cstart = oldmat->cstart; 1574 a->cend = oldmat->cend; 1575 a->size = oldmat->size; 1576 a->rank = oldmat->rank; 1577 a->insertmode = NOT_SET_VALUES; 1578 a->rowvalues = 0; 1579 a->getrowactive = PETSC_FALSE; 1580 1581 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1582 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1583 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1584 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1585 if (oldmat->colmap) { 1586 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1587 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1588 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1589 } else a->colmap = 0; 1590 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1591 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1592 PLogObjectMemory(mat,len*sizeof(int)); 1593 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1594 } else a->garray = 0; 1595 1596 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1597 PLogObjectParent(mat,a->lvec); 1598 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1599 PLogObjectParent(mat,a->Mvctx); 1600 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1601 PLogObjectParent(mat,a->A); 1602 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1603 PLogObjectParent(mat,a->B); 1604 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1605 if (flg) { 1606 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1607 } 1608 *newmat = mat; 1609 return 0; 1610 } 1611 1612 #include "sys.h" 1613 1614 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1615 { 1616 Mat A; 1617 int i, nz, ierr, j,rstart, rend, fd; 1618 Scalar *vals,*svals; 1619 MPI_Comm comm = ((PetscObject)viewer)->comm; 1620 MPI_Status status; 1621 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1622 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1623 int tag = ((PetscObject)viewer)->tag; 1624 1625 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1626 if (!rank) { 1627 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1628 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1629 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1630 } 1631 1632 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1633 M = header[1]; N = header[2]; 1634 /* determine ownership of all rows */ 1635 m = M/size + ((M % size) > rank); 1636 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1637 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1638 rowners[0] = 0; 1639 for ( i=2; i<=size; i++ ) { 1640 rowners[i] += rowners[i-1]; 1641 } 1642 rstart = rowners[rank]; 1643 rend = rowners[rank+1]; 1644 1645 /* distribute row lengths to all processors */ 1646 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1647 offlens = ourlens + (rend-rstart); 1648 if (!rank) { 1649 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1650 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1651 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1652 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1653 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1654 PetscFree(sndcounts); 1655 } 1656 else { 1657 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1658 } 1659 1660 if (!rank) { 1661 /* calculate the number of nonzeros on each processor */ 1662 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1663 PetscMemzero(procsnz,size*sizeof(int)); 1664 for ( i=0; i<size; i++ ) { 1665 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1666 procsnz[i] += rowlengths[j]; 1667 } 1668 } 1669 PetscFree(rowlengths); 1670 1671 /* determine max buffer needed and allocate it */ 1672 maxnz = 0; 1673 for ( i=0; i<size; i++ ) { 1674 maxnz = PetscMax(maxnz,procsnz[i]); 1675 } 1676 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1677 1678 /* read in my part of the matrix column indices */ 1679 nz = procsnz[0]; 1680 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1681 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1682 1683 /* read in every one elses and ship off */ 1684 for ( i=1; i<size; i++ ) { 1685 nz = procsnz[i]; 1686 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1687 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1688 } 1689 PetscFree(cols); 1690 } 1691 else { 1692 /* determine buffer space needed for message */ 1693 nz = 0; 1694 for ( i=0; i<m; i++ ) { 1695 nz += ourlens[i]; 1696 } 1697 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1698 1699 /* receive message of column indices*/ 1700 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1701 MPI_Get_count(&status,MPI_INT,&maxnz); 1702 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1703 } 1704 1705 /* loop over local rows, determining number of off diagonal entries */ 1706 PetscMemzero(offlens,m*sizeof(int)); 1707 jj = 0; 1708 for ( i=0; i<m; i++ ) { 1709 for ( j=0; j<ourlens[i]; j++ ) { 1710 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1711 jj++; 1712 } 1713 } 1714 1715 /* create our matrix */ 1716 for ( i=0; i<m; i++ ) { 1717 ourlens[i] -= offlens[i]; 1718 } 1719 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1720 A = *newmat; 1721 MatSetOption(A,COLUMNS_SORTED); 1722 for ( i=0; i<m; i++ ) { 1723 ourlens[i] += offlens[i]; 1724 } 1725 1726 if (!rank) { 1727 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1728 1729 /* read in my part of the matrix numerical values */ 1730 nz = procsnz[0]; 1731 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1732 1733 /* insert into matrix */ 1734 jj = rstart; 1735 smycols = mycols; 1736 svals = vals; 1737 for ( i=0; i<m; i++ ) { 1738 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1739 smycols += ourlens[i]; 1740 svals += ourlens[i]; 1741 jj++; 1742 } 1743 1744 /* read in other processors and ship out */ 1745 for ( i=1; i<size; i++ ) { 1746 nz = procsnz[i]; 1747 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1748 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1749 } 1750 PetscFree(procsnz); 1751 } 1752 else { 1753 /* receive numeric values */ 1754 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1755 1756 /* receive message of values*/ 1757 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1758 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1759 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1760 1761 /* insert into matrix */ 1762 jj = rstart; 1763 smycols = mycols; 1764 svals = vals; 1765 for ( i=0; i<m; i++ ) { 1766 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1767 smycols += ourlens[i]; 1768 svals += ourlens[i]; 1769 jj++; 1770 } 1771 } 1772 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1773 1774 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1775 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1776 return 0; 1777 } 1778