#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.137 1996/04/03 14:39:22 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way since the 
   length of colmap equals the global matrix length. 
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
      1) each message will consist of ordered triples
         (global row index, global column index, value); the indices are
         stored as Scalars (doubles) to simplify the message passing.
      2) since we don't know how long each individual message is we
         allocate the largest needed buffer for each receive. Potentially
         this is a lot of wasted space.

     This could be done better.
  */
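  /* For concreteness: the k-th stashed entry bound for another processor
     occupies three consecutive Scalars in the send buffer below,
         svalues[3*k]   = (Scalar) global row index
         svalues[3*k+1] = (Scalar) global column index
         svalues[3*k+2] = numerical value
     which is why every buffer length in this routine is a multiple of 3. */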
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);
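  /* every stashed off-processor value has now been unpacked into either the
     diagonal block A or the off-diagonal block B; what remains is to assemble
     the two sequential sub-blocks and (re)build the scatter used for
     matrix-vector products */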
  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access 
   aij->A and aij->B directly and not through the MatZeroRows() 
   routine. 
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 extern int MatMult_SeqAIJ(Mat A,Vec,Vec); 438 extern int MatMultAdd_SeqAIJ(Mat A,Vec,Vec,Vec); 439 extern int MatMultTrans_SeqAIJ(Mat A,Vec,Vec); 440 extern int MatMultTransAdd_SeqAIJ(Mat A,Vec,Vec,Vec); 441 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 442 { 443 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 444 int ierr; 445 446 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 447 ierr = MatMult_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr); 448 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 449 ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 450 return 0; 451 } 452 453 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 454 { 455 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 456 int ierr; 457 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 458 ierr = MatMultAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr); 459 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 460 ierr = MatMultAdd_SeqAIJ(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 461 return 0; 462 } 463 464 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 465 { 466 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 467 int ierr; 468 469 /* do nondiagonal part */ 470 ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr); 471 /* send it on its way */ 472 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 473 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 474 /* do local part */ 475 ierr = 
MatMultTrans_SeqAIJ(a->A,xx,yy); CHKERRQ(ierr); 476 /* receive remote parts: note this assumes the values are not actually */ 477 /* inserted in yy until the next line, which is true for my implementation*/ 478 /* but is not perhaps always true. */ 479 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 480 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 481 return 0; 482 } 483 484 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 487 int ierr; 488 489 /* do nondiagonal part */ 490 ierr = MatMultTrans_SeqAIJ(a->B,xx,a->lvec); CHKERRQ(ierr); 491 /* send it on its way */ 492 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 493 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 494 /* do local part */ 495 ierr = MatMultTransAdd_SeqAIJ(a->A,xx,yy,zz); CHKERRQ(ierr); 496 /* receive remote parts: note this assumes the values are not actually */ 497 /* inserted in yy until the next line, which is true for my implementation*/ 498 /* but is not perhaps always true. */ 499 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 500 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 501 return 0; 502 } 503 504 /* 505 This only works correctly for square matrices where the subblock A->A is the 506 diagonal block 507 */ 508 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 509 { 510 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 511 if (a->M != a->N) 512 SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 513 return MatGetDiagonal(a->A,v); 514 } 515 516 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 517 { 518 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 519 int ierr; 520 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 521 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 522 return 0; 523 } 524 525 static int MatDestroy_MPIAIJ(PetscObject obj) 526 { 527 Mat mat = (Mat) obj; 528 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 529 int ierr; 530 #if defined(PETSC_LOG) 531 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 532 #endif 533 PetscFree(aij->rowners); 534 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 535 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 536 if (aij->colmap) PetscFree(aij->colmap); 537 if (aij->garray) PetscFree(aij->garray); 538 if (aij->lvec) VecDestroy(aij->lvec); 539 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 540 if (aij->rowvalues) PetscFree(aij->rowvalues); 541 PetscFree(aij); 542 PLogObjectDestroy(mat); 543 PetscHeaderDestroy(mat); 544 return 0; 545 } 546 #include "draw.h" 547 #include "pinclude/pviewer.h" 548 549 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 550 { 551 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 552 int ierr; 553 554 if (aij->size == 1) { 555 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 556 } 557 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 558 return 0; 559 } 560 561 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 562 { 563 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 564 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 565 int ierr, format,shift = C->indexshift,rank; 566 FILE *fd; 567 ViewerType vtype; 568 569 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 570 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 571 ierr = ViewerGetFormat(viewer,&format); 572 if (format == ASCII_FORMAT_INFO_DETAILED) { 573 int nz, nzalloc, mem, flg; 574 MPI_Comm_rank(mat->comm,&rank); 575 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 576 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 577 ierr = 
OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 578 PetscSequentialPhaseBegin(mat->comm,1); 579 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 580 rank,aij->m,nz,nzalloc,mem); 581 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 582 rank,aij->m,nz,nzalloc,mem); 583 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 584 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 585 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 586 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 587 fflush(fd); 588 PetscSequentialPhaseEnd(mat->comm,1); 589 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 590 return 0; 591 } 592 else if (format == ASCII_FORMAT_INFO) { 593 return 0; 594 } 595 } 596 597 if (vtype == DRAW_VIEWER) { 598 Draw draw; 599 PetscTruth isnull; 600 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 601 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 602 } 603 604 if (vtype == ASCII_FILE_VIEWER) { 605 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 606 PetscSequentialPhaseBegin(mat->comm,1); 607 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 608 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 609 aij->cend); 610 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 611 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 612 fflush(fd); 613 PetscSequentialPhaseEnd(mat->comm,1); 614 } 615 else { 616 int size = aij->size; 617 rank = aij->rank; 618 if (size == 1) { 619 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 620 } 621 else { 622 /* assemble the entire matrix onto first processor. */ 623 Mat A; 624 Mat_SeqAIJ *Aloc; 625 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 626 Scalar *a; 627 628 if (!rank) { 629 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 630 CHKERRQ(ierr); 631 } 632 else { 633 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 634 CHKERRQ(ierr); 635 } 636 PLogObjectParent(mat,A); 637 638 /* copy over the A part */ 639 Aloc = (Mat_SeqAIJ*) aij->A->data; 640 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 641 row = aij->rstart; 642 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 643 for ( i=0; i<m; i++ ) { 644 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 645 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 646 } 647 aj = Aloc->j; 648 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 649 650 /* copy over the B part */ 651 Aloc = (Mat_SeqAIJ*) aij->B->data; 652 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 653 row = aij->rstart; 654 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 655 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 656 for ( i=0; i<m; i++ ) { 657 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 658 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 659 } 660 PetscFree(ct); 661 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 662 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 663 if (!rank) { 664 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 665 } 666 ierr = MatDestroy(A); CHKERRQ(ierr); 667 } 668 } 669 return 0; 670 } 671 672 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 673 { 674 Mat mat = (Mat) obj; 675 int ierr; 676 ViewerType vtype; 677 678 if (!viewer) { 679 viewer = STDOUT_VIEWER_SELF; 680 } 681 ierr = 
  ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
extern int MatRelax_SeqAIJ(Mat,Vec,double,MatSORType,double,int,Vec);
/*
    This has to provide several versions.

     1) the same as the sequential version
     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D, where L is lower triangular,
       U is upper triangular, and D is diagonal; this routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
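       Concretely, since A = (L + E) + (U + E) + (D - 2E), writing
       t1 = (U + E)^{-1} v gives

           (L + E)^{-1} A (U + E)^{-1} v = t1 + (L + E)^{-1} ( v + (D - 2E) t1 ),

       which is the sequence computed below: x = (E + U)^{-1} b, then
       t = b - (2E - D) x, then t = (E + L)^{-1} t, and finally x = x + t.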
730 */ 731 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 732 PLogObjectParent(matin,tt); 733 VecGetArray(tt,&t); 734 scale = (2.0/omega) - 1.0; 735 /* x = (E + U)^{-1} b */ 736 VecSet(&zero,mat->lvec); 737 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 738 mat->Mvctx); CHKERRQ(ierr); 739 for ( i=m-1; i>-1; i-- ) { 740 n = A->i[i+1] - diag[i] - 1; 741 idx = A->j + diag[i] + !shift; 742 v = A->a + diag[i] + !shift; 743 sum = b[i]; 744 SPARSEDENSEMDOT(sum,xs,v,idx,n); 745 d = fshift + A->a[diag[i]+shift]; 746 n = B->i[i+1] - B->i[i]; 747 idx = B->j + B->i[i] + shift; 748 v = B->a + B->i[i] + shift; 749 SPARSEDENSEMDOT(sum,ls,v,idx,n); 750 x[i] = omega*(sum/d); 751 } 752 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 753 mat->Mvctx); CHKERRQ(ierr); 754 755 /* t = b - (2*E - D)x */ 756 v = A->a; 757 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 758 759 /* t = (E + L)^{-1}t */ 760 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 761 diag = A->diag; 762 VecSet(&zero,mat->lvec); 763 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 764 mat->Mvctx); CHKERRQ(ierr); 765 for ( i=0; i<m; i++ ) { 766 n = diag[i] - A->i[i]; 767 idx = A->j + A->i[i] + shift; 768 v = A->a + A->i[i] + shift; 769 sum = t[i]; 770 SPARSEDENSEMDOT(sum,ts,v,idx,n); 771 d = fshift + A->a[diag[i]+shift]; 772 n = B->i[i+1] - B->i[i]; 773 idx = B->j + B->i[i] + shift; 774 v = B->a + B->i[i] + shift; 775 SPARSEDENSEMDOT(sum,ls,v,idx,n); 776 t[i] = omega*(sum/d); 777 } 778 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 779 mat->Mvctx); CHKERRQ(ierr); 780 /* x = x + t */ 781 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 782 VecDestroy(tt); 783 return 0; 784 } 785 786 787 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 788 if (flag & SOR_ZERO_INITIAL_GUESS) { 789 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 790 } 791 else { 792 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 793 CHKERRQ(ierr); 794 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 795 CHKERRQ(ierr); 796 } 797 while (its--) { 798 /* go down through the rows */ 799 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 800 mat->Mvctx); CHKERRQ(ierr); 801 for ( i=0; i<m; i++ ) { 802 n = A->i[i+1] - A->i[i]; 803 idx = A->j + A->i[i] + shift; 804 v = A->a + A->i[i] + shift; 805 sum = b[i]; 806 SPARSEDENSEMDOT(sum,xs,v,idx,n); 807 d = fshift + A->a[diag[i]+shift]; 808 n = B->i[i+1] - B->i[i]; 809 idx = B->j + B->i[i] + shift; 810 v = B->a + B->i[i] + shift; 811 SPARSEDENSEMDOT(sum,ls,v,idx,n); 812 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 813 } 814 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 815 mat->Mvctx); CHKERRQ(ierr); 816 /* come up through the rows */ 817 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 818 mat->Mvctx); CHKERRQ(ierr); 819 for ( i=m-1; i>-1; i-- ) { 820 n = A->i[i+1] - A->i[i]; 821 idx = A->j + A->i[i] + shift; 822 v = A->a + A->i[i] + shift; 823 sum = b[i]; 824 SPARSEDENSEMDOT(sum,xs,v,idx,n); 825 d = fshift + A->a[diag[i]+shift]; 826 n = B->i[i+1] - B->i[i]; 827 idx = B->j + B->i[i] + shift; 828 v = B->a + B->i[i] + shift; 829 SPARSEDENSEMDOT(sum,ls,v,idx,n); 830 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 831 } 832 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 833 mat->Mvctx); CHKERRQ(ierr); 834 } 835 } 836 else if (flag & SOR_FORWARD_SWEEP){ 837 if (flag & SOR_ZERO_INITIAL_GUESS) { 838 VecSet(&zero,mat->lvec); 839 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 840 mat->Mvctx); CHKERRQ(ierr); 841 for ( i=0; i<m; i++ ) { 842 n = diag[i] - A->i[i]; 843 idx = A->j + A->i[i] + shift; 844 v = A->a + A->i[i] + shift; 845 sum = b[i]; 846 SPARSEDENSEMDOT(sum,xs,v,idx,n); 847 d = fshift + A->a[diag[i]+shift]; 848 n = B->i[i+1] - B->i[i]; 849 idx = B->j + B->i[i] + shift; 850 v = B->a + B->i[i] + shift; 851 SPARSEDENSEMDOT(sum,ls,v,idx,n); 852 x[i] = omega*(sum/d); 853 } 854 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 855 mat->Mvctx); CHKERRQ(ierr); 856 its--; 857 } 858 while (its--) { 859 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 860 CHKERRQ(ierr); 861 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 862 CHKERRQ(ierr); 863 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 864 mat->Mvctx); CHKERRQ(ierr); 865 for ( i=0; i<m; i++ ) { 866 n = A->i[i+1] - A->i[i]; 867 idx = A->j + A->i[i] + shift; 868 v = A->a + A->i[i] + shift; 869 sum = b[i]; 870 SPARSEDENSEMDOT(sum,xs,v,idx,n); 871 d = fshift + A->a[diag[i]+shift]; 872 n = B->i[i+1] - B->i[i]; 873 idx = B->j + B->i[i] + shift; 874 v = B->a + B->i[i] + shift; 875 SPARSEDENSEMDOT(sum,ls,v,idx,n); 876 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 877 } 878 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 879 mat->Mvctx); CHKERRQ(ierr); 880 } 881 } 882 else if (flag & SOR_BACKWARD_SWEEP){ 883 if (flag & SOR_ZERO_INITIAL_GUESS) { 884 VecSet(&zero,mat->lvec); 885 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 886 mat->Mvctx); CHKERRQ(ierr); 887 for ( i=m-1; i>-1; i-- ) { 888 n = A->i[i+1] - diag[i] - 1; 889 idx = A->j + diag[i] + !shift; 890 v = A->a + diag[i] + !shift; 891 sum = b[i]; 892 SPARSEDENSEMDOT(sum,xs,v,idx,n); 893 d = fshift + A->a[diag[i]+shift]; 894 n = B->i[i+1] - B->i[i]; 895 idx = B->j + B->i[i] + shift; 896 v = B->a + B->i[i] + shift; 897 SPARSEDENSEMDOT(sum,ls,v,idx,n); 898 x[i] = omega*(sum/d); 899 } 900 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 901 mat->Mvctx); CHKERRQ(ierr); 902 its--; 903 } 904 while (its--) { 905 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 906 mat->Mvctx); CHKERRQ(ierr); 907 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 908 mat->Mvctx); CHKERRQ(ierr); 909 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 910 mat->Mvctx); CHKERRQ(ierr); 911 for ( i=m-1; i>-1; i-- ) { 912 n = A->i[i+1] - A->i[i]; 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 923 } 924 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 925 mat->Mvctx); CHKERRQ(ierr); 926 } 927 } 928 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 929 if (flag & SOR_ZERO_INITIAL_GUESS) { 930 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 931 } 932 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 933 CHKERRQ(ierr); 934 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 935 CHKERRQ(ierr); 936 while (its--) { 937 /* go down through the rows */ 938 for ( i=0; i<m; i++ ) { 939 n = A->i[i+1] - A->i[i]; 940 idx = A->j + A->i[i] + shift; 941 v = A->a + A->i[i] + shift; 942 sum = b[i]; 943 SPARSEDENSEMDOT(sum,xs,v,idx,n); 944 d = fshift + A->a[diag[i]+shift]; 945 n = B->i[i+1] - B->i[i]; 946 idx = B->j + B->i[i] + shift; 947 v = B->a + B->i[i] + shift; 948 SPARSEDENSEMDOT(sum,ls,v,idx,n); 949 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 950 } 951 /* come up through the rows */ 952 for ( i=m-1; i>-1; i-- ) { 953 n = A->i[i+1] - A->i[i]; 954 idx = A->j + A->i[i] + shift; 955 v = A->a + A->i[i] + shift; 956 sum = b[i]; 957 SPARSEDENSEMDOT(sum,xs,v,idx,n); 958 d = fshift + A->a[diag[i]+shift]; 959 n = B->i[i+1] - B->i[i]; 960 idx = B->j + B->i[i] + shift; 961 v = B->a + B->i[i] + shift; 962 SPARSEDENSEMDOT(sum,ls,v,idx,n); 963 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 964 } 965 } 966 } 967 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 968 if (flag & SOR_ZERO_INITIAL_GUESS) { 969 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 970 } 971 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 972 CHKERRQ(ierr); 973 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 974 CHKERRQ(ierr); 975 while (its--) { 976 for ( i=0; i<m; i++ ) { 977 n = A->i[i+1] - A->i[i]; 978 idx = A->j + A->i[i] + shift; 979 v = A->a + A->i[i] + shift; 980 sum = b[i]; 981 SPARSEDENSEMDOT(sum,xs,v,idx,n); 982 d = fshift + A->a[diag[i]+shift]; 983 n = B->i[i+1] - B->i[i]; 984 idx = B->j + B->i[i] + shift; 985 v = B->a + B->i[i] + shift; 986 SPARSEDENSEMDOT(sum,ls,v,idx,n); 987 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 988 } 989 } 990 } 991 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 992 if (flag & SOR_ZERO_INITIAL_GUESS) { 993 return MatRelax_SeqAIJ(mat->A,bb,omega,flag,fshift,its,xx); 994 } 995 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 996 mat->Mvctx); CHKERRQ(ierr); 997 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 998 mat->Mvctx); CHKERRQ(ierr); 999 while (its--) { 1000 for ( i=m-1; i>-1; i-- ) { 1001 n = A->i[i+1] - A->i[i]; 1002 idx = A->j + A->i[i] + shift; 1003 v = A->a + A->i[i] + shift; 1004 sum = b[i]; 1005 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1006 d = fshift + A->a[diag[i]+shift]; 1007 n = B->i[i+1] - B->i[i]; 1008 idx = B->j + B->i[i] + shift; 1009 v = B->a + B->i[i] + shift; 1010 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1011 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 1012 } 1013 } 1014 } 1015 return 0; 1016 } 1017 1018 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1019 int *nzalloc,int *mem) 1020 { 1021 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1022 Mat A = mat->A, B = mat->B; 1023 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1024 1025 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1026 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1027 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1028 if (flag == MAT_LOCAL) { 1029 if (nz) *nz = isend[0]; 1030 if (nzalloc) *nzalloc = isend[1]; 1031 if (mem) *mem = isend[2]; 1032 } else if (flag == MAT_GLOBAL_MAX) { 1033 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1034 if (nz) *nz = irecv[0]; 1035 if (nzalloc) *nzalloc = irecv[1]; 1036 if (mem) *mem = irecv[2]; 1037 } else if (flag == MAT_GLOBAL_SUM) { 1038 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1039 if (nz) *nz = irecv[0]; 1040 if (nzalloc) *nzalloc = irecv[1]; 1041 if (mem) *mem = irecv[2]; 1042 } 1043 return 0; 1044 } 1045 1046 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1047 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1048 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1049 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1050 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1051 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1052 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1053 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1054 1055 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1058 1059 if (op == NO_NEW_NONZERO_LOCATIONS || 1060 op == YES_NEW_NONZERO_LOCATIONS || 1061 op == COLUMNS_SORTED || 1062 op == ROW_ORIENTED) { 1063 MatSetOption(a->A,op); 1064 MatSetOption(a->B,op); 1065 } 1066 else if (op == ROWS_SORTED || 1067 op == SYMMETRIC_MATRIX || 1068 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1069 op == YES_NEW_DIAGONALS) 1070 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1071 else if (op == COLUMN_ORIENTED) { 1072 a->roworiented = 0; 1073 MatSetOption(a->A,op); 1074 MatSetOption(a->B,op); 1075 } 1076 else if (op == NO_NEW_DIAGONALS) 1077 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1078 else 1079 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1080 return 0; 1081 } 1082 1083 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1084 { 1085 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1086 *m = mat->M; *n = mat->N; 1087 return 0; 1088 } 1089 1090 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1093 *m = mat->m; *n = mat->N; 1094 return 0; 1095 } 1096 1097 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1098 { 1099 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1100 *m = mat->rstart; *n = mat->rend; 1101 return 0; 1102 } 1103 1104 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1105 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1106 1107 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1108 { 1109 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1110 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1111 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1112 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1113 int *cmap, *idx_p; 1114 1115 if (mat->getrowactive == PETSC_TRUE) 
SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1116 mat->getrowactive = PETSC_TRUE; 1117 1118 if (!mat->rowvalues && (idx || v)) { 1119 /* 1120 allocate enough space to hold information from the longest row. 1121 */ 1122 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1123 int max = 1,n = mat->n,tmp; 1124 for ( i=0; i<n; i++ ) { 1125 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1126 if (max < tmp) { max = tmp; } 1127 } 1128 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1129 CHKPTRQ(mat->rowvalues); 1130 mat->rowindices = (int *) (mat->rowvalues + max); 1131 } 1132 1133 1134 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1135 lrow = row - rstart; 1136 1137 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1138 if (!v) {pvA = 0; pvB = 0;} 1139 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1140 ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1141 ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1142 nztot = nzA + nzB; 1143 1144 cmap = mat->garray; 1145 if (v || idx) { 1146 if (nztot) { 1147 /* Sort by increasing column numbers, assuming A and B already sorted */ 1148 int imark = -1; 1149 if (v) { 1150 *v = v_p = mat->rowvalues; 1151 for ( i=0; i<nzB; i++ ) { 1152 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1153 else break; 1154 } 1155 imark = i; 1156 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1157 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1158 } 1159 if (idx) { 1160 *idx = idx_p = mat->rowindices; 1161 if (imark > -1) { 1162 for ( i=0; i<imark; i++ ) { 1163 idx_p[i] = cmap[cworkB[i]]; 1164 } 1165 } else { 1166 for ( i=0; i<nzB; i++ ) { 1167 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1168 else break; 1169 } 1170 imark = i; 1171 } 1172 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1173 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1174 } 1175 } 1176 else {*idx = 0; *v=0;} 1177 } 1178 *nz = nztot; 1179 ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1180 ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1181 return 0; 1182 } 1183 1184 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1185 { 1186 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1187 if (aij->getrowactive == PETSC_FALSE) { 1188 SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 1189 } 1190 aij->getrowactive = PETSC_FALSE; 1191 return 0; 1192 } 1193 1194 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1195 { 1196 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1197 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1198 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1199 double sum = 0.0; 1200 Scalar *v; 1201 1202 if (aij->size == 1) { 1203 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1204 } else { 1205 if (type == NORM_FROBENIUS) { 1206 v = amat->a; 1207 for (i=0; i<amat->nz; i++ ) { 1208 #if defined(PETSC_COMPLEX) 1209 sum += real(conj(*v)*(*v)); v++; 1210 #else 1211 sum += (*v)*(*v); v++; 1212 #endif 1213 } 1214 v = bmat->a; 1215 for (i=0; i<bmat->nz; i++ ) { 1216 #if defined(PETSC_COMPLEX) 1217 sum += real(conj(*v)*(*v)); v++; 1218 #else 1219 sum += (*v)*(*v); v++; 1220 #endif 1221 } 1222 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1223 *norm = sqrt(*norm); 1224 } 1225 else if (type == NORM_1) { /* max column norm */ 1226 double *tmp, *tmp2; 1227 int *jj, *garray = aij->garray; 1228 tmp = 
(double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1229 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1230 PetscMemzero(tmp,aij->N*sizeof(double)); 1231 *norm = 0.0; 1232 v = amat->a; jj = amat->j; 1233 for ( j=0; j<amat->nz; j++ ) { 1234 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1235 } 1236 v = bmat->a; jj = bmat->j; 1237 for ( j=0; j<bmat->nz; j++ ) { 1238 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1239 } 1240 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1241 for ( j=0; j<aij->N; j++ ) { 1242 if (tmp2[j] > *norm) *norm = tmp2[j]; 1243 } 1244 PetscFree(tmp); PetscFree(tmp2); 1245 } 1246 else if (type == NORM_INFINITY) { /* max row norm */ 1247 double ntemp = 0.0; 1248 for ( j=0; j<amat->m; j++ ) { 1249 v = amat->a + amat->i[j] + shift; 1250 sum = 0.0; 1251 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1252 sum += PetscAbsScalar(*v); v++; 1253 } 1254 v = bmat->a + bmat->i[j] + shift; 1255 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1256 sum += PetscAbsScalar(*v); v++; 1257 } 1258 if (sum > ntemp) ntemp = sum; 1259 } 1260 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1261 } 1262 else { 1263 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1264 } 1265 } 1266 return 0; 1267 } 1268 1269 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1270 { 1271 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1272 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1273 int ierr,shift = Aloc->indexshift; 1274 Mat B; 1275 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1276 Scalar *array; 1277 1278 if (matout == PETSC_NULL && M != N) 1279 SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1280 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1281 PETSC_NULL,&B); CHKERRQ(ierr); 1282 1283 /* copy over the A part */ 1284 Aloc = (Mat_SeqAIJ*) a->A->data; 1285 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1286 row = a->rstart; 1287 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1288 for ( i=0; i<m; i++ ) { 1289 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1290 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1291 } 1292 aj = Aloc->j; 1293 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1294 1295 /* copy over the B part */ 1296 Aloc = (Mat_SeqAIJ*) a->B->data; 1297 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1298 row = a->rstart; 1299 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1300 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1301 for ( i=0; i<m; i++ ) { 1302 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1303 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1304 } 1305 PetscFree(ct); 1306 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1307 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1308 if (matout != PETSC_NULL) { 1309 *matout = B; 1310 } else { 1311 /* This isn't really an in-place transpose .... 
       but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated 
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated 
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room 
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local 
           submatrix or null (possibly different for each row).

   Output Parameter:
.  A - the matrix 

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.
   That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix); 
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For additional details, see the users manual
   chapter on matrices and the file $(PETSC_DIR)/Performance.
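   Example:
   A typical call, letting PETSc choose the local sizes and giving rough
   per-row nonzero estimates (the counts 5 and 2 below are illustrative
   values, not required ones), is
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
   followed by calls to MatSetValues() and then MatAssemblyBegin() and
   MatAssemblyEnd() with FINAL_ASSEMBLY.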
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2];

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 1;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
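/*
   Example (a minimal sketch, not part of the library proper): typical
   user-level assembly of a 1D Laplacian with this matrix type, using only
   routines implemented or documented above (MatCreateMPIAIJ, MatSetValues,
   MatGetOwnershipRange, MatAssemblyBegin/End).  M is assumed to be the
   global problem size; error checking is abbreviated.

     Mat    A;
     int    ierr, rstart, rend, i, cols[3];
     Scalar vals[3] = {-1.0, 2.0, -1.0};

     ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,
                            3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( i=rstart; i<rend; i++ ) {
       cols[0] = i-1; cols[1] = i; cols[2] = i+1;
       if (i == 0) {
         ierr = MatSetValues(A,1,&i,2,cols+1,vals+1,INSERT_VALUES); CHKERRQ(ierr);
       } else if (i == M-1) {
         ierr = MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
       } else {
         ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/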