1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.128 1996/02/27 23:21:05 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too 
large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = 
idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = 
work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 217 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 
218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 229 { 230 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 231 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 232 MPI_Status *send_status,recv_status; 233 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 234 int row,col,other_disassembled,shift = C->indexshift; 235 Scalar *values,val; 236 InsertMode addv = aij->insertmode; 237 238 /* wait on receives */ 239 while (count) { 240 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 241 /* unpack receives into our local space */ 242 values = aij->rvalues + 3*imdex*aij->rmax; 243 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 244 n = n/3; 245 for ( i=0; i<n; i++ ) { 246 row = (int) PetscReal(values[3*i]) - aij->rstart; 247 col = (int) PetscReal(values[3*i+1]); 248 val = values[3*i+2]; 249 if (col >= aij->cstart && col < aij->cend) { 250 col -= aij->cstart; 251 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 252 } 253 else { 254 if (mat->was_assembled) { 255 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 256 col = aij->colmap[col] + shift; 257 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 258 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 259 col = (int) PetscReal(values[3*i+1]); 260 } 261 } 262 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 263 } 264 } 265 count--; 266 } 267 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 268 269 /* wait on sends */ 270 if (aij->nsends) { 271 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 272 CHKPTRQ(send_status); 273 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 274 PetscFree(send_status); 275 } 276 PetscFree(aij->send_waits); PetscFree(aij->svalues); 277 278 aij->insertmode = 
NOT_SET_VALUES; 279 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 280 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 281 282 /* determine if any processor has disassembled, if so we must 283 also disassemble ourselfs, in order that we may reassemble. */ 284 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 285 if (mat->was_assembled && !other_disassembled) { 286 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 287 } 288 289 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 290 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 291 } 292 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 293 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 294 295 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 296 return 0; 297 } 298 299 static int MatZeroEntries_MPIAIJ(Mat A) 300 { 301 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 302 int ierr; 303 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 304 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 305 return 0; 306 } 307 308 /* the code does not do the diagonal entries correctly unless the 309 matrix is square and the column and row owerships are identical. 310 This is a BUG. The only way to fix it seems to be to access 311 aij->A and aij->B directly and not through the MatZeroRows() 312 routine. 
313 */ 314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 315 { 316 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 317 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 318 int *procs,*nprocs,j,found,idx,nsends,*work; 319 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 320 int *rvalues,tag = A->tag,count,base,slen,n,*source; 321 int *lens,imdex,*lrows,*values; 322 MPI_Comm comm = A->comm; 323 MPI_Request *send_waits,*recv_waits; 324 MPI_Status recv_status,*send_status; 325 IS istmp; 326 327 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 328 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 329 330 /* first count number of contributors to each processor */ 331 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 332 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 333 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 334 for ( i=0; i<N; i++ ) { 335 idx = rows[i]; 336 found = 0; 337 for ( j=0; j<size; j++ ) { 338 if (idx >= owners[j] && idx < owners[j+1]) { 339 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 340 } 341 } 342 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 343 } 344 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 345 346 /* inform other processors of number of messages and max length*/ 347 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 348 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 349 nrecvs = work[rank]; 350 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 351 nmax = work[rank]; 352 PetscFree(work); 353 354 /* post receives: */ 355 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 356 CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 358 CHKPTRQ(recv_waits); 359 for ( i=0; i<nrecvs; i++ ) { 360 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting 
index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<N; i++ ) { 374 svalues[starts[owner[i]]++] = rows[i]; 375 } 376 ISRestoreIndices(is,&rows); 377 378 starts[0] = 0; 379 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 384 } 385 } 386 PetscFree(starts); 387 388 base = owners[rank]; 389 390 /* wait on receives */ 391 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 PetscFree(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 PetscFree(rvalues); PetscFree(lens); 415 PetscFree(owner); PetscFree(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 419 PLogObjectParent(A,istmp); 420 PetscFree(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 423 ierr = ISDestroy(istmp); CHKERRQ(ierr); 424 425 /* wait on sends */ 
426 if (nsends) { 427 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 428 CHKPTRQ(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 PetscFree(send_status); 431 } 432 PetscFree(send_waits); PetscFree(svalues); 433 434 return 0; 435 } 436 437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 440 int ierr; 441 442 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 444 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 445 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 446 return 0; 447 } 448 449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 452 int ierr; 453 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 455 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 456 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 457 return 0; 458 } 459 460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 461 { 462 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 463 int ierr; 464 465 /* do nondiagonal part */ 466 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 467 /* send it on its way */ 468 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 469 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 470 /* do local part */ 471 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 472 /* receive remote parts: note this assumes the values are not actually */ 473 /* inserted in yy until the next line, which is true for my implementation*/ 474 /* but is not perhaps always true. 
*/ 475 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 476 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 477 return 0; 478 } 479 480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 483 int ierr; 484 485 /* do nondiagonal part */ 486 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 487 /* send it on its way */ 488 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 489 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490 /* do local part */ 491 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 492 /* receive remote parts: note this assumes the values are not actually */ 493 /* inserted in yy until the next line, which is true for my implementation*/ 494 /* but is not perhaps always true. */ 495 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 496 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 497 return 0; 498 } 499 500 /* 501 This only works correctly for square matrices where the subblock A->A is the 502 diagonal block 503 */ 504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 505 { 506 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 507 return MatGetDiagonal(a->A,v); 508 } 509 510 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 511 { 512 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 513 int ierr; 514 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 515 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 516 return 0; 517 } 518 519 static int MatDestroy_MPIAIJ(PetscObject obj) 520 { 521 Mat mat = (Mat) obj; 522 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 523 int ierr; 524 #if defined(PETSC_LOG) 525 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 526 #endif 527 PetscFree(aij->rowners); 528 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 529 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 530 if (aij->colmap) PetscFree(aij->colmap); 531 if (aij->garray) PetscFree(aij->garray); 532 if (aij->lvec) VecDestroy(aij->lvec); 533 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 534 if 
(aij->rowvalues) PetscFree(aij->rowvalues); 535 PetscFree(aij); 536 PLogObjectDestroy(mat); 537 PetscHeaderDestroy(mat); 538 return 0; 539 } 540 #include "draw.h" 541 #include "pinclude/pviewer.h" 542 543 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 546 int ierr; 547 548 if (aij->size == 1) { 549 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 550 } 551 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 552 return 0; 553 } 554 555 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 556 { 557 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 558 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 559 int ierr, format,shift = C->indexshift,rank; 560 PetscObject vobj = (PetscObject) viewer; 561 FILE *fd; 562 563 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 564 ierr = ViewerFileGetFormat_Private(viewer,&format); 565 if (format == FILE_FORMAT_INFO_DETAILED) { 566 int nz, nzalloc, mem, flg; 567 MPI_Comm_rank(mat->comm,&rank); 568 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 569 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 570 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 571 MPIU_Seq_begin(mat->comm,1); 572 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 573 rank,aij->m,nz,nzalloc,mem); 574 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 575 rank,aij->m,nz,nzalloc,mem); 576 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 577 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 578 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 579 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 580 fflush(fd); 581 MPIU_Seq_end(mat->comm,1); 582 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 583 return 0; 584 } 585 else if (format == FILE_FORMAT_INFO) { 586 return 0; 587 } 588 } 589 590 if 
(vobj->type == ASCII_FILE_VIEWER) { 591 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 592 MPIU_Seq_begin(mat->comm,1); 593 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 594 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 595 aij->cend); 596 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 597 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 598 fflush(fd); 599 MPIU_Seq_end(mat->comm,1); 600 } 601 else { 602 int size = aij->size; 603 rank = aij->rank; 604 if (size == 1) { 605 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 606 } 607 else { 608 /* assemble the entire matrix onto first processor. */ 609 Mat A; 610 Mat_SeqAIJ *Aloc; 611 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 612 Scalar *a; 613 614 if (!rank) { 615 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 616 CHKERRQ(ierr); 617 } 618 else { 619 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 620 CHKERRQ(ierr); 621 } 622 PLogObjectParent(mat,A); 623 624 /* copy over the A part */ 625 Aloc = (Mat_SeqAIJ*) aij->A->data; 626 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 627 row = aij->rstart; 628 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 629 for ( i=0; i<m; i++ ) { 630 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 631 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 632 } 633 aj = Aloc->j; 634 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 635 636 /* copy over the B part */ 637 Aloc = (Mat_SeqAIJ*) aij->B->data; 638 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 639 row = aij->rstart; 640 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 641 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 642 for ( i=0; i<m; i++ ) { 643 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 644 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 645 } 646 PetscFree(ct); 647 ierr = 
MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 648 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 649 if (!rank) { 650 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 651 } 652 ierr = MatDestroy(A); CHKERRQ(ierr); 653 } 654 } 655 return 0; 656 } 657 658 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 659 { 660 Mat mat = (Mat) obj; 661 int ierr; 662 PetscObject vobj = (PetscObject) viewer; 663 664 if (!viewer) { 665 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 666 } 667 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 668 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 669 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670 } 671 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 672 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673 } 674 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 675 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 676 } 677 else if (vobj->cookie == DRAW_COOKIE) { 678 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 679 } 680 else if (vobj->type == BINARY_FILE_VIEWER) { 681 return MatView_MPIAIJ_Binary(mat,viewer); 682 } 683 return 0; 684 } 685 686 extern int MatMarkDiag_SeqAIJ(Mat); 687 /* 688 This has to provide several versions. 689 690 1) per sequential 691 2) a) use only local smoothing updating outer values only once. 692 b) local smoothing updating outer values each inner iteration 693 3) color updating out values betwen colors. 
694 */ 695 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 696 double fshift,int its,Vec xx) 697 { 698 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 699 Mat AA = mat->A, BB = mat->B; 700 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 701 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 702 int ierr,*idx, *diag; 703 int n = mat->n, m = mat->m, i,shift = A->indexshift; 704 Vec tt; 705 706 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 707 xs = x + shift; /* shift by one for index start of 1 */ 708 ls = ls + shift; 709 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 710 diag = A->diag; 711 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 712 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 713 } 714 if (flag & SOR_EISENSTAT) { 715 /* Let A = L + U + D; where L is lower trianglar, 716 U is upper triangular, E is diagonal; This routine applies 717 718 (L + E)^{-1} A (U + E)^{-1} 719 720 to a vector efficiently using Eisenstat's trick. This is for 721 the case of SSOR preconditioner, so E is D/omega where omega 722 is the relaxation factor. 
723 */ 724 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 725 PLogObjectParent(matin,tt); 726 VecGetArray(tt,&t); 727 scale = (2.0/omega) - 1.0; 728 /* x = (E + U)^{-1} b */ 729 VecSet(&zero,mat->lvec); 730 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 731 mat->Mvctx); CHKERRQ(ierr); 732 for ( i=m-1; i>-1; i-- ) { 733 n = A->i[i+1] - diag[i] - 1; 734 idx = A->j + diag[i] + !shift; 735 v = A->a + diag[i] + !shift; 736 sum = b[i]; 737 SPARSEDENSEMDOT(sum,xs,v,idx,n); 738 d = fshift + A->a[diag[i]+shift]; 739 n = B->i[i+1] - B->i[i]; 740 idx = B->j + B->i[i] + shift; 741 v = B->a + B->i[i] + shift; 742 SPARSEDENSEMDOT(sum,ls,v,idx,n); 743 x[i] = omega*(sum/d); 744 } 745 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 746 mat->Mvctx); CHKERRQ(ierr); 747 748 /* t = b - (2*E - D)x */ 749 v = A->a; 750 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 751 752 /* t = (E + L)^{-1}t */ 753 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 754 diag = A->diag; 755 VecSet(&zero,mat->lvec); 756 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 757 mat->Mvctx); CHKERRQ(ierr); 758 for ( i=0; i<m; i++ ) { 759 n = diag[i] - A->i[i]; 760 idx = A->j + A->i[i] + shift; 761 v = A->a + A->i[i] + shift; 762 sum = t[i]; 763 SPARSEDENSEMDOT(sum,ts,v,idx,n); 764 d = fshift + A->a[diag[i]+shift]; 765 n = B->i[i+1] - B->i[i]; 766 idx = B->j + B->i[i] + shift; 767 v = B->a + B->i[i] + shift; 768 SPARSEDENSEMDOT(sum,ls,v,idx,n); 769 t[i] = omega*(sum/d); 770 } 771 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 772 mat->Mvctx); CHKERRQ(ierr); 773 /* x = x + t */ 774 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 775 VecDestroy(tt); 776 return 0; 777 } 778 779 780 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 781 if (flag & SOR_ZERO_INITIAL_GUESS) { 782 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 783 } 784 else { 785 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 786 
CHKERRQ(ierr); 787 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 788 CHKERRQ(ierr); 789 } 790 while (its--) { 791 /* go down through the rows */ 792 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 793 mat->Mvctx); CHKERRQ(ierr); 794 for ( i=0; i<m; i++ ) { 795 n = A->i[i+1] - A->i[i]; 796 idx = A->j + A->i[i] + shift; 797 v = A->a + A->i[i] + shift; 798 sum = b[i]; 799 SPARSEDENSEMDOT(sum,xs,v,idx,n); 800 d = fshift + A->a[diag[i]+shift]; 801 n = B->i[i+1] - B->i[i]; 802 idx = B->j + B->i[i] + shift; 803 v = B->a + B->i[i] + shift; 804 SPARSEDENSEMDOT(sum,ls,v,idx,n); 805 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 806 } 807 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 808 mat->Mvctx); CHKERRQ(ierr); 809 /* come up through the rows */ 810 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 811 mat->Mvctx); CHKERRQ(ierr); 812 for ( i=m-1; i>-1; i-- ) { 813 n = A->i[i+1] - A->i[i]; 814 idx = A->j + A->i[i] + shift; 815 v = A->a + A->i[i] + shift; 816 sum = b[i]; 817 SPARSEDENSEMDOT(sum,xs,v,idx,n); 818 d = fshift + A->a[diag[i]+shift]; 819 n = B->i[i+1] - B->i[i]; 820 idx = B->j + B->i[i] + shift; 821 v = B->a + B->i[i] + shift; 822 SPARSEDENSEMDOT(sum,ls,v,idx,n); 823 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 824 } 825 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 826 mat->Mvctx); CHKERRQ(ierr); 827 } 828 } 829 else if (flag & SOR_FORWARD_SWEEP){ 830 if (flag & SOR_ZERO_INITIAL_GUESS) { 831 VecSet(&zero,mat->lvec); 832 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 833 mat->Mvctx); CHKERRQ(ierr); 834 for ( i=0; i<m; i++ ) { 835 n = diag[i] - A->i[i]; 836 idx = A->j + A->i[i] + shift; 837 v = A->a + A->i[i] + shift; 838 sum = b[i]; 839 SPARSEDENSEMDOT(sum,xs,v,idx,n); 840 d = fshift + A->a[diag[i]+shift]; 841 n = B->i[i+1] - B->i[i]; 842 idx = B->j + B->i[i] + shift; 843 v = B->a + B->i[i] + shift; 844 SPARSEDENSEMDOT(sum,ls,v,idx,n); 845 x[i] = omega*(sum/d); 846 } 847 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 848 mat->Mvctx); CHKERRQ(ierr); 849 its--; 850 } 851 while (its--) { 852 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 853 CHKERRQ(ierr); 854 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 855 CHKERRQ(ierr); 856 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 857 mat->Mvctx); CHKERRQ(ierr); 858 for ( i=0; i<m; i++ ) { 859 n = A->i[i+1] - A->i[i]; 860 idx = A->j + A->i[i] + shift; 861 v = A->a + A->i[i] + shift; 862 sum = b[i]; 863 SPARSEDENSEMDOT(sum,xs,v,idx,n); 864 d = fshift + A->a[diag[i]+shift]; 865 n = B->i[i+1] - B->i[i]; 866 idx = B->j + B->i[i] + shift; 867 v = B->a + B->i[i] + shift; 868 SPARSEDENSEMDOT(sum,ls,v,idx,n); 869 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 870 } 871 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 872 mat->Mvctx); CHKERRQ(ierr); 873 } 874 } 875 else if (flag & SOR_BACKWARD_SWEEP){ 876 if (flag & SOR_ZERO_INITIAL_GUESS) { 877 VecSet(&zero,mat->lvec); 878 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 879 mat->Mvctx); CHKERRQ(ierr); 880 for ( i=m-1; i>-1; i-- ) { 881 n = A->i[i+1] - diag[i] - 1; 882 idx = A->j + diag[i] + !shift; 883 v = A->a + diag[i] + !shift; 884 sum = b[i]; 885 SPARSEDENSEMDOT(sum,xs,v,idx,n); 886 d = fshift + A->a[diag[i]+shift]; 887 n = B->i[i+1] - B->i[i]; 888 idx = B->j + B->i[i] + shift; 889 v = B->a + B->i[i] + shift; 890 SPARSEDENSEMDOT(sum,ls,v,idx,n); 891 x[i] = omega*(sum/d); 892 } 893 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 894 mat->Mvctx); CHKERRQ(ierr); 895 its--; 896 } 897 while (its--) { 898 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 899 mat->Mvctx); CHKERRQ(ierr); 900 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 901 mat->Mvctx); CHKERRQ(ierr); 902 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 903 mat->Mvctx); CHKERRQ(ierr); 904 for ( i=m-1; i>-1; i-- ) { 905 n = A->i[i+1] - A->i[i]; 906 idx = A->j + A->i[i] + shift; 907 v = A->a + A->i[i] + shift; 908 sum = b[i]; 909 SPARSEDENSEMDOT(sum,xs,v,idx,n); 910 d = fshift + A->a[diag[i]+shift]; 911 n = B->i[i+1] - B->i[i]; 912 idx = B->j + B->i[i] + shift; 913 v = B->a + B->i[i] + shift; 914 SPARSEDENSEMDOT(sum,ls,v,idx,n); 915 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 916 } 917 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 918 mat->Mvctx); CHKERRQ(ierr); 919 } 920 } 921 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 922 if (flag & SOR_ZERO_INITIAL_GUESS) { 923 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 924 } 925 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 926 CHKERRQ(ierr); 927 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 928 CHKERRQ(ierr); 929 while (its--) { 930 /* go down through the rows */ 931 for ( i=0; i<m; i++ ) { 932 n = A->i[i+1] - A->i[i]; 933 idx = A->j + A->i[i] + shift; 934 v = A->a + A->i[i] + shift; 935 sum = b[i]; 936 SPARSEDENSEMDOT(sum,xs,v,idx,n); 937 d = fshift + A->a[diag[i]+shift]; 938 n = B->i[i+1] - B->i[i]; 939 idx = B->j + B->i[i] + shift; 940 v = B->a + B->i[i] + shift; 941 SPARSEDENSEMDOT(sum,ls,v,idx,n); 942 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 943 } 944 /* come up through the rows */ 945 for ( i=m-1; i>-1; i-- ) { 946 n = A->i[i+1] - A->i[i]; 947 idx = A->j + A->i[i] + shift; 948 v = A->a + A->i[i] + shift; 949 sum = b[i]; 950 SPARSEDENSEMDOT(sum,xs,v,idx,n); 951 d = fshift + A->a[diag[i]+shift]; 952 n = B->i[i+1] - B->i[i]; 953 idx = B->j + B->i[i] + shift; 954 v = B->a + B->i[i] + shift; 955 SPARSEDENSEMDOT(sum,ls,v,idx,n); 956 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 957 } 958 } 959 } 960 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 963 } 964 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 965 CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 967 CHKERRQ(ierr); 968 while (its--) { 969 for ( i=0; i<m; i++ ) { 970 n = A->i[i+1] - A->i[i]; 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 981 } 982 } 983 } 984 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 987 } 988 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 989 mat->Mvctx); CHKERRQ(ierr); 990 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 991 mat->Mvctx); CHKERRQ(ierr); 992 while (its--) { 993 for ( i=m-1; i>-1; i-- ) { 994 n = A->i[i+1] - A->i[i]; 995 idx = A->j + A->i[i] + shift; 996 v = A->a + A->i[i] + shift; 997 sum = b[i]; 998 SPARSEDENSEMDOT(sum,xs,v,idx,n); 999 d = fshift + A->a[diag[i]+shift]; 1000 n = B->i[i+1] - B->i[i]; 1001 idx = B->j + B->i[i] + shift; 1002 v = B->a + B->i[i] + shift; 1003 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]);
        }
        ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      }
    }
  }
  return 0;
}

/*
   MatGetInfo_MPIAIJ - Reports nonzero/allocation/memory statistics for the
   parallel matrix by summing the stats of the local diagonal block A and the
   off-diagonal block B, then (optionally) reducing across the communicator.

   flag selects the scope: MAT_LOCAL returns this process's totals;
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce over all processes with max / sum.
   NOTE(review): the MPI_Allreduce return codes are ignored here, matching
   the style used throughout this file.
*/
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  /* gather local stats from both sequential blocks and sum them in isend[] */
  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

/* factorization/solve routines implemented elsewhere (see mpiaij factor files) */
extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

/*
   MatSetOption_MPIAIJ - Applies a MatOption to the parallel matrix.
   Most options are simply forwarded to both sequential blocks A and B;
   a few are meaningless in parallel and are logged as ignored.
*/
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    /* forward to both the diagonal and off-diagonal sequential blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1058 else if (op == COLUMN_ORIENTED) { 1059 a->roworiented = 0; 1060 MatSetOption(a->A,op); 1061 MatSetOption(a->B,op); 1062 } 1063 else if (op == NO_NEW_DIAGONALS) 1064 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1065 else 1066 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1067 return 0; 1068 } 1069 1070 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1071 { 1072 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1073 *m = mat->M; *n = mat->N; 1074 return 0; 1075 } 1076 1077 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 *m = mat->m; *n = mat->N; 1081 return 0; 1082 } 1083 1084 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1087 *m = mat->rstart; *n = mat->rend; 1088 return 0; 1089 } 1090 1091 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1095 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1096 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1097 int *cmap, *idx_p; 1098 1099 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 1100 mat->getrowactive = PETSC_TRUE; 1101 1102 if (!mat->rowvalues && (idx || v)) { 1103 /* 1104 allocate enough space to hold information from the longest row. 
*/
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,n = mat->n,tmp;
    /* scratch sized to the widest combined (A + B) row on this process */
    for ( i=0; i<n; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and (after them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
  lrow = row - rstart;

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;   /* maps B's compact column numbers to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted.
         B's entries split around the diagonal block: those with global column
         < cstart come first, then all of A (columns cstart..cend-1), then the
         rest of B.  imark is the split point within B. */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {*idx = 0; *v=0;}   /* empty row */
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int
MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  /* Releases the row checked out by MatGetRow_MPIAIJ; the scratch buffers
     are retained on the matrix for reuse, so only the active flag is reset. */
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

/*
   MatNorm_MPIAIJ - Computes a matrix norm over the parallel matrix.
   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the 2-norm is not supported.  On one process the work is
   delegated to the sequential routine.  NOTE(review): the NORM_1 path
   allocates two length-N (global!) work arrays — not scalable, like the
   colmap noted at the top of this file.
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* accumulate |a_ij| per GLOBAL column: A columns offset by cstart,
         B columns translated through garray */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* rows are wholly local, so take the local max then a global MAX */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of the parallel matrix by
   creating a fresh N x M matrix B and inserting each local row as a column.
   With matout == PETSC_NULL the result replaces A in place (only legal for
   square matrices); otherwise *matout receives the new matrix.
*/
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  /* note the swapped global dimensions N x M */
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily globalize A's column indices in place (undone below) */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row `row` of A as column `row` of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* B's columns are stored compactly; expand them to global numbers via garray */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] =
a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    /* steal B's header into A, then destroy the now-empty shell of B */
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
/* MatPrintHelp_MPIAIJ - Prints option help; only rank 0 produces output. */
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ.  Slot ORDER is fixed by struct _MatOps;
   a 0 entry means the operation is not supported for this type. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
           if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.
newmat - the matrix 1375 1376 Notes: 1377 The AIJ format (also called the Yale sparse matrix format or 1378 compressed row storage), is fully compatible with standard Fortran 77 1379 storage. That is, the stored row and column indices can begin at 1380 either one (as in Fortran) or zero. See the users manual for details. 1381 1382 The user MUST specify either the local or global matrix dimensions 1383 (possibly both). 1384 1385 By default, this format uses inodes (identical nodes) when possible. 1386 We search for consecutive rows with the same nonzero structure, thereby 1387 reusing matrix information to achieve increased efficiency. 1388 1389 Options Database Keys: 1390 $ -mat_aij_no_inode - Do not use inodes 1391 $ -mat_aij_inode_limit <limit> - Set inode limit. 1392 $ (max limit=5) 1393 $ -mat_aij_oneindex - Internally use indexing starting at 1 1394 $ rather than 0. Note: When calling MatSetValues(), 1395 $ the user still MUST index entries starting at 0! 1396 1397 Storage Information: 1398 For a square global matrix we define each processor's diagonal portion 1399 to be its local rows and the corresponding columns (a square submatrix); 1400 each processor's off-diagonal portion encompasses the remainder of the 1401 local matrix (a rectangular submatrix). 1402 1403 The user can specify preallocated storage for the diagonal part of 1404 the local submatrix with either d_nz or d_nnz (not both). Set 1405 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1406 memory allocation. Likewise, specify preallocated storage for the 1407 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1408 1409 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1410 the figure below we depict these three local rows and all columns (0-11). 

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For additional details, see the users manual
   chapter on matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = 0;
  mat->assembled = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* per-row preallocation arrays make no sense unless the local row count
     is known up front */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  /* derive missing global sizes by summing the local ones */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work,
sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* derive missing local sizes: even split, remainder to the low ranks */
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  /* one allocation holds both rowners and cowners (each size+1 entries) */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  /* gather every process's m, then prefix-sum into ownership offsets */
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  /* diagonal block A is m x n; off-diagonal block B is m x N and keeps
     global column indices until assembly compacts it */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;

  /* stuff for MatGetRow() */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  *newmat = mat;
  return 0;
}

/*
   MatConvertSameType_MPIAIJ - Duplicates an (assembled) MATMPIAIJ matrix,
   copying layout, ownership tables, communication structures and the two
   sequential blocks.  NOTE(review): the cpvalues argument does not appear
   to be consulted in this routine — confirm against MatConvert callers.
*/
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *)
matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;   /* only assembled matrices are duplicated */

  /* copy the size/ownership bookkeeping wholesale */
  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  /* NOTE(review): only rowners (size+1 ints) is duplicated here, while
     MatCreateMPIAIJ allocates 2*(size+2) and sets cowners as well — the
     copy's cowners field is never assigned; confirm it is unused after
     assembly. */
  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  /* garray length equals the number of columns in the compacted B block */
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* duplicate the communication structures and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr =
OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sysio.h"

/*
   MatLoad_MPIAIJ - Loads a MATMPIAIJ matrix from a binary viewer.
   Rank 0 reads the header, row lengths, column indices and values from the
   file and ships each other process its portion with point-to-point sends;
   row lengths are distributed with MPI_Scatterv.  Rows are divided evenly
   with the remainder going to the low ranks, matching MatCreateMPIAIJ.
*/
int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only rank 0 touches the file; header = {cookie, M, N, nz} */
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* single allocation: ourlens followed by offlens, each m entries */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily shrink ourlens to the diagonal-block counts for preallocation */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr =
MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  /* restore ourlens to full (diagonal + off-diagonal) row lengths */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      /* values travel under A->tag, distinct from the column-index tag */
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}