#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.115 1996/01/24 05:46:00 bsmith Exp balay $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
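/*
   For illustration: if the off-diagonal block B references the global
   columns garray[] = {3,7,12} and indexshift is 0, the loop above sets
   colmap[3] = 0, colmap[7] = 1, colmap[12] = 2, and leaves every other
   entry at the 0 written by PetscMemzero().  Callers such as
   MatSetValues_MPIAIJ() recover the local column number with
   col = colmap[global column] + shift.
*/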
extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (mat->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
     1) each message consists of (global row index, global column index,
        value) triples; we store the indices as Scalars to simplify the
        message passing.
     2) since we don't know how long each individual message is we
        allocate the largest needed buffer for each receive.  Potentially
        this is a lot of wasted space.

     This could be done better.
  */
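  /* For illustration: a message carrying k stashed entries is a buffer of
     3*k Scalars laid out as
        [ row_0, col_0, val_0, row_1, col_1, val_1, ... ]
     with the global row and column indices cast to Scalar; they are
     converted back with (int) PETSCREAL() in MatAssemblyEnd_MPIAIJ(). */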
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

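/*
   Note on the storage scheme used by the multiply routines below: each
   process stores its rows in two sequential AIJ matrices, A (the "diagonal"
   block, whose columns are owned by this process) and B (the "off-diagonal"
   block, whose columns are renumbered compactly through garray).  A
   matrix-vector product is therefore computed as

        y = A*x_local + B*lvec,

   where lvec holds the ghost values of x gathered with the VecScatter
   context Mvctx; the scatter is started before the local product and
   completed just before the B contribution so that communication can
   overlap with computation.
*/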
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
              nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions.

     1) per sequential
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) color updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D, where L is lower triangular, U is upper
       triangular, and D is diagonal.  This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick.  This is for
       the case of an SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
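    /* Sketch of the identity used here: since A = (L + E) + (U + E) - (2E - D),

          (L+E)^{-1} A (U+E)^{-1} b
              = (U+E)^{-1} b  +  (L+E)^{-1} ( b - (2E - D) (U+E)^{-1} b ).

       Below, x is overwritten with (U+E)^{-1} b (a backward solve), then
       t = b - (2E - D) x with 2E - D = (2/omega - 1) D = scale*D, then
       t = (L+E)^{-1} t (a forward solve), and finally x = x + t. */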
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /*  t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (matin->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data
       structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
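/*
   Example (an illustrative sketch; M denotes an assumed global dimension,
   and boundary rows would need the obvious truncation of cols/vals):

      Mat    A;
      int    ierr, i, rstart, rend, cols[3];
      Scalar vals[3];

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,
                             3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        cols[0] = i-1;  cols[1] = i;   cols[2] = i+1;
        vals[0] = -1.0; vals[1] = 2.0; vals[2] = -1.0;
        ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/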
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = 0;
  mat->assembled = PETSC_FALSE;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off-processor entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply */
  a->lvec  = 0;
  a->Mvctx = 0;

  *newmat = mat;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len,flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
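  /* e.g. (illustration only) M = 10 rows on size = 3 processes gives
     m = 4 on rank 0 and m = 3 on ranks 1 and 2 */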
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}