1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.116 1996/01/24 16:01:05 balay Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 static int CreateColmap_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18 int n = B->n,i,shift = B->indexshift; 19 20 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21 PLogObjectMemory(mat,aij->N*sizeof(int)); 22 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 24 return 0; 25 } 26 27 extern int DisAssemble_MPIAIJ(Mat); 28 29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 30 { 31 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32 int ierr; 33 if (aij->size == 1) { 34 ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 35 } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36 return 0; 37 } 38 39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 43 Scalar value; 44 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 45 int cstart = aij->cstart, cend = aij->cend,row,col; 46 int shift = C->indexshift,roworiented = aij->roworiented; 47 48 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 49 SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 50 } 51 aij->insertmode = addv; 52 for ( i=0; i<m; i++ ) { 53 if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 54 if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 55 if (im[i] >= rstart && im[i] < rend) { 56 row = im[i] - rstart; 57 for ( j=0; j<n; j++ ) { 58 if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 59 if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 60 if (in[j] >= cstart && in[j] < cend){ 61 col = in[j] - cstart; 62 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 63 ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 64 } 65 else { 66 if (mat->was_assembled) { 67 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 68 col = aij->colmap[in[j]] + shift; 69 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 70 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 71 col = in[j]; 72 } 73 } 74 else col = in[j]; 75 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 76 ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 77 } 78 } 79 } 80 else { 81 if (roworiented) { 82 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 83 } 84 else { 85 row = im[i]; 86 for ( j=0; j<n; j++ ) { 87 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 88 } 89 } 90 } 91 } 92 return 0; 93 } 94 95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96 { 97 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98 Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100 int cstart = aij->cstart, cend = aij->cend,row,col; 101 int shift = C->indexshift; 102 103 for ( i=0; i<m; i++ ) { 104 if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105 if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106 if (idxm[i] >= rstart && idxm[i] < rend) { 107 row = idxm[i] - rstart; 108 for ( j=0; j<n; j++ ) { 109 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111 if (idxn[j] >= cstart && idxn[j] < cend){ 112 col = idxn[j] - cstart; 113 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114 } 115 else { 116 col = aij->colmap[idxn[j]] + shift; 117 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 118 } 119 } 120 } 121 else { 122 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 123 } 124 } 125 return 0; 126 } 127 128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int size = aij->size, *owners = aij->rowners; 133 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 InsertMode addv; 137 Scalar *rvalues,*svalues; 138 139 /* make sure all processors are either in INSERTMODE or ADDMODE */ 140 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 141 if (addv == (ADD_VALUES|INSERT_VALUES)) { 142 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 143 } 144 aij->insertmode = addv; /* in case this processor had no cache */ 145 146 /* first count number of contributors to each processor */ 147 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 148 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 149 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 150 for ( i=0; i<aij->stash.n; i++ ) { 151 idx = aij->stash.idx[i]; 152 for ( j=0; j<size; j++ ) { 153 if (idx >= owners[j] && idx < owners[j+1]) { 154 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 155 } 156 } 157 } 158 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 159 160 /* inform other processors of number of messages and max length*/ 161 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 162 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 163 nreceives = work[rank]; 164 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 165 nmax = work[rank]; 166 PetscFree(work); 167 168 /* post receives: 169 1) each message will consist of ordered pairs 170 (global index,value) we store the global index as a double 171 to simplify the message passing. 172 2) since we don't know how long each individual message is we 173 allocate the largest needed buffer for each receive. Potentially 174 this is a lot of wasted space. 175 176 177 This could be done better. 178 */ 179 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 180 CHKPTRQ(rvalues); 181 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 182 CHKPTRQ(recv_waits); 183 for ( i=0; i<nreceives; i++ ) { 184 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 185 comm,recv_waits+i); 186 } 187 188 /* do sends: 189 1) starts[i] gives the starting index in svalues for stuff going to 190 the ith processor 191 */ 192 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 193 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTRQ(send_waits); 195 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 196 starts[0] = 0; 197 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 PetscFree(owner); 204 starts[0] = 0; 205 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<size; i++ ) { 208 if (procs[i]) { 209 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 PetscFree(starts); PetscFree(nprocs); 214 215 /* Free cache space */ 216 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 217 218 aij->svalues = svalues; aij->rvalues = rvalues; 219 aij->nsends = nsends; aij->nrecvs = nreceives; 220 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 221 aij->rmax = nmax; 222 223 return 0; 224 } 225 extern int MatSetUpMultiply_MPIAIJ(Mat); 226 227 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 228 { 229 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 230 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 231 MPI_Status *send_status,recv_status; 232 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 233 int row,col,other_disassembled,shift = C->indexshift; 234 Scalar *values,val; 235 InsertMode addv = aij->insertmode; 236 237 /* wait on receives */ 238 while (count) { 239 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 240 /* unpack receives into our local space */ 241 values = aij->rvalues + 3*imdex*aij->rmax; 242 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 243 n = n/3; 244 for ( i=0; i<n; i++ ) { 245 row = (int) PetscReal(values[3*i]) - aij->rstart; 246 col = (int) PetscReal(values[3*i+1]); 247 val = values[3*i+2]; 248 if (col >= aij->cstart && col < aij->cend) { 249 col -= aij->cstart; 250 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 251 } 252 else { 253 if (mat->was_assembled) { 254 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 255 col = aij->colmap[col] + shift; 256 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 257 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 258 col = (int) PetscReal(values[3*i+1]); 259 } 260 } 261 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 262 } 263 } 264 count--; 265 } 266 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 267 268 /* wait on sends */ 269 if (aij->nsends) { 270 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 271 CHKPTRQ(send_status); 272 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 273 PetscFree(send_status); 274 } 275 PetscFree(aij->send_waits); PetscFree(aij->svalues); 276 277 aij->insertmode = NOT_SET_VALUES; 278 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 279 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 280 281 /* determine if any processor has disassembled, if so we must 282 also disassemble ourselfs, in order that we may reassemble. */ 283 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 284 if (mat->was_assembled && !other_disassembled) { 285 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 286 } 287 288 if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 289 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 290 } 291 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 292 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 293 294 return 0; 295 } 296 297 static int MatZeroEntries_MPIAIJ(Mat A) 298 { 299 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 300 int ierr; 301 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 302 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 303 return 0; 304 } 305 306 /* the code does not do the diagonal entries correctly unless the 307 matrix is square and the column and row owerships are identical. 308 This is a BUG. The only way to fix it seems to be to access 309 aij->A and aij->B directly and not through the MatZeroRows() 310 routine. 311 */ 312 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 313 { 314 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 315 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 316 int *procs,*nprocs,j,found,idx,nsends,*work; 317 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 318 int *rvalues,tag = A->tag,count,base,slen,n,*source; 319 int *lens,imdex,*lrows,*values; 320 MPI_Comm comm = A->comm; 321 MPI_Request *send_waits,*recv_waits; 322 MPI_Status recv_status,*send_status; 323 IS istmp; 324 325 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 326 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 327 328 /* first count number of contributors to each processor */ 329 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 330 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 331 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 332 for ( i=0; i<N; i++ ) { 333 idx = rows[i]; 334 found = 0; 335 for ( j=0; j<size; j++ ) { 336 if (idx >= owners[j] && idx < owners[j+1]) { 337 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 338 } 339 } 340 if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 341 } 342 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 343 344 /* inform other processors of number of messages and max length*/ 345 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 346 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 347 nrecvs = work[rank]; 348 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 349 nmax = work[rank]; 350 PetscFree(work); 351 352 /* post receives: */ 353 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 354 CHKPTRQ(rvalues); 355 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 356 CHKPTRQ(recv_waits); 357 for ( i=0; i<nrecvs; i++ ) { 358 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 359 } 360 361 /* do sends: 362 1) starts[i] gives the starting index in svalues for stuff going to 363 the ith processor 364 */ 365 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 366 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 367 CHKPTRQ(send_waits); 368 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 369 starts[0] = 0; 370 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 371 for ( i=0; i<N; i++ ) { 372 svalues[starts[owner[i]]++] = rows[i]; 373 } 374 ISRestoreIndices(is,&rows); 375 376 starts[0] = 0; 377 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 382 } 383 } 384 PetscFree(starts); 385 386 base = owners[rank]; 387 388 /* wait on receives */ 389 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 390 source = lens + nrecvs; 391 count = nrecvs; slen = 0; 392 while (count) { 393 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 394 /* unpack receives into our local space */ 395 MPI_Get_count(&recv_status,MPI_INT,&n); 396 source[imdex] = recv_status.MPI_SOURCE; 397 lens[imdex] = n; 398 slen += n; 399 count--; 400 } 401 PetscFree(recv_waits); 402 403 /* move the data into the send scatter */ 404 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 405 count = 0; 406 for ( i=0; i<nrecvs; i++ ) { 407 values = rvalues + i*nmax; 408 for ( j=0; j<lens[i]; j++ ) { 409 lrows[count++] = values[j] - base; 410 } 411 } 412 PetscFree(rvalues); PetscFree(lens); 413 PetscFree(owner); PetscFree(nprocs); 414 415 /* actually zap the local rows */ 416 ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 417 PLogObjectParent(A,istmp); 418 PetscFree(lrows); 419 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 420 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 421 ierr = ISDestroy(istmp); CHKERRQ(ierr); 422 423 /* wait on sends */ 424 if (nsends) { 425 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 426 CHKPTRQ(send_status); 427 MPI_Waitall(nsends,send_waits,send_status); 428 PetscFree(send_status); 429 } 430 PetscFree(send_waits); PetscFree(svalues); 431 432 return 0; 433 } 434 435 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 436 { 437 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 438 int ierr; 439 440 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 441 ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 442 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 443 ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 444 return 0; 445 } 446 447 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 448 { 449 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 450 int ierr; 451 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 452 ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 453 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 454 ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 455 return 0; 456 } 457 458 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 459 { 460 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 461 int ierr; 462 463 /* do nondiagonal part */ 464 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 465 /* send it on its way */ 466 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 467 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 468 /* do local part */ 469 ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 470 /* receive remote parts: note this assumes the values are not actually */ 471 /* inserted in yy until the next line, which is true for my implementation*/ 472 /* but is not perhaps always true. */ 473 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 474 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 475 return 0; 476 } 477 478 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 481 int ierr; 482 483 /* do nondiagonal part */ 484 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 485 /* send it on its way */ 486 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 487 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 488 /* do local part */ 489 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 490 /* receive remote parts: note this assumes the values are not actually */ 491 /* inserted in yy until the next line, which is true for my implementation*/ 492 /* but is not perhaps always true. */ 493 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 494 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 495 return 0; 496 } 497 498 /* 499 This only works correctly for square matrices where the subblock A->A is the 500 diagonal block 501 */ 502 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 503 { 504 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 505 return MatGetDiagonal(a->A,v); 506 } 507 508 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 509 { 510 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 511 int ierr; 512 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 513 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 514 return 0; 515 } 516 517 static int MatDestroy_MPIAIJ(PetscObject obj) 518 { 519 Mat mat = (Mat) obj; 520 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 521 int ierr; 522 #if defined(PETSC_LOG) 523 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 524 #endif 525 PetscFree(aij->rowners); 526 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 527 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 528 if (aij->colmap) PetscFree(aij->colmap); 529 if (aij->garray) PetscFree(aij->garray); 530 if (aij->lvec) VecDestroy(aij->lvec); 531 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 532 PetscFree(aij); 533 PLogObjectDestroy(mat); 534 PetscHeaderDestroy(mat); 535 return 0; 536 } 537 #include "draw.h" 538 #include "pinclude/pviewer.h" 539 540 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 541 { 542 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 543 int ierr; 544 545 if (aij->size == 1) { 546 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 547 } 548 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 549 return 0; 550 } 551 552 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 555 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 556 int ierr, format,shift = C->indexshift,rank; 557 PetscObject vobj = (PetscObject) viewer; 558 FILE *fd; 559 560 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 561 ierr = ViewerFileGetFormat_Private(viewer,&format); 562 if (format == FILE_FORMAT_INFO_DETAILED) { 563 int nz,nzalloc,mem; 564 MPI_Comm_rank(mat->comm,&rank); 565 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 566 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 567 MPIU_Seq_begin(mat->comm,1); 568 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 569 nzalloc,mem); 570 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 571 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 572 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 573 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 574 fflush(fd); 575 MPIU_Seq_end(mat->comm,1); 576 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 577 return 0; 578 } 579 else if (format == FILE_FORMAT_INFO) { 580 return 0; 581 } 582 } 583 584 if (vobj->type == ASCII_FILE_VIEWER) { 585 ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr); 586 MPIU_Seq_begin(mat->comm,1); 587 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 588 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 589 aij->cend); 590 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 591 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 592 fflush(fd); 593 MPIU_Seq_end(mat->comm,1); 594 } 595 else { 596 int size = aij->size; 597 rank = aij->rank; 598 if (size == 1) { 599 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 600 } 601 else { 602 /* assemble the entire matrix onto first processor. */ 603 Mat A; 604 Mat_SeqAIJ *Aloc; 605 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 606 Scalar *a; 607 608 if (!rank) { 609 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 610 CHKERRQ(ierr); 611 } 612 else { 613 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 614 CHKERRQ(ierr); 615 } 616 PLogObjectParent(mat,A); 617 618 /* copy over the A part */ 619 Aloc = (Mat_SeqAIJ*) aij->A->data; 620 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 621 row = aij->rstart; 622 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 623 for ( i=0; i<m; i++ ) { 624 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 625 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 626 } 627 aj = Aloc->j; 628 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 629 630 /* copy over the B part */ 631 Aloc = (Mat_SeqAIJ*) aij->B->data; 632 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 633 row = aij->rstart; 634 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 635 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 636 for ( i=0; i<m; i++ ) { 637 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 638 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 639 } 640 PetscFree(ct); 641 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 642 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 643 if (!rank) { 644 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 645 } 646 ierr = MatDestroy(A); CHKERRQ(ierr); 647 } 648 } 649 return 0; 650 } 651 652 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 653 { 654 Mat mat = (Mat) obj; 655 int ierr; 656 PetscObject vobj = (PetscObject) viewer; 657 658 if (!viewer) { 659 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 660 } 661 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 662 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 663 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 664 } 665 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 666 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 667 } 668 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 669 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 670 } 671 else if (vobj->cookie == DRAW_COOKIE) { 672 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 673 } 674 else if (vobj->type == BINARY_FILE_VIEWER) { 675 return MatView_MPIAIJ_Binary(mat,viewer); 676 } 677 return 0; 678 } 679 680 extern int MatMarkDiag_SeqAIJ(Mat); 681 /* 682 This has to provide several versions. 683 684 1) per sequential 685 2) a) use only local smoothing updating outer values only once. 686 b) local smoothing updating outer values each inner iteration 687 3) color updating out values betwen colors. 688 */ 689 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 690 double fshift,int its,Vec xx) 691 { 692 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 693 Mat AA = mat->A, BB = mat->B; 694 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 695 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 696 int ierr,*idx, *diag; 697 int n = mat->n, m = mat->m, i,shift = A->indexshift; 698 Vec tt; 699 700 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 701 xs = x + shift; /* shift by one for index start of 1 */ 702 ls = ls + shift; 703 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 704 diag = A->diag; 705 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 706 SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 707 } 708 if (flag & SOR_EISENSTAT) { 709 /* Let A = L + U + D; where L is lower trianglar, 710 U is upper triangular, E is diagonal; This routine applies 711 712 (L + E)^{-1} A (U + E)^{-1} 713 714 to a vector efficiently using Eisenstat's trick. This is for 715 the case of SSOR preconditioner, so E is D/omega where omega 716 is the relaxation factor. 717 */ 718 ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 719 PLogObjectParent(matin,tt); 720 VecGetArray(tt,&t); 721 scale = (2.0/omega) - 1.0; 722 /* x = (E + U)^{-1} b */ 723 VecSet(&zero,mat->lvec); 724 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 725 mat->Mvctx); CHKERRQ(ierr); 726 for ( i=m-1; i>-1; i-- ) { 727 n = A->i[i+1] - diag[i] - 1; 728 idx = A->j + diag[i] + !shift; 729 v = A->a + diag[i] + !shift; 730 sum = b[i]; 731 SPARSEDENSEMDOT(sum,xs,v,idx,n); 732 d = fshift + A->a[diag[i]+shift]; 733 n = B->i[i+1] - B->i[i]; 734 idx = B->j + B->i[i] + shift; 735 v = B->a + B->i[i] + shift; 736 SPARSEDENSEMDOT(sum,ls,v,idx,n); 737 x[i] = omega*(sum/d); 738 } 739 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 740 mat->Mvctx); CHKERRQ(ierr); 741 742 /* t = b - (2*E - D)x */ 743 v = A->a; 744 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 745 746 /* t = (E + L)^{-1}t */ 747 ts = t + shift; /* shifted by one for index start of a or mat->j*/ 748 diag = A->diag; 749 VecSet(&zero,mat->lvec); 750 ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 751 mat->Mvctx); CHKERRQ(ierr); 752 for ( i=0; i<m; i++ ) { 753 n = diag[i] - A->i[i]; 754 idx = A->j + A->i[i] + shift; 755 v = A->a + A->i[i] + shift; 756 sum = t[i]; 757 SPARSEDENSEMDOT(sum,ts,v,idx,n); 758 d = fshift + A->a[diag[i]+shift]; 759 n = B->i[i+1] - B->i[i]; 760 idx = B->j + B->i[i] + shift; 761 v = B->a + B->i[i] + shift; 762 SPARSEDENSEMDOT(sum,ls,v,idx,n); 763 t[i] = omega*(sum/d); 764 } 765 ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 766 mat->Mvctx); CHKERRQ(ierr); 767 /* x = x + t */ 768 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 769 VecDestroy(tt); 770 return 0; 771 } 772 773 774 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 775 if (flag & SOR_ZERO_INITIAL_GUESS) { 776 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 777 } 778 else { 779 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 780 CHKERRQ(ierr); 781 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 782 CHKERRQ(ierr); 783 } 784 while (its--) { 785 /* go down through the rows */ 786 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 787 mat->Mvctx); CHKERRQ(ierr); 788 for ( i=0; i<m; i++ ) { 789 n = A->i[i+1] - A->i[i]; 790 idx = A->j + A->i[i] + shift; 791 v = A->a + A->i[i] + shift; 792 sum = b[i]; 793 SPARSEDENSEMDOT(sum,xs,v,idx,n); 794 d = fshift + A->a[diag[i]+shift]; 795 n = B->i[i+1] - B->i[i]; 796 idx = B->j + B->i[i] + shift; 797 v = B->a + B->i[i] + shift; 798 SPARSEDENSEMDOT(sum,ls,v,idx,n); 799 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 800 } 801 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 802 mat->Mvctx); CHKERRQ(ierr); 803 /* come up through the rows */ 804 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 805 mat->Mvctx); CHKERRQ(ierr); 806 for ( i=m-1; i>-1; i-- ) { 807 n = A->i[i+1] - A->i[i]; 808 idx = A->j + A->i[i] + shift; 809 v = A->a + A->i[i] + shift; 810 sum = b[i]; 811 SPARSEDENSEMDOT(sum,xs,v,idx,n); 812 d = fshift + A->a[diag[i]+shift]; 813 n = B->i[i+1] - B->i[i]; 814 idx = B->j + B->i[i] + shift; 815 v = B->a + B->i[i] + shift; 816 SPARSEDENSEMDOT(sum,ls,v,idx,n); 817 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 818 } 819 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 820 mat->Mvctx); CHKERRQ(ierr); 821 } 822 } 823 else if (flag & SOR_FORWARD_SWEEP){ 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 VecSet(&zero,mat->lvec); 826 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 827 mat->Mvctx); CHKERRQ(ierr); 828 for ( i=0; i<m; i++ ) { 829 n = diag[i] - A->i[i]; 830 idx = A->j + A->i[i] + shift; 831 v = A->a + A->i[i] + shift; 832 sum = b[i]; 833 SPARSEDENSEMDOT(sum,xs,v,idx,n); 834 d = fshift + A->a[diag[i]+shift]; 835 n = B->i[i+1] - B->i[i]; 836 idx = B->j + B->i[i] + shift; 837 v = B->a + B->i[i] + shift; 838 SPARSEDENSEMDOT(sum,ls,v,idx,n); 839 x[i] = omega*(sum/d); 840 } 841 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 842 mat->Mvctx); CHKERRQ(ierr); 843 its--; 844 } 845 while (its--) { 846 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 847 CHKERRQ(ierr); 848 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 849 CHKERRQ(ierr); 850 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 851 mat->Mvctx); CHKERRQ(ierr); 852 for ( i=0; i<m; i++ ) { 853 n = A->i[i+1] - A->i[i]; 854 idx = A->j + A->i[i] + shift; 855 v = A->a + A->i[i] + shift; 856 sum = b[i]; 857 SPARSEDENSEMDOT(sum,xs,v,idx,n); 858 d = fshift + A->a[diag[i]+shift]; 859 n = B->i[i+1] - B->i[i]; 860 idx = B->j + B->i[i] + shift; 861 v = B->a + B->i[i] + shift; 862 SPARSEDENSEMDOT(sum,ls,v,idx,n); 863 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 864 } 865 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 866 mat->Mvctx); CHKERRQ(ierr); 867 } 868 } 869 else if (flag & SOR_BACKWARD_SWEEP){ 870 if (flag & SOR_ZERO_INITIAL_GUESS) { 871 VecSet(&zero,mat->lvec); 872 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 873 mat->Mvctx); CHKERRQ(ierr); 874 for ( i=m-1; i>-1; i-- ) { 875 n = A->i[i+1] - diag[i] - 1; 876 idx = A->j + diag[i] + !shift; 877 v = A->a + diag[i] + !shift; 878 sum = b[i]; 879 SPARSEDENSEMDOT(sum,xs,v,idx,n); 880 d = fshift + A->a[diag[i]+shift]; 881 n = B->i[i+1] - B->i[i]; 882 idx = B->j + B->i[i] + shift; 883 v = B->a + B->i[i] + shift; 884 SPARSEDENSEMDOT(sum,ls,v,idx,n); 885 x[i] = omega*(sum/d); 886 } 887 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 888 mat->Mvctx); CHKERRQ(ierr); 889 its--; 890 } 891 while (its--) { 892 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 893 mat->Mvctx); CHKERRQ(ierr); 894 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 895 mat->Mvctx); CHKERRQ(ierr); 896 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 897 mat->Mvctx); CHKERRQ(ierr); 898 for ( i=m-1; i>-1; i-- ) { 899 n = A->i[i+1] - A->i[i]; 900 idx = A->j + A->i[i] + shift; 901 v = A->a + A->i[i] + shift; 902 sum = b[i]; 903 SPARSEDENSEMDOT(sum,xs,v,idx,n); 904 d = fshift + A->a[diag[i]+shift]; 905 n = B->i[i+1] - B->i[i]; 906 idx = B->j + B->i[i] + shift; 907 v = B->a + B->i[i] + shift; 908 SPARSEDENSEMDOT(sum,ls,v,idx,n); 909 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 910 } 911 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 912 mat->Mvctx); CHKERRQ(ierr); 913 } 914 } 915 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 916 if (flag & SOR_ZERO_INITIAL_GUESS) { 917 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 918 } 919 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 920 CHKERRQ(ierr); 921 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 922 CHKERRQ(ierr); 923 while (its--) { 924 /* go down through the rows */ 925 for ( i=0; i<m; i++ ) { 926 n = A->i[i+1] - A->i[i]; 927 idx = A->j + A->i[i] + shift; 928 v = A->a + A->i[i] + shift; 929 sum = b[i]; 930 SPARSEDENSEMDOT(sum,xs,v,idx,n); 931 d = fshift + A->a[diag[i]+shift]; 932 n = B->i[i+1] - B->i[i]; 933 idx = B->j + B->i[i] + shift; 934 v = B->a + B->i[i] + shift; 935 SPARSEDENSEMDOT(sum,ls,v,idx,n); 936 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 937 } 938 /* come up through the rows */ 939 for ( i=m-1; i>-1; i-- ) { 940 n = A->i[i+1] - A->i[i]; 941 idx = A->j + A->i[i] + shift; 942 v = A->a + A->i[i] + shift; 943 sum = b[i]; 944 SPARSEDENSEMDOT(sum,xs,v,idx,n); 945 d = fshift + A->a[diag[i]+shift]; 946 n = B->i[i+1] - B->i[i]; 947 idx = B->j + B->i[i] + shift; 948 v = B->a + B->i[i] + shift; 949 SPARSEDENSEMDOT(sum,ls,v,idx,n); 950 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 951 } 952 } 953 } 954 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 955 if (flag & SOR_ZERO_INITIAL_GUESS) { 956 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 957 } 958 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 959 CHKERRQ(ierr); 960 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 961 CHKERRQ(ierr); 962 while (its--) { 963 for ( i=0; i<m; i++ ) { 964 n = A->i[i+1] - A->i[i]; 965 idx = A->j + A->i[i] + shift; 966 v = A->a + A->i[i] + shift; 967 sum = b[i]; 968 SPARSEDENSEMDOT(sum,xs,v,idx,n); 969 d = fshift + A->a[diag[i]+shift]; 970 n = B->i[i+1] - B->i[i]; 971 idx = B->j + B->i[i] + shift; 972 v = B->a + B->i[i] + shift; 973 SPARSEDENSEMDOT(sum,ls,v,idx,n); 974 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 975 } 976 } 977 } 978 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 979 if (flag & SOR_ZERO_INITIAL_GUESS) { 980 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 981 } 982 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 983 mat->Mvctx); CHKERRQ(ierr); 984 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 985 mat->Mvctx); CHKERRQ(ierr); 986 while (its--) { 987 for ( i=m-1; i>-1; i-- ) { 988 n = A->i[i+1] - A->i[i]; 989 idx = A->j + A->i[i] + shift; 990 v = A->a + A->i[i] + shift; 991 sum = b[i]; 992 SPARSEDENSEMDOT(sum,xs,v,idx,n); 993 d = fshift + A->a[diag[i]+shift]; 994 n = B->i[i+1] - B->i[i]; 995 idx = B->j + B->i[i] + shift; 996 v = B->a + B->i[i] + shift; 997 SPARSEDENSEMDOT(sum,ls,v,idx,n); 998 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 999 } 1000 } 1001 } 1002 return 0; 1003 } 1004 1005 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1006 int *nzalloc,int *mem) 1007 { 1008 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1009 Mat A = mat->A, B = mat->B; 1010 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1011 1012 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1013 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1014 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1015 if (flag == MAT_LOCAL) { 1016 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1017 } else if (flag == MAT_GLOBAL_MAX) { 1018 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1019 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1020 } else if (flag == MAT_GLOBAL_SUM) { 1021 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1022 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1023 } 1024 return 0; 1025 } 1026 1027 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1028 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1029 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1030 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1031 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1032 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1033 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1034 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1035 1036 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1037 { 1038 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1039 1040 if (op == NO_NEW_NONZERO_LOCATIONS || 1041 op == YES_NEW_NONZERO_LOCATIONS || 1042 op == COLUMNS_SORTED || 1043 op == ROW_ORIENTED) { 1044 MatSetOption(a->A,op); 1045 MatSetOption(a->B,op); 1046 } 1047 else if (op == ROWS_SORTED || 1048 op == SYMMETRIC_MATRIX || 1049 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1050 op == YES_NEW_DIAGONALS) 1051 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1052 else if (op == COLUMN_ORIENTED) { 1053 a->roworiented = 0; 1054 MatSetOption(a->A,op); 1055 MatSetOption(a->B,op); 1056 } 1057 else if (op == NO_NEW_DIAGONALS) 1058 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1059 else 1060 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1061 return 0; 1062 } 1063 1064 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1065 { 1066 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1067 *m = mat->M; *n = mat->N; 1068 return 0; 1069 } 1070 1071 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1072 { 1073 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1074 *m = mat->m; *n = mat->N; 1075 return 0; 1076 } 1077 1078 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1081 *m = mat->rstart; *n = mat->rend; 1082 return 0; 1083 } 1084 1085 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1086 { 1087 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1088 Scalar *vworkA, *vworkB, **pvA, **pvB; 1089 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1090 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1091 1092 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1093 lrow = row - rstart; 1094 1095 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1096 if (!v) {pvA = 0; pvB = 0;} 1097 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1098 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1099 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1100 nztot = nzA + nzB; 1101 1102 if (v || idx) { 1103 if (nztot) { 1104 /* Sort by increasing column numbers, assuming A and B already sorted */ 1105 int imark; 1106 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1107 if (v) { 1108 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1109 for ( i=0; i<nzB; i++ ) { 1110 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1111 else break; 1112 } 1113 imark = i; 1114 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1115 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1116 } 1117 if (idx) { 1118 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1119 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1120 for ( i=0; i<nzB; i++ ) { 1121 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1122 else break; 1123 } 1124 imark = i; 1125 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1126 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1127 } 1128 } 1129 else {*idx = 0; *v=0;} 1130 } 1131 *nz = nztot; 1132 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1133 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1134 return 0; 1135 } 1136 1137 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1138 { 1139 if (idx) PetscFree(*idx); 1140 if (v) PetscFree(*v); 1141 return 0; 1142 } 1143 1144 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1145 { 1146 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1147 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1148 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1149 double sum = 0.0; 1150 Scalar *v; 1151 1152 if (aij->size == 1) { 1153 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1154 } else { 1155 if (type == NORM_FROBENIUS) { 1156 v = amat->a; 1157 for (i=0; i<amat->nz; i++ ) { 1158 #if defined(PETSC_COMPLEX) 1159 sum += real(conj(*v)*(*v)); v++; 1160 #else 1161 sum += (*v)*(*v); v++; 1162 #endif 1163 } 1164 v = bmat->a; 1165 for (i=0; i<bmat->nz; i++ ) { 1166 #if defined(PETSC_COMPLEX) 1167 sum += real(conj(*v)*(*v)); v++; 1168 #else 1169 sum += (*v)*(*v); v++; 1170 #endif 1171 } 1172 MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1173 *norm = sqrt(*norm); 1174 } 1175 else if (type == NORM_1) { /* max column norm */ 1176 double *tmp, *tmp2; 1177 int *jj, *garray = aij->garray; 1178 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1179 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1180 PetscMemzero(tmp,aij->N*sizeof(double)); 1181 *norm = 0.0; 1182 v = amat->a; jj = amat->j; 1183 for ( j=0; j<amat->nz; j++ ) { 1184 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1185 } 1186 v = bmat->a; jj = bmat->j; 1187 for ( j=0; j<bmat->nz; j++ ) { 1188 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1189 } 1190 MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1191 for ( j=0; j<aij->N; j++ ) { 1192 if (tmp2[j] > *norm) *norm = tmp2[j]; 1193 } 1194 PetscFree(tmp); PetscFree(tmp2); 1195 } 1196 else if (type == NORM_INFINITY) { /* max row norm */ 1197 double ntemp = 0.0; 1198 for ( j=0; j<amat->m; j++ ) { 1199 v = amat->a + amat->i[j] + shift; 1200 sum = 0.0; 1201 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1202 sum += PetscAbsScalar(*v); v++; 1203 } 1204 v = bmat->a + bmat->i[j] + shift; 1205 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1206 sum += PetscAbsScalar(*v); v++; 1207 } 1208 if (sum > ntemp) ntemp = sum; 1209 } 1210 MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1211 } 1212 else { 1213 SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 1214 } 1215 } 1216 return 0; 1217 } 1218 1219 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1220 { 1221 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1222 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1223 int ierr,shift = Aloc->indexshift; 1224 Mat B; 1225 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1226 Scalar *array; 1227 1228 if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1229 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1230 PETSC_NULL,&B); CHKERRQ(ierr); 1231 1232 /* copy over the A part */ 1233 Aloc = (Mat_SeqAIJ*) a->A->data; 1234 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1235 row = a->rstart; 1236 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1237 for ( i=0; i<m; i++ ) { 1238 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1239 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1240 } 1241 aj = Aloc->j; 1242 for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1243 1244 /* copy over the B part */ 1245 Aloc = (Mat_SeqAIJ*) a->B->data; 1246 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1247 row = a->rstart; 1248 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1249 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1250 for ( i=0; i<m; i++ ) { 1251 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1252 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1253 } 1254 PetscFree(ct); 1255 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1256 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1257 if (matout) { 1258 *matout = B; 1259 } else { 1260 /* This isn't really an in-place transpose .... but free data structures from a */ 1261 PetscFree(a->rowners); 1262 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1263 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1264 if (a->colmap) PetscFree(a->colmap); 1265 if (a->garray) PetscFree(a->garray); 1266 if (a->lvec) VecDestroy(a->lvec); 1267 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1268 PetscFree(a); 1269 PetscMemcpy(A,B,sizeof(struct _Mat)); 1270 PetscHeaderDestroy(B); 1271 } 1272 return 0; 1273 } 1274 1275 extern int MatPrintHelp_SeqAIJ(Mat); 1276 static int MatPrintHelp_MPIAIJ(Mat A) 1277 { 1278 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1279 1280 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1281 else return 0; 1282 } 1283 1284 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1285 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1286 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1287 /* -------------------------------------------------------------------*/ 1288 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1289 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1290 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1291 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1292 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1293 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1294 MatLUFactor_MPIAIJ,0, 1295 MatRelax_MPIAIJ, 1296 MatTranspose_MPIAIJ, 1297 MatGetInfo_MPIAIJ,0, 1298 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1299 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1300 0, 1301 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1302 MatGetReordering_MPIAIJ, 1303 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1304 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1305 MatILUFactorSymbolic_MPIAIJ,0, 1306 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1307 0,0,0, 1308 0,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1309 MatPrintHelp_MPIAIJ, 1310 MatScale_MPIAIJ}; 1311 1312 /*@C 1313 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1314 (the default parallel PETSc format). 1315 1316 Input Parameters: 1317 . comm - MPI communicator 1318 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1319 . n - number of local columns (or PETSC_DECIDE to have calculated 1320 if N is given) 1321 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1322 . N - number of global columns (or PETSC_DECIDE to have calculated 1323 if n is given) 1324 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1325 (same for all local rows) 1326 . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1327 or null (possibly different for each row). You must leave room 1328 for the diagonal entry even if it is zero. 1329 . o_nz - number of nonzeros per row in off-diagonal portion of local 1330 submatrix (same for all local rows). 1331 . o_nzz - number of nonzeros per row in off-diagonal portion of local 1332 submatrix or null (possibly different for each row). 1333 1334 Output Parameter: 1335 . newmat - the matrix 1336 1337 Notes: 1338 The AIJ format (also called the Yale sparse matrix format or 1339 compressed row storage), is fully compatible with standard Fortran 77 1340 storage. That is, the stored row and column indices can begin at 1341 either one (as in Fortran) or zero. See the users manual for details. 1342 1343 The user MUST specify either the local or global matrix dimensions 1344 (possibly both). 1345 1346 Storage Information: 1347 For a square global matrix we define each processor's diagonal portion 1348 to be its local rows and the corresponding columns (a square submatrix); 1349 each processor's off-diagonal portion encompasses the remainder of the 1350 local matrix (a rectangular submatrix). 1351 1352 The user can specify preallocated storage for the diagonal part of 1353 the local submatrix with either d_nz or d_nnz (not both). Set 1354 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1355 memory allocation. Likewise, specify preallocated storage for the 1356 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1357 1358 By default, this format uses inodes (identical nodes) when possible. 1359 We search for consecutive rows with the same nonzero structure, thereby 1360 reusing matrix information to achieve increased efficiency. 1361 1362 Options Database Keys: 1363 $ -mat_aij_no_inode - Do not use inodes 1364 $ -mat_aij_inode_limit <limit> - Set inode limit (max limit=5) 1365 1366 .keywords: matrix, aij, compressed row, sparse, parallel 1367 1368 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1369 @*/ 1370 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1371 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat) 1372 { 1373 Mat mat; 1374 Mat_MPIAIJ *a; 1375 int ierr, i,sum[2],work[2]; 1376 1377 *newmat = 0; 1378 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1379 PLogObjectCreate(mat); 1380 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1381 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1382 mat->destroy = MatDestroy_MPIAIJ; 1383 mat->view = MatView_MPIAIJ; 1384 mat->factor = 0; 1385 mat->assembled = PETSC_FALSE; 1386 1387 a->insertmode = NOT_SET_VALUES; 1388 MPI_Comm_rank(comm,&a->rank); 1389 MPI_Comm_size(comm,&a->size); 1390 1391 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1392 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1393 1394 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1395 work[0] = m; work[1] = n; 1396 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1397 if (M == PETSC_DECIDE) M = sum[0]; 1398 if (N == PETSC_DECIDE) N = sum[1]; 1399 } 1400 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1401 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1402 a->m = m; 1403 a->n = n; 1404 a->N = N; 1405 a->M = M; 1406 1407 /* build local table of row and column ownerships */ 1408 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1409 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1410 a->cowners = a->rowners + a->size + 1; 1411 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1412 a->rowners[0] = 0; 1413 for ( i=2; i<=a->size; i++ ) { 1414 a->rowners[i] += a->rowners[i-1]; 1415 } 1416 a->rstart = a->rowners[a->rank]; 1417 a->rend = a->rowners[a->rank+1]; 1418 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1419 a->cowners[0] = 0; 1420 for ( i=2; i<=a->size; i++ ) { 1421 a->cowners[i] += a->cowners[i-1]; 1422 } 1423 a->cstart = a->cowners[a->rank]; 1424 a->cend = a->cowners[a->rank+1]; 1425 1426 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1427 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1428 PLogObjectParent(mat,a->A); 1429 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1430 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1431 PLogObjectParent(mat,a->B); 1432 1433 /* build cache for off array entries formed */ 1434 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1435 a->colmap = 0; 1436 a->garray = 0; 1437 a->roworiented = 1; 1438 1439 /* stuff used for matrix vector multiply */ 1440 a->lvec = 0; 1441 a->Mvctx = 0; 1442 1443 *newmat = mat; 1444 return 0; 1445 } 1446 1447 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1448 { 1449 Mat mat; 1450 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1451 int ierr, len,flg; 1452 1453 *newmat = 0; 1454 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1455 PLogObjectCreate(mat); 1456 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1457 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1458 mat->destroy = MatDestroy_MPIAIJ; 1459 mat->view = MatView_MPIAIJ; 1460 mat->factor = matin->factor; 1461 mat->assembled = PETSC_TRUE; 1462 1463 a->m = oldmat->m; 1464 a->n = oldmat->n; 1465 a->M = oldmat->M; 1466 a->N = oldmat->N; 1467 1468 a->rstart = oldmat->rstart; 1469 a->rend = oldmat->rend; 1470 a->cstart = oldmat->cstart; 1471 a->cend = oldmat->cend; 1472 a->size = oldmat->size; 1473 a->rank = oldmat->rank; 1474 a->insertmode = NOT_SET_VALUES; 1475 1476 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1477 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1478 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1479 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1480 if (oldmat->colmap) { 1481 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1482 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1483 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1484 } else a->colmap = 0; 1485 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1486 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1487 PLogObjectMemory(mat,len*sizeof(int)); 1488 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1489 } else a->garray = 0; 1490 1491 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1492 PLogObjectParent(mat,a->lvec); 1493 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1494 PLogObjectParent(mat,a->Mvctx); 1495 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1496 PLogObjectParent(mat,a->A); 1497 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1498 PLogObjectParent(mat,a->B); 1499 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1500 if (flg) { 1501 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1502 } 1503 *newmat = mat; 1504 return 0; 1505 } 1506 1507 #include "sysio.h" 1508 1509 int MatLoad_MPIAIJ(Viewer bview,MatType type,Mat *newmat) 1510 { 1511 Mat A; 1512 int i, nz, ierr, j,rstart, rend, fd; 1513 Scalar *vals,*svals; 1514 PetscObject vobj = (PetscObject) bview; 1515 MPI_Comm comm = vobj->comm; 1516 MPI_Status status; 1517 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1518 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1519 int tag = ((PetscObject)bview)->tag; 1520 1521 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1522 if (!rank) { 1523 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1524 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1525 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1526 } 1527 1528 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1529 M = header[1]; N = header[2]; 1530 /* determine ownership of all rows */ 1531 m = M/size + ((M % size) > rank); 1532 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1533 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1534 rowners[0] = 0; 1535 for ( i=2; i<=size; i++ ) { 1536 rowners[i] += rowners[i-1]; 1537 } 1538 rstart = rowners[rank]; 1539 rend = rowners[rank+1]; 1540 1541 /* distribute row lengths to all processors */ 1542 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1543 offlens = ourlens + (rend-rstart); 1544 if (!rank) { 1545 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1546 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1547 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1548 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1549 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1550 PetscFree(sndcounts); 1551 } 1552 else { 1553 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1554 } 1555 1556 if (!rank) { 1557 /* calculate the number of nonzeros on each processor */ 1558 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1559 PetscMemzero(procsnz,size*sizeof(int)); 1560 for ( i=0; i<size; i++ ) { 1561 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1562 procsnz[i] += rowlengths[j]; 1563 } 1564 } 1565 PetscFree(rowlengths); 1566 1567 /* determine max buffer needed and allocate it */ 1568 maxnz = 0; 1569 for ( i=0; i<size; i++ ) { 1570 maxnz = PetscMax(maxnz,procsnz[i]); 1571 } 1572 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1573 1574 /* read in my part of the matrix column indices */ 1575 nz = procsnz[0]; 1576 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1577 ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1578 1579 /* read in every one elses and ship off */ 1580 for ( i=1; i<size; i++ ) { 1581 nz = procsnz[i]; 1582 ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1583 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1584 } 1585 PetscFree(cols); 1586 } 1587 else { 1588 /* determine buffer space needed for message */ 1589 nz = 0; 1590 for ( i=0; i<m; i++ ) { 1591 nz += ourlens[i]; 1592 } 1593 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1594 1595 /* receive message of column indices*/ 1596 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1597 MPI_Get_count(&status,MPI_INT,&maxnz); 1598 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1599 } 1600 1601 /* loop over local rows, determining number of off diagonal entries */ 1602 PetscMemzero(offlens,m*sizeof(int)); 1603 jj = 0; 1604 for ( i=0; i<m; i++ ) { 1605 for ( j=0; j<ourlens[i]; j++ ) { 1606 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1607 jj++; 1608 } 1609 } 1610 1611 /* create our matrix */ 1612 for ( i=0; i<m; i++ ) { 1613 ourlens[i] -= offlens[i]; 1614 } 1615 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1616 A = *newmat; 1617 MatSetOption(A,COLUMNS_SORTED); 1618 for ( i=0; i<m; i++ ) { 1619 ourlens[i] += offlens[i]; 1620 } 1621 1622 if (!rank) { 1623 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1624 1625 /* read in my part of the matrix numerical values */ 1626 nz = procsnz[0]; 1627 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1628 1629 /* insert into matrix */ 1630 jj = rstart; 1631 smycols = mycols; 1632 svals = vals; 1633 for ( i=0; i<m; i++ ) { 1634 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1635 smycols += ourlens[i]; 1636 svals += ourlens[i]; 1637 jj++; 1638 } 1639 1640 /* read in other processors and ship out */ 1641 for ( i=1; i<size; i++ ) { 1642 nz = procsnz[i]; 1643 ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1644 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1645 } 1646 PetscFree(procsnz); 1647 } 1648 else { 1649 /* receive numeric values */ 1650 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1651 1652 /* receive message of values*/ 1653 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1654 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1655 if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1656 1657 /* insert into matrix */ 1658 jj = rstart; 1659 smycols = mycols; 1660 svals = vals; 1661 for ( i=0; i<m; i++ ) { 1662 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1663 smycols += ourlens[i]; 1664 svals += ourlens[i]; 1665 jj++; 1666 } 1667 } 1668 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1669 1670 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1672 return 0; 1673 } 1674