#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.99 1995/11/09 22:28:54 bsmith Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C   = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv); CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat); CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C   = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0;
j<n; j++ ) { 100 if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 101 if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 102 if (idxn[j] >= cstart && idxn[j] < cend){ 103 col = idxn[j] - cstart; 104 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 105 } 106 else { 107 col = aij->colmap[idxn[j]] + shift; 108 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 109 } 110 } 111 } 112 else { 113 SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 114 } 115 } 116 return 0; 117 } 118 119 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 120 { 121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 122 MPI_Comm comm = mat->comm; 123 int size = aij->size, *owners = aij->rowners; 124 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 125 MPI_Request *send_waits,*recv_waits; 126 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 127 InsertMode addv; 128 Scalar *rvalues,*svalues; 129 130 /* make sure all processors are either in INSERTMODE or ADDMODE */ 131 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 132 if (addv == (ADD_VALUES|INSERT_VALUES)) { 133 SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 134 } 135 aij->insertmode = addv; /* in case this processor had no cache */ 136 137 /* first count number of contributors to each processor */ 138 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 139 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 140 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 141 for ( i=0; i<aij->stash.n; i++ ) { 142 idx = aij->stash.idx[i]; 143 for ( j=0; j<size; j++ ) { 144 if (idx >= owners[j] && idx < owners[j+1]) { 145 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 146 } 147 } 148 } 149 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 150 151 /* inform other processors of number of messages and max length*/ 152 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 153 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 154 nreceives = work[rank]; 155 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 156 nmax = work[rank]; 157 PetscFree(work); 158 159 /* post receives: 160 1) each message will consist of ordered pairs 161 (global index,value) we store the global index as a double 162 to simplify the message passing. 163 2) since we don't know how long each individual message is we 164 allocate the largest needed buffer for each receive. Potentially 165 this is a lot of wasted space. 166 167 168 This could be done better. 
169 */ 170 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 171 CHKPTRQ(rvalues); 172 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 173 CHKPTRQ(recv_waits); 174 for ( i=0; i<nreceives; i++ ) { 175 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 176 comm,recv_waits+i); 177 } 178 179 /* do sends: 180 1) starts[i] gives the starting index in svalues for stuff going to 181 the ith processor 182 */ 183 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 184 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 185 CHKPTRQ(send_waits); 186 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 187 starts[0] = 0; 188 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 189 for ( i=0; i<aij->stash.n; i++ ) { 190 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 191 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 192 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 193 } 194 PetscFree(owner); 195 starts[0] = 0; 196 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 197 count = 0; 198 for ( i=0; i<size; i++ ) { 199 if (procs[i]) { 200 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 201 comm,send_waits+count++); 202 } 203 } 204 PetscFree(starts); PetscFree(nprocs); 205 206 /* Free cache space */ 207 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 208 209 aij->svalues = svalues; aij->rvalues = rvalues; 210 aij->nsends = nsends; aij->nrecvs = nreceives; 211 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 212 aij->rmax = nmax; 213 214 return 0; 215 } 216 extern int MatSetUpMultiply_MPIAIJ(Mat); 217 218 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 219 { 220 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 221 Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 222 MPI_Status *send_status,recv_status; 223 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 224 int row,col,other_disassembled,shift = C->indexshift; 225 Scalar *values,val; 226 InsertMode addv = aij->insertmode; 227 228 /* wait on receives */ 229 while (count) { 230 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 231 /* unpack receives into our local space */ 232 values = aij->rvalues + 3*imdex*aij->rmax; 233 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 234 n = n/3; 235 for ( i=0; i<n; i++ ) { 236 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 237 col = (int) PETSCREAL(values[3*i+1]); 238 val = values[3*i+2]; 239 if (col >= aij->cstart && col < aij->cend) { 240 col -= aij->cstart; 241 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 242 } 243 else { 244 if (aij->assembled) { 245 if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 246 col = aij->colmap[col] + shift; 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 249 col = (int) PETSCREAL(values[3*i+1]); 250 } 251 } 252 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 253 } 254 } 255 count--; 256 } 257 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 258 259 /* wait on sends */ 260 if (aij->nsends) { 261 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 262 CHKPTRQ(send_status); 263 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 264 PetscFree(send_status); 265 } 266 PetscFree(aij->send_waits); PetscFree(aij->svalues); 267 268 aij->insertmode = NOT_SET_VALUES; 269 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 270 ierr = 
MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
467 /* inserted in yy until the next line, which is true for my implementation*/ 468 /* but is not perhaps always true. */ 469 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 470 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 471 return 0; 472 } 473 474 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 475 { 476 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 479 if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 480 /* do nondiagonal part */ 481 ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 482 /* send it on its way */ 483 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 484 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 485 /* do local part */ 486 ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 487 /* receive remote parts: note this assumes the values are not actually */ 488 /* inserted in yy until the next line, which is true for my implementation*/ 489 /* but is not perhaps always true. */ 490 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 491 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 492 return 0; 493 } 494 495 /* 496 This only works correctly for square matrices where the subblock A->A is the 497 diagonal block 498 */ 499 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 500 { 501 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 502 if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 503 return MatGetDiagonal(a->A,v); 504 } 505 506 static int MatDestroy_MPIAIJ(PetscObject obj) 507 { 508 Mat mat = (Mat) obj; 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 510 int ierr; 511 #if defined(PETSC_LOG) 512 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 513 #endif 514 PetscFree(aij->rowners); 515 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 516 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 517 if (aij->colmap) PetscFree(aij->colmap); 518 if (aij->garray) PetscFree(aij->garray); 519 if (aij->lvec) VecDestroy(aij->lvec); 520 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 521 PetscFree(aij); 522 PLogObjectDestroy(mat); 523 PetscHeaderDestroy(mat); 524 return 0; 525 } 526 #include "draw.h" 527 #include "pinclude/pviewer.h" 528 529 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 532 int ierr; 533 534 if (aij->size == 1) { 535 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 536 } 537 else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 538 return 0; 539 } 540 541 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 542 { 543 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 545 int ierr, format,shift = C->indexshift,rank; 546 PetscObject vobj = (PetscObject) viewer; 547 FILE *fd; 548 549 if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 550 ierr = ViewerFileGetFormat_Private(viewer,&format); 551 if (format == FILE_FORMAT_INFO_DETAILED) { 552 int nz,nzalloc,mem; 553 MPI_Comm_rank(mat->comm,&rank); 554 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 555 ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 556 MPIU_Seq_begin(mat->comm,1); 557 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz, 558 nzalloc,mem); 559 ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 560 fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz); 561 ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 562 fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz); 563 fflush(fd); 564 
MPIU_Seq_end(mat->comm,1); 565 ierr = VecScatterView(aij->Mvctx,viewer); 566 return 0; 567 } 568 else if (format == FILE_FORMAT_INFO) { 569 return 0; 570 } 571 } 572 573 if (vobj->type == ASCII_FILE_VIEWER) { 574 ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 575 MPIU_Seq_begin(mat->comm,1); 576 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 577 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 578 aij->cend); 579 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 580 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 581 fflush(fd); 582 MPIU_Seq_end(mat->comm,1); 583 } 584 else { 585 int size = aij->size; 586 rank = aij->rank; 587 if (size == 1) { 588 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 589 } 590 else { 591 /* assemble the entire matrix onto first processor. */ 592 Mat A; 593 Mat_SeqAIJ *Aloc; 594 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 595 Scalar *a; 596 597 if (!rank) { 598 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 599 } 600 else { 601 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 602 } 603 PLogObjectParent(mat,A); 604 605 606 /* copy over the A part */ 607 Aloc = (Mat_SeqAIJ*) aij->A->data; 608 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 609 row = aij->rstart; 610 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 611 for ( i=0; i<m; i++ ) { 612 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 613 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 614 } 615 aj = Aloc->j; 616 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 617 618 /* copy over the B part */ 619 Aloc = (Mat_SeqAIJ*) aij->B->data; 620 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 621 row = aij->rstart; 622 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 623 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 624 for ( i=0; i<m; i++ ) { 625 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 626 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 627 } 628 PetscFree(ct); 629 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 630 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 631 if (!rank) { 632 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 633 } 634 ierr = MatDestroy(A); CHKERRQ(ierr); 635 } 636 } 637 return 0; 638 } 639 640 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 641 { 642 Mat mat = (Mat) obj; 643 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 644 int ierr; 645 PetscObject vobj = (PetscObject) viewer; 646 647 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 648 if (!viewer) { 649 viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 650 } 651 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 652 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 653 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 654 } 655 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 656 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 657 } 658 else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) { 659 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 660 } 661 else if (vobj->cookie == DRAW_COOKIE) { 662 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 663 } 664 else if (vobj->type == BINARY_FILE_VIEWER) { 665 return 
MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions.

     1) the same as the sequential version
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D, where L is strictly lower triangular,
       U is strictly upper triangular, and D is diagonal. This routine applies

               (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of an SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 772 CHKERRQ(ierr); 773 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 774 CHKERRQ(ierr); 775 } 776 while (its--) { 777 /* go down through the rows */ 778 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 779 mat->Mvctx); CHKERRQ(ierr); 780 for ( i=0; i<m; i++ ) { 781 n = A->i[i+1] - A->i[i]; 782 idx = A->j + A->i[i] + shift; 783 v = A->a + A->i[i] + shift; 784 sum = b[i]; 785 SPARSEDENSEMDOT(sum,xs,v,idx,n); 786 d = fshift + A->a[diag[i]+shift]; 787 n = B->i[i+1] - B->i[i]; 788 idx = B->j + B->i[i] + shift; 789 v = B->a + B->i[i] + shift; 790 SPARSEDENSEMDOT(sum,ls,v,idx,n); 791 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 792 } 793 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 794 mat->Mvctx); CHKERRQ(ierr); 795 /* come up through the rows */ 796 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 797 mat->Mvctx); CHKERRQ(ierr); 798 for ( i=m-1; i>-1; i-- ) { 799 n = A->i[i+1] - A->i[i]; 800 idx = A->j + A->i[i] + shift; 801 v = A->a + A->i[i] + shift; 802 sum = b[i]; 803 SPARSEDENSEMDOT(sum,xs,v,idx,n); 804 d = fshift + A->a[diag[i]+shift]; 805 n = B->i[i+1] - B->i[i]; 806 idx = B->j + B->i[i] + shift; 807 v = B->a + B->i[i] + shift; 808 SPARSEDENSEMDOT(sum,ls,v,idx,n); 809 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 810 } 811 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 812 mat->Mvctx); CHKERRQ(ierr); 813 } 814 } 815 else if (flag & SOR_FORWARD_SWEEP){ 816 if (flag & SOR_ZERO_INITIAL_GUESS) { 817 VecSet(&zero,mat->lvec); 818 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 819 mat->Mvctx); CHKERRQ(ierr); 820 for ( i=0; i<m; i++ ) { 821 n = diag[i] - A->i[i]; 822 idx = A->j + A->i[i] + shift; 823 v = A->a + A->i[i] + shift; 824 sum = b[i]; 825 SPARSEDENSEMDOT(sum,xs,v,idx,n); 826 d = fshift + A->a[diag[i]+shift]; 827 n = B->i[i+1] - B->i[i]; 828 idx = B->j + B->i[i] + shift; 829 v = B->a + B->i[i] + shift; 830 SPARSEDENSEMDOT(sum,ls,v,idx,n); 831 x[i] = omega*(sum/d); 832 } 833 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 834 mat->Mvctx); CHKERRQ(ierr); 835 its--; 836 } 837 while (its--) { 838 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 839 CHKERRQ(ierr); 840 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 841 CHKERRQ(ierr); 842 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 843 mat->Mvctx); CHKERRQ(ierr); 844 for ( i=0; i<m; i++ ) { 845 n = A->i[i+1] - A->i[i]; 846 idx = A->j + A->i[i] + shift; 847 v = A->a + A->i[i] + shift; 848 sum = b[i]; 849 SPARSEDENSEMDOT(sum,xs,v,idx,n); 850 d = fshift + A->a[diag[i]+shift]; 851 n = B->i[i+1] - B->i[i]; 852 idx = B->j + B->i[i] + shift; 853 v = B->a + B->i[i] + shift; 854 SPARSEDENSEMDOT(sum,ls,v,idx,n); 855 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 856 } 857 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 858 mat->Mvctx); CHKERRQ(ierr); 859 } 860 } 861 else if (flag & SOR_BACKWARD_SWEEP){ 862 if (flag & SOR_ZERO_INITIAL_GUESS) { 863 VecSet(&zero,mat->lvec); 864 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 865 mat->Mvctx); CHKERRQ(ierr); 866 for ( i=m-1; i>-1; i-- ) { 867 n = A->i[i+1] - diag[i] - 1; 868 idx = A->j + diag[i] + !shift; 869 v = A->a + diag[i] + !shift; 870 sum = b[i]; 871 SPARSEDENSEMDOT(sum,xs,v,idx,n); 872 d = fshift + A->a[diag[i]+shift]; 873 n = B->i[i+1] - B->i[i]; 874 idx = B->j + B->i[i] + shift; 875 v = B->a + B->i[i] + shift; 876 SPARSEDENSEMDOT(sum,ls,v,idx,n); 877 x[i] = omega*(sum/d); 878 } 879 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 880 mat->Mvctx); CHKERRQ(ierr); 881 its--; 882 } 883 while (its--) { 884 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 885 mat->Mvctx); CHKERRQ(ierr); 886 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 887 mat->Mvctx); CHKERRQ(ierr); 888 ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 889 mat->Mvctx); CHKERRQ(ierr); 890 for ( i=m-1; i>-1; i-- ) { 891 n = A->i[i+1] - A->i[i]; 892 idx = A->j + A->i[i] + shift; 893 v = A->a + A->i[i] + shift; 894 sum = b[i]; 895 SPARSEDENSEMDOT(sum,xs,v,idx,n); 896 d = fshift + A->a[diag[i]+shift]; 897 n = B->i[i+1] - B->i[i]; 898 idx = B->j + B->i[i] + shift; 899 v = B->a + B->i[i] + shift; 900 SPARSEDENSEMDOT(sum,ls,v,idx,n); 901 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 902 } 903 ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 904 mat->Mvctx); CHKERRQ(ierr); 905 } 906 } 907 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 908 if (flag & SOR_ZERO_INITIAL_GUESS) { 909 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 910 } 911 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 912 CHKERRQ(ierr); 913 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 914 CHKERRQ(ierr); 915 while (its--) { 916 /* go down through the rows */ 917 for ( i=0; i<m; i++ ) { 918 n = A->i[i+1] - A->i[i]; 919 idx = A->j + A->i[i] + shift; 920 v = A->a + A->i[i] + shift; 921 sum = b[i]; 922 SPARSEDENSEMDOT(sum,xs,v,idx,n); 923 d = fshift + A->a[diag[i]+shift]; 924 n = B->i[i+1] - B->i[i]; 925 idx = B->j + B->i[i] + shift; 926 v = B->a + B->i[i] + shift; 927 SPARSEDENSEMDOT(sum,ls,v,idx,n); 928 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 929 } 930 /* come up through the rows */ 931 for ( i=m-1; i>-1; i-- ) { 932 n = A->i[i+1] - A->i[i]; 933 idx = A->j + A->i[i] + shift; 934 v = A->a + A->i[i] + shift; 935 sum = b[i]; 936 SPARSEDENSEMDOT(sum,xs,v,idx,n); 937 d = fshift + A->a[diag[i]+shift]; 938 n = B->i[i+1] - B->i[i]; 939 idx = B->j + B->i[i] + shift; 940 v = B->a + B->i[i] + shift; 941 SPARSEDENSEMDOT(sum,ls,v,idx,n); 942 x[i] = (1. 
- omega)*x[i] + omega*(sum/d + x[i]); 943 } 944 } 945 } 946 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 947 if (flag & SOR_ZERO_INITIAL_GUESS) { 948 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 949 } 950 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 951 CHKERRQ(ierr); 952 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 953 CHKERRQ(ierr); 954 while (its--) { 955 for ( i=0; i<m; i++ ) { 956 n = A->i[i+1] - A->i[i]; 957 idx = A->j + A->i[i] + shift; 958 v = A->a + A->i[i] + shift; 959 sum = b[i]; 960 SPARSEDENSEMDOT(sum,xs,v,idx,n); 961 d = fshift + A->a[diag[i]+shift]; 962 n = B->i[i+1] - B->i[i]; 963 idx = B->j + B->i[i] + shift; 964 v = B->a + B->i[i] + shift; 965 SPARSEDENSEMDOT(sum,ls,v,idx,n); 966 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 967 } 968 } 969 } 970 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 971 if (flag & SOR_ZERO_INITIAL_GUESS) { 972 return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 973 } 974 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 975 mat->Mvctx); CHKERRQ(ierr); 976 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 977 mat->Mvctx); CHKERRQ(ierr); 978 while (its--) { 979 for ( i=m-1; i>-1; i-- ) { 980 n = A->i[i+1] - A->i[i]; 981 idx = A->j + A->i[i] + shift; 982 v = A->a + A->i[i] + shift; 983 sum = b[i]; 984 SPARSEDENSEMDOT(sum,xs,v,idx,n); 985 d = fshift + A->a[diag[i]+shift]; 986 n = B->i[i+1] - B->i[i]; 987 idx = B->j + B->i[i] + shift; 988 v = B->a + B->i[i] + shift; 989 SPARSEDENSEMDOT(sum,ls,v,idx,n); 990 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 991 } 992 } 993 } 994 return 0; 995 } 996 997 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 998 int *nzalloc,int *mem) 999 { 1000 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1001 Mat A = mat->A, B = mat->B; 1002 int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1003 1004 ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 1005 ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1006 isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1007 if (flag == MAT_LOCAL) { 1008 *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 1009 } else if (flag == MAT_GLOBAL_MAX) { 1010 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1011 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1012 } else if (flag == MAT_GLOBAL_SUM) { 1013 MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1014 *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 1015 } 1016 return 0; 1017 } 1018 1019 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1020 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1021 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1022 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1023 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1024 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1025 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1026 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1027 1028 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1029 { 1030 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1031 1032 if (op == NO_NEW_NONZERO_LOCATIONS || 1033 op == YES_NEW_NONZERO_LOCATIONS || 1034 op == COLUMNS_SORTED || 1035 op == ROW_ORIENTED) { 1036 MatSetOption(a->A,op); 1037 MatSetOption(a->B,op); 1038 } 1039 else if (op == ROWS_SORTED || 1040 op == SYMMETRIC_MATRIX || 1041 op == STRUCTURALLY_SYMMETRIC_MATRIX || 1042 op == YES_NEW_DIAGONALS) 1043 PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 
1044 else if (op == COLUMN_ORIENTED) 1045 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 1046 else if (op == NO_NEW_DIAGONALS) 1047 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 1048 else 1049 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1050 return 0; 1051 } 1052 1053 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1054 { 1055 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1056 *m = mat->M; *n = mat->N; 1057 return 0; 1058 } 1059 1060 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1061 { 1062 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1063 *m = mat->m; *n = mat->N; 1064 return 0; 1065 } 1066 1067 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1068 { 1069 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1070 *m = mat->rstart; *n = mat->rend; 1071 return 0; 1072 } 1073 1074 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1075 { 1076 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1077 Scalar *vworkA, *vworkB, **pvA, **pvB; 1078 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1079 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1080 1081 if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix"); 1082 if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1083 lrow = row - rstart; 1084 1085 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1086 if (!v) {pvA = 0; pvB = 0;} 1087 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1088 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1089 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1090 nztot = nzA + nzB; 1091 1092 if (v || idx) { 1093 if (nztot) { 1094 /* Sort by increasing column numbers, assuming A and B already sorted */ 1095 int imark; 1096 if (mat->assembled) { 1097 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1098 } 1099 if (v) { 1100 *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1101 for ( i=0; i<nzB; i++ ) { 1102 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1103 else break; 1104 } 1105 imark = i; 1106 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1107 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1108 } 1109 if (idx) { 1110 *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1111 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1112 for ( i=0; i<nzB; i++ ) { 1113 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1114 else break; 1115 } 1116 imark = i; 1117 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1118 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1119 } 1120 } 1121 else {*idx = 0; *v=0;} 1122 } 1123 *nz = nztot; 1124 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1125 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1126 return 0; 1127 } 1128 1129 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1130 { 1131 if (idx) PetscFree(*idx); 1132 if (v) PetscFree(*v); 1133 return 0; 1134 } 1135 1136 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1137 { 1138 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1139 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1140 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1141 double sum = 0.0; 1142 Scalar *v; 1143 1144 if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1145 if (aij->size == 1) { 1146 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1147 } else { 1148 if (type == 
NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for the two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we do free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ,0,0,
       0,0,0,
       0,0,MatGetValues_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row). You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
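   For example (an illustrative sketch with hypothetical dimensions, not
   taken from the original text): with two processors and an 8x8 global
   matrix split as four rows per processor, process 0 owns rows 0-3; its
   entries in columns 0-3 are stored in the diagonal block and its entries
   in columns 4-7 in the off-diagonal block. A corresponding call, which
   preallocates 5 nonzeros per diagonal-block row and 2 per
   off-diagonal-block row on each processor, would be

      MatCreateMPIAIJ(MPI_COMM_WORLD,4,4,8,8,5,0,2,0,&mat);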
1333 1334 The user can specify preallocated storage for the diagonal part of 1335 the local submatrix with either d_nz or d_nnz (not both). Set both 1336 d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1337 Likewise, specify preallocated storage for the off-diagonal part of 1338 the local submatrix with o_nz or o_nnz (not both). 1339 1340 .keywords: matrix, aij, compressed row, sparse, parallel 1341 1342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1343 @*/ 1344 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1345 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1346 { 1347 Mat mat; 1348 Mat_MPIAIJ *a; 1349 int ierr, i,sum[2],work[2]; 1350 1351 *newmat = 0; 1352 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1353 PLogObjectCreate(mat); 1354 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1355 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1356 mat->destroy = MatDestroy_MPIAIJ; 1357 mat->view = MatView_MPIAIJ; 1358 mat->factor = 0; 1359 1360 a->insertmode = NOT_SET_VALUES; 1361 MPI_Comm_rank(comm,&a->rank); 1362 MPI_Comm_size(comm,&a->size); 1363 1364 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1365 SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1366 1367 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1368 work[0] = m; work[1] = n; 1369 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1370 if (M == PETSC_DECIDE) M = sum[0]; 1371 if (N == PETSC_DECIDE) N = sum[1]; 1372 } 1373 if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1374 if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1375 a->m = m; 1376 a->n = n; 1377 a->N = N; 1378 a->M = M; 1379 1380 /* build local table of row and column ownerships */ 1381 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1382 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1383 a->cowners = a->rowners + a->size + 1; 1384 MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1385 a->rowners[0] = 0; 1386 for ( i=2; i<=a->size; i++ ) { 1387 a->rowners[i] += a->rowners[i-1]; 1388 } 1389 a->rstart = a->rowners[a->rank]; 1390 a->rend = a->rowners[a->rank+1]; 1391 MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1392 a->cowners[0] = 0; 1393 for ( i=2; i<=a->size; i++ ) { 1394 a->cowners[i] += a->cowners[i-1]; 1395 } 1396 a->cstart = a->cowners[a->rank]; 1397 a->cend = a->cowners[a->rank+1]; 1398 1399 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1400 PLogObjectParent(mat,a->A); 1401 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1402 PLogObjectParent(mat,a->B); 1403 1404 /* build cache for off array entries formed */ 1405 ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1406 a->colmap = 0; 1407 a->garray = 0; 1408 1409 /* stuff used for matrix vector multiply */ 1410 a->lvec = 0; 1411 a->Mvctx = 0; 1412 a->assembled = 0; 1413 1414 *newmat = mat; 1415 return 0; 1416 } 1417 1418 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1419 { 1420 Mat mat; 1421 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1422 int ierr, len; 1423 1424 if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1425 *newmat = 0; 1426 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1427 PLogObjectCreate(mat); 1428 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1429 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1430 
mat->destroy = MatDestroy_MPIAIJ; 1431 mat->view = MatView_MPIAIJ; 1432 mat->factor = matin->factor; 1433 1434 a->m = oldmat->m; 1435 a->n = oldmat->n; 1436 a->M = oldmat->M; 1437 a->N = oldmat->N; 1438 1439 a->assembled = 1; 1440 a->rstart = oldmat->rstart; 1441 a->rend = oldmat->rend; 1442 a->cstart = oldmat->cstart; 1443 a->cend = oldmat->cend; 1444 a->size = oldmat->size; 1445 a->rank = oldmat->rank; 1446 a->insertmode = NOT_SET_VALUES; 1447 1448 a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1449 PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1450 PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1451 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1452 if (oldmat->colmap) { 1453 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1454 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1455 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1456 } else a->colmap = 0; 1457 if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1458 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1459 PLogObjectMemory(mat,len*sizeof(int)); 1460 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1461 } else a->garray = 0; 1462 1463 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1464 PLogObjectParent(mat,a->lvec); 1465 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1466 PLogObjectParent(mat,a->Mvctx); 1467 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1468 PLogObjectParent(mat,a->A); 1469 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1470 PLogObjectParent(mat,a->B); 1471 *newmat = mat; 1472 return 0; 1473 } 1474 1475 #include "sysio.h" 1476 1477 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1478 { 1479 Mat A; 1480 int i, nz, ierr, j,rstart, rend, fd; 1481 Scalar *vals,*svals; 1482 PetscObject vobj = (PetscObject) bview; 1483 MPI_Comm comm = vobj->comm; 1484 MPI_Status status; 1485 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1486 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1487 int tag = ((PetscObject)bview)->tag; 1488 1489 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1490 if (!rank) { 1491 ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1492 ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1493 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1494 } 1495 1496 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1497 M = header[1]; N = header[2]; 1498 /* determine ownership of all rows */ 1499 m = M/size + ((M % size) > rank); 1500 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1501 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1502 rowners[0] = 0; 1503 for ( i=2; i<=size; i++ ) { 1504 rowners[i] += rowners[i-1]; 1505 } 1506 rstart = rowners[rank]; 1507 rend = rowners[rank+1]; 1508 1509 /* distribute row lengths to all processors */ 1510 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1511 offlens = ourlens + (rend-rstart); 1512 if (!rank) { 1513 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1514 ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1515 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1516 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1517 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
1631 jj = rstart; 1632 smycols = mycols; 1633 svals = vals; 1634 for ( i=0; i<m; i++ ) { 1635 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1636 smycols += ourlens[i]; 1637 svals += ourlens[i]; 1638 jj++; 1639 } 1640 } 1641 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1642 1643 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1644 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1645 return 0; 1646 } 1647
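/*
   Example usage of the routines defined in this file (a minimal,
   illustrative sketch only; the matrix dimensions, preallocation figures,
   and the vectors x and y are hypothetical and must be created by the
   caller with a matching parallel layout):

      Mat    mat;
      int    ierr, row = 0, col = 0;
      Scalar value = 1.0;

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                             100,100,5,0,2,0,&mat); CHKERRQ(ierr);
      ierr = MatSetValues(mat,1,&row,1,&col,&value,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatMult(mat,x,y); CHKERRQ(ierr);
      ierr = MatDestroy(mat); CHKERRQ(ierr);
*/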