#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.98 1995/11/06 21:31:33 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the number of global columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}
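
/*
   A minimal usage sketch (not part of this file's interface; the
   indices and matrix setup are illustrative only). Entries whose rows
   lie in [rstart,rend) go straight into aij->A or aij->B above; all
   other entries are cached in the stash and are not communicated
   until assembly time:

       Mat    mat;                      assume created with MatCreateMPIAIJ()
       Scalar one = 1.0;
       int    ierr, row = 0, col = 0;   row may belong to another processor
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,FINAL_ASSEMBLY); CHKERRQ(ierr);
*/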

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted, others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row, global column, value); we store the indices as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
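
/*
   Wire format used above: each stashed entry travels as three
   consecutive Scalars (global row, global column, value), the integer
   indices being cast to Scalar so a single MPIU_SCALAR message
   suffices. A sketch of the round trip for one entry (the buffer name
   is illustrative):

       Scalar buf[3];
       buf[0] = (Scalar) row; buf[1] = (Scalar) col; buf[2] = value;
            ... shipped as 3 MPIU_SCALARs ...
       row   = (int) PETSCREAL(buf[0]);
       col   = (int) PETSCREAL(buf[1]);
       value = buf[2];

   MatAssemblyEnd_MPIAIJ() below performs exactly this unpacking.
*/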

extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
     The assembled flags are 0 or 1, so the MPI_PROD reduction is 1
     exactly when every processor is still assembled. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* The code below does not handle the diagonal entries correctly unless
   the matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* pad to avoid zero-length malloc */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* pad to avoid zero-length malloc */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* gather the received rows into a local list */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
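
/*
   A usage sketch for the routine above (row numbers illustrative);
   each processor may list any global rows and they are routed to
   their owners before being zeroed:

       IS     rows;
       int    ierr, r[2] = {0, 5};
       Scalar diag = 1.0;
       ierr = ISCreateSeq(MPI_COMM_SELF,2,r,&rows); CHKERRQ(ierr);
       ierr = MatZeroRows(mat,rows,&diag); CHKERRQ(ierr);
       ierr = ISDestroy(rows); CHKERRQ(ierr);
*/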

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}
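
/*
   All four products above rely on the same splitting: with the local
   rows divided into the diagonal block A and off-diagonal block B,

       y_local = A*x_local + B*x_ghost,

   where x_ghost (held in a->lvec) contains the off-processor entries
   of x gathered through a->Mvctx. Starting the scatter, doing the
   local A product, and only then ending the scatter overlaps the
   communication with the bulk of the computation.
*/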

/*
   This only works correctly for square matrices where the subblock
   a->A is the diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
              nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
    This has to provide several versions.

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) colored, updating outer values between colors
*/
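
/*
   For reference, the relaxation loops below all accumulate, for each
   local row i,

       sum = b_i - sum_j a_ij x_j

   over both the diagonal block A and the off-diagonal block B
   (assuming SPARSEDENSEMDOT() subtracts the sparse dot product), and
   then update

       x_i <- (1-omega) x_i + omega (sum/d + x_i),   d = fshift + a_ii,

   which for fshift = 0 reduces to the usual SOR step

       x_i <- (1-omega) x_i + (omega/a_ii) (b_i - sum_{j!=i} a_ij x_j),

   since the a_ii x_i term inside sum cancels against the extra x_i.
*/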

static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular,
       U is strictly upper triangular, and D is diagonal. This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j */
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}
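
/*
   The merge in MatGetRow_MPIAIJ() restores global column order from
   the two sorted local rows: B-part entries with global column below
   cstart come first, then the A-part shifted by cstart, then the
   remaining B-part. For example, with cstart = 4 and cend = 8, A-part
   local columns {0,3} and B-part global columns {1,9} merge into the
   global columns {1,4,7,9}.
*/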

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
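
/*
   For reference, the norms computed above are

       ||A||_F   = sqrt( sum_ij |a_ij|^2 )   (Frobenius),
       ||A||_1   = max_j sum_i |a_ij|        (largest column sum),
       ||A||_inf = max_i sum_j |a_ij|        (largest row sum);

   the column sums are combined with an MPI_SUM reduction over a
   length-N work array, the row sums with MPI_MAX, and the squared
   Frobenius sum with MPI_SUM followed by a square root.
*/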

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we free the data
       structures from a and replace them with those of B */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row). You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off-processor entries as they are set */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap = 0;
  a->garray = 0;

  /* stuff used for matrix-vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
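
/*
   A minimal creation sketch (sizes and preallocation figures are
   illustrative): each processor supplies its local dimensions and lets
   PETSc derive the global ones, preallocating 5 nonzeros per row in
   the diagonal block and 2 per row in the off-diagonal block:

       Mat mat;
       int ierr, mlocal = 10, nlocal = 10;
       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,mlocal,nlocal,PETSC_DECIDE,
                              PETSC_DECIDE,5,0,2,0,&mat); CHKERRQ(ierr);
*/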

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}

#include "sysio.h"
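
/*
   The loader below assumes the binary layout written by the AIJ binary
   viewer: a four-int header (MAT_COOKIE, M, N, and a fourth entry not
   used here), then the M row lengths, then all column indices row by
   row, then all numerical values in the same order. Processor 0 reads
   each piece and ships the appropriate slice to its owner.
*/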

int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}