#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.96 1995/11/01 19:10:10 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the number of global columns.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}
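/*
   Usage sketch (illustration only, not part of the library): how a caller
   typically fills an MPIAIJ matrix.  Rows owned locally go straight into
   aij->A/aij->B above; rows owned by other processors are stashed and are
   communicated during assembly.  The loop bounds come from
   MatGetOwnershipRange(); the value 5.0 and the choice of columns are
   made up for the example.

      int    rstart, rend, i, col;
      Scalar val = 5.0;
      MatGetOwnershipRange(mat,&rstart,&rend);
      for ( i=rstart; i<rend; i++ ) {
        col = i;                    (diagonal entry, always owned locally)
        MatSetValues(mat,1,&i,1,&col,&val,INSERT_VALUES);
      }
      MatAssemblyBegin(mat,FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,FINAL_ASSEMBLY);
*/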
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted, others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
          (global index,value); we store the global index as a double
          to simplify the message passing.
       2) since we don't know how long each individual message is, we
          allocate the largest needed buffer for each receive.  Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
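/*
   Sketch (illustration only): the sizing handshake used above in
   MatAssemblyBegin_MPIAIJ and again below in MatZeroRows_MPIAIJ.  Each
   processor marks procs[j] = 1 and counts nprocs[j] entries destined for
   processor j; two reductions then tell processor rank how many messages
   to expect (sum of the indicator flags) and how long the longest single
   incoming message can be (max of the counts), so fixed-size receives can
   be posted with MPI_ANY_SOURCE:

      MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
      nreceives = work[rank];          (number of incoming messages)
      MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);
      nmax      = work[rank];          (length of the longest one)
*/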
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG.  The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
              nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions.

    1) per sequential
    2) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is strictly lower triangular,
       U is strictly upper triangular and D is diagonal.  This routine applies

                (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick.  This is for
       the case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.  The trick rests on the splitting
       A = (L + E) + (U + E) - (2E - D), so only two triangular solves
       and a diagonal scaling are needed.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;

    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);

    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
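/*
   Usage sketch (illustration only): applying one local SSOR sweep with the
   relaxation routine above, as a smoother would.  The flag combination and
   the values omega = 1.0, fshift = 0.0, its = 1 are example choices, not
   defaults of the library.

      ierr = MatRelax(mat,b,1.0,
                (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
                0.0,1,x); CHKERRQ(ierr);
*/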
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v++);
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v++);
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap = 0;
  a->garray = 0;

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
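/*
   Usage sketch (illustration only): creating a 100 x 100 parallel AIJ matrix
   and letting PETSc pick the local sizes.  The preallocation figures (5
   nonzeros per row in the diagonal block, 2 in the off-diagonal block) are
   made-up numbers for the example; pass 0,0,0,0 to let the storage grow
   dynamically as described in the manual page above.

      Mat C;
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,0,2,0,&C); CHKERRQ(ierr);
*/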
static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
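/*
   Usage sketch (illustration only): reading a previously stored matrix back
   in as an MPIAIJ matrix, assuming 'viewer' is a binary file viewer that was
   opened elsewhere on the same communicator.

      Mat A;
      ierr = MatLoad_MPIAIJorMPIRow(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
*/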