#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.103 1995/12/23 04:54:07 bsmith Exp bsmith $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,shift = B->indexshift;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
  return 0;
}
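
/* Worked example (hypothetical values): with indexshift = -1 (Fortran-style
   indexing) and garray = {3,7,12}, CreateColmap_Private() sets colmap[3] = 1,
   colmap[7] = 2 and colmap[12] = 3, leaving every other entry zero. Callers
   recover a local index as colmap[global] + shift, so a zero entry (global
   column not present locally) yields a negative result they can test for. */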

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift,roworiented = aij->roworiented;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[in[j]] + shift;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          col = aij->colmap[idxn[j]] + shift;
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
      1) each message will consist of ordered triples
         (global row index, global column index, value); we store the
         indices as Scalars (doubles) to simplify the message passing.
      2) since we don't know how long each individual message is we
         allocate the largest needed buffer for each receive. Potentially
         this is a lot of wasted space.

     This could be done better.
  */
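  /* A minimal sketch of the wire format assumed above: each stashed entry
     occupies three consecutive Scalars in the send buffer,

        svalues = [ row0, col0, val0, row1, col1, val1, ... ]

     packed by the loop over aij->stash below and unpacked by the matching
     loop in MatAssemblyEnd_MPIAIJ(), which recovers the integer indices
     with PETSCREAL() casts. */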
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled,shift = C->indexshift;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
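  /* Note the two-pass use of starts[] here (and in MatAssemblyBegin_MPIAIJ
     above): it is first built as a prefix sum of nprocs[] and consumed as a
     set of moving write cursors while packing svalues (the ++ below destroys
     it), then rebuilt by the same prefix sum before posting the sends. */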
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
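
/* The multiply routines below all use the same splitting: for the locally
   owned rows, y = A*x_local + B*x_ghost, where a->A is the "diagonal" block
   acting on the locally owned part of x and a->B is the off-diagonal block
   acting on the ghost values gathered into a->lvec. Placing the local
   product between VecScatterBegin() and VecScatterEnd() overlaps that
   computation with the communication of the ghost values. */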

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
  /* do nondiagonal part */
  ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
                         (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
                       (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
  return MatGetDiagonal(a->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
              nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

extern int MatMarkDiag_SeqAIJ(Mat);
/*
   This has to provide several versions.

     1) per the sequential version
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) colored, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + D + U, where L is lower triangular, U is upper
       triangular and D is diagonal. This routine applies

            (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of an SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
    */
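    /* A sketch of the algebra (a reading aid): with E = D/omega,
       A = (L+E) + (U+E) - (2E - D), so

         (L+E)^{-1} A (U+E)^{-1}
             = (U+E)^{-1} + (L+E)^{-1} ( I - (2E - D)(U+E)^{-1} ).

       Hence the three steps below: x = (E+U)^{-1} b, then
       t = b - (2E - D)x = b - scale*D*x with scale = 2/omega - 1, and
       finally x = x + (E+L)^{-1} t; only triangular solves and one
       diagonal scaling are required, never a multiply by A itself. */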
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    PLogObjectParent(matin,tt);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i] + !shift;
      v    = A->a + diag[i] + !shift;
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t + shift; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] + shift;
      v    = A->a + A->i[i] + shift;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = fshift + A->a[diag[i]+shift];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] + shift;
      v    = B->a + B->i[i] + shift;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
    }
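    /* Reading aid for the sweeps below, assuming SPARSEDENSEMDOT() subtracts
       the sparse dot product from sum (the M in the name): sum becomes b[i]
       minus the full row dot product (the A row here includes the diagonal),
       so sum/d + x[i] is the classical Gauss-Seidel update
       (b[i] - sum over j != i of a_ij x_j)/a_ii, and
       x[i] = (1-omega)*x[i] + omega*(sum/d + x[i]) is then standard SOR. */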
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i] + !shift;
        v    = A->a + diag[i] + !shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == NO_NEW_NONZERO_LOCATIONS ||
      op == YES_NEW_NONZERO_LOCATIONS ||
      op == COLUMNS_SORTED ||
      op == ROW_ORIENTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == ROWS_SORTED ||
           op == SYMMETRIC_MATRIX ||
           op == STRUCTURALLY_SYMMETRIC_MATRIX ||
           op == YES_NEW_DIAGONALS)
    PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v = 0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  }
  else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec)   VecDestroy(a->lvec);
    if (a->Mvctx)  VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       0,0,MatGetValues_MPIAIJ};

/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
       or null (possibly different for each row). You must leave room
       for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
       submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
       submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set d_nz=0
   and d_nnz=PETSC_NULL for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
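/* A minimal usage sketch (error checking omitted; the preallocation numbers
   are illustrative only, e.g. for a 5-point stencil):

      Mat A;
      MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                      5,PETSC_NULL,2,PETSC_NULL,&A);
      MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES);   (any process, any row)
      MatAssemblyBegin(A,FINAL_ASSEMBLY);
      MatAssemblyEnd(A,FINAL_ASSEMBLY);
      MatMult(A,x,y);
      MatDestroy(A);
*/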
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap      = 0;
  a->garray      = 0;
  a->roworiented = 1;

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;

  if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  a->m = oldmat->m;
  a->n = oldmat->n;
  a->M = oldmat->M;
  a->N = oldmat->N;

  a->assembled  = 1;
  a->rstart     = oldmat->rstart;
  a->rend       = oldmat->rend;
  a->cstart     = oldmat->cstart;
  a->cend       = oldmat->cend;
  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->insertmode = NOT_SET_VALUES;

  a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}

#include "sysio.h"

int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat         A;
  int         i, nz, ierr, j,rstart, rend, fd;
  Scalar      *vals,*svals;
  PetscObject vobj = (PetscObject) bview;
  MPI_Comm    comm = vobj->comm;
  MPI_Status  status;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
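  /* worked example: M = 10 rows on size = 4 processes gives m = 3,3,2,2 on
     ranks 0..3, since the first M % size = 2 ranks each get one extra row */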
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}