#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.175 1996/11/20 05:04:38 curfman Exp curfman $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
     1) each message consists of ordered triples
        (global row index, global column index, value); the indices are stored
        as Scalars to simplify the message passing.
     2) since we don't know how long each individual message is we
        allocate the largest needed buffer for each receive.
        Potentially this is a lot of wasted space.

        This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *)
               PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr =
         (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor.
      */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
   This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once.
       b) local smoothing, updating outer values each inner iteration.
    3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1.
                - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int
                                  *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr =
           MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ};


/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
   A short usage sketch is given in the comment following
   MatConvertSameType_MPIAIJ() below.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.
   In the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}
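
/*
   Usage sketch for MatCreateMPIAIJ() (illustration only, not part of the library):
   one plausible way to create, fill, and assemble a parallel AIJ matrix as
   described in the documentation above.  The communicator, global size (8),
   and the preallocation numbers (d_nz=5, o_nz=2) are made-up example values;
   only calls that appear elsewhere in this file are used.

      Mat    C;
      int    ierr, i, j, rstart, rend;
      Scalar v = 1.0;

      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
                             5,PETSC_NULL,2,PETSC_NULL,&C); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(C,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {   (each process sets only its own rows)
        j = i;
        ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ...
      ierr = MatDestroy(C); CHKERRQ(ierr);
*/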

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
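
/*
   Usage sketch for MatLoad_MPIAIJ() (illustration only, not part of the library):
   a matrix previously written to a binary file with MatView() is read back in
   parallel through the generic MatLoad() interface.  The binary-viewer routine
   and flag (ViewerFileOpenBinary, BINARY_RDONLY) are assumed names from this
   PETSc version's viewer interface and may differ; the file name "matrix.dat"
   is made up.

      Mat    A;
      Viewer viewer;
      int    ierr;

      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
      CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/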