#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.183 1997/01/01 03:37:44 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part (aij->B) of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix column length (aij->N).

   The map stores local_index + 1, so that an entry of 0 means "this global
   column does not occur in the off-diagonal part"; callers subtract 1 after
   the lookup and test for a negative result.
*/
#undef __FUNCTION__
#define __FUNCTION__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* garray[i] is the global column number of local off-diagonal column i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/*
   MatGetRowIJ for the parallel AIJ format: only supported when running on
   one processor, in which case the request is forwarded to the sequential
   diagonal block aij->A.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatGetRowIJ_MPIAIJ"
static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

/* Companion to MatGetRowIJ_MPIAIJ: returns the ia/ja arrays obtained above
   (uniprocessor only, forwarded to the diagonal block aij->A). */
#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ"
static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNCTION__
#define __FUNCTION__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds a logically dense block of values.  Locally owned rows go
   directly into the diagonal (aij->A) or off-diagonal (aij->B) sequential
   matrices; rows owned by other processors are stashed and communicated
   during assembly (unless donotstash is set).  Mixing INSERT_VALUES and
   ADD_VALUES between assemblies is an error (checked in debug builds only).
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,0,"Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {          /* row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){       /* diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {                                      /* off-diagonal block */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local index + 1; negative result means the
               column is not in the current off-diagonal structure */
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new nonzero location: fall back to global numbering */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash for communication at assembly time */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

/*
   Retrieves values from locally owned rows only.  Columns in the diagonal
   block come from aij->A; off-diagonal columns are looked up via colmap and
   come from aij->B; columns absent from the local structure return 0.0.
   Requesting an off-processor row is an error.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatGetValues_MPIAIJ"
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* entry not present in the off-diagonal structure -> value is 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

/*
   Begins assembly: verifies all processors agree on insert vs. add, then
   packs the stashed off-processor entries as (row,col,value) Scalar triples
   and posts the nonblocking sends/receives.  The MPI requests and buffers
   are stored in the Mat_MPIAIJ struct for MatAssemblyEnd_MPIAIJ.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyBegin_MPIAIJ"
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each stashed entry as a (row, col, value) triple of Scalars */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* recompute starts[], consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/*
   Completes assembly: drains the receives posted in MatAssemblyBegin,
   unpacks the (row,col,value) triples into aij->A / aij->B, waits on the
   sends, assembles the two sequential blocks, and (on first final
   assembly) builds the scatter used by MatMult.
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatAssemblyEnd_MPIAIJ"
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;   /* n triples of (row, col, value) */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal nonzero: disassemble and use global index */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

/* Zeros all entries of both the diagonal and off-diagonal blocks,
   preserving the nonzero structure. */
#undef __FUNCTION__
#define __FUNCTION__ "MatZeroEntries_MPIAIJ"
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
356 */ 357 #undef __FUNCTION__ 358 #define __FUNCTION__ "MatZeroRows_MPIAIJ" 359 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 360 { 361 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 362 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 363 int *procs,*nprocs,j,found,idx,nsends,*work; 364 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 365 int *rvalues,tag = A->tag,count,base,slen,n,*source; 366 int *lens,imdex,*lrows,*values; 367 MPI_Comm comm = A->comm; 368 MPI_Request *send_waits,*recv_waits; 369 MPI_Status recv_status,*send_status; 370 IS istmp; 371 372 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 373 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 374 375 /* first count number of contributors to each processor */ 376 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 377 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 378 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 379 for ( i=0; i<N; i++ ) { 380 idx = rows[i]; 381 found = 0; 382 for ( j=0; j<size; j++ ) { 383 if (idx >= owners[j] && idx < owners[j+1]) { 384 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 385 } 386 } 387 if (!found) SETERRQ(1,0,"Index out of range"); 388 } 389 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 390 391 /* inform other processors of number of messages and max length*/ 392 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 393 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 394 nrecvs = work[rank]; 395 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 396 nmax = work[rank]; 397 PetscFree(work); 398 399 /* post receives: */ 400 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 401 CHKPTRQ(rvalues); 402 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 403 CHKPTRQ(recv_waits); 404 for ( i=0; i<nrecvs; i++ ) { 405 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 406 } 407 408 /* do 
sends: 409 1) starts[i] gives the starting index in svalues for stuff going to 410 the ith processor 411 */ 412 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 413 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 414 CHKPTRQ(send_waits); 415 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 416 starts[0] = 0; 417 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 418 for ( i=0; i<N; i++ ) { 419 svalues[starts[owner[i]]++] = rows[i]; 420 } 421 ISRestoreIndices(is,&rows); 422 423 starts[0] = 0; 424 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 425 count = 0; 426 for ( i=0; i<size; i++ ) { 427 if (procs[i]) { 428 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 429 } 430 } 431 PetscFree(starts); 432 433 base = owners[rank]; 434 435 /* wait on receives */ 436 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 437 source = lens + nrecvs; 438 count = nrecvs; slen = 0; 439 while (count) { 440 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 441 /* unpack receives into our local space */ 442 MPI_Get_count(&recv_status,MPI_INT,&n); 443 source[imdex] = recv_status.MPI_SOURCE; 444 lens[imdex] = n; 445 slen += n; 446 count--; 447 } 448 PetscFree(recv_waits); 449 450 /* move the data into the send scatter */ 451 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 452 count = 0; 453 for ( i=0; i<nrecvs; i++ ) { 454 values = rvalues + i*nmax; 455 for ( j=0; j<lens[i]; j++ ) { 456 lrows[count++] = values[j] - base; 457 } 458 } 459 PetscFree(rvalues); PetscFree(lens); 460 PetscFree(owner); PetscFree(nprocs); 461 462 /* actually zap the local rows */ 463 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 464 PLogObjectParent(A,istmp); 465 PetscFree(lrows); 466 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 467 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 468 ierr = 
ISDestroy(istmp); CHKERRQ(ierr); 469 470 /* wait on sends */ 471 if (nsends) { 472 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 473 CHKPTRQ(send_status); 474 MPI_Waitall(nsends,send_waits,send_status); 475 PetscFree(send_status); 476 } 477 PetscFree(send_waits); PetscFree(svalues); 478 479 return 0; 480 } 481 482 #undef __FUNCTION__ 483 #define __FUNCTION__ "MatMult_MPIAIJ" 484 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 485 { 486 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 487 int ierr,nt; 488 489 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 490 if (nt != a->n) { 491 SETERRQ(1,0,"Incompatible parition of A and xx"); 492 } 493 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 494 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 495 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 496 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 497 return 0; 498 } 499 500 #undef __FUNCTION__ 501 #define __FUNCTION__ "MatMultAdd_MPIAIJ" 502 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 505 int ierr; 506 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 507 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 508 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 509 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 510 return 0; 511 } 512 513 #undef __FUNCTION__ 514 #define __FUNCTION__ "MatMultTrans_MPIAIJ" 515 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 516 { 517 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 518 int ierr; 519 520 /* do nondiagonal part */ 521 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 522 /* send it on its way */ 523 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 524 /* do local part */ 525 ierr = 
(*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/* z = A'*x + y: same reverse-scatter pattern as MatMultTrans_MPIAIJ. */
#undef __FUNCTION__
#define __FUNCTION__ "MatMultTransAdd_MPIAIJ"
static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatGetDiagonal_MPIAIJ"
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");  }
  return MatGetDiagonal(a->A,v);
}

/* Scales the whole matrix by *aa by scaling both local blocks. */
#undef __FUNCTION__
#define __FUNCTION__ "MatScale_MPIAIJ"
static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

/* Destroys the parallel matrix: both sequential blocks, the column map,
   the garray, the local work vector and scatter, and the header itself. */
#undef __FUNCTION__
#define __FUNCTION__ "MatDestroy_MPIAIJ"
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

/* Binary viewing is only supported on a single processor, where it is
   forwarded to the (sequential) diagonal block. */
#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ_Binary"
static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

/*
   Handles ASCII, Draw and Matlab viewers.  Info formats print per-processor
   statistics; otherwise, for the general case, the whole matrix is
   assembled onto processor 0 and viewed there.  Note the temporary
   in-place conversion of the diagonal block's column indices to global
   numbering (undone immediately afterwards).
*/
#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* one processor at a time prints its ownership info and local blocks */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: temporarily shift column indices to global */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      /* undo the shift applied above */
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: translate local columns through garray */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

/* Dispatches MatView to the ASCII/Draw/Matlab or binary implementations. */
#undef __FUNCTION__
#define __FUNCTION__ "MatView_MPIAIJ"
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
762 */ 763 #undef __FUNCTION__ 764 #define __FUNCTION__ "MatRelax_MPIAIJ" 765 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 766 double fshift,int its,Vec xx) 767 { 768 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 769 Mat AA = mat->A, BB = mat->B; 770 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 771 Scalar *b,*x,*xs,*ls,d,*v,sum; 772 int ierr,*idx, *diag; 773 int n = mat->n, m = mat->m, i,shift = A->indexshift; 774 775 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 776 xs = x + shift; /* shift by one for index start of 1 */ 777 ls = ls + shift; 778 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 779 diag = A->diag; 780 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 781 if (flag & SOR_ZERO_INITIAL_GUESS) { 782 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 783 } 784 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 785 CHKERRQ(ierr); 786 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 787 CHKERRQ(ierr); 788 while (its--) { 789 /* go down through the rows */ 790 for ( i=0; i<m; i++ ) { 791 n = A->i[i+1] - A->i[i]; 792 idx = A->j + A->i[i] + shift; 793 v = A->a + A->i[i] + shift; 794 sum = b[i]; 795 SPARSEDENSEMDOT(sum,xs,v,idx,n); 796 d = fshift + A->a[diag[i]+shift]; 797 n = B->i[i+1] - B->i[i]; 798 idx = B->j + B->i[i] + shift; 799 v = B->a + B->i[i] + shift; 800 SPARSEDENSEMDOT(sum,ls,v,idx,n); 801 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 802 } 803 /* come up through the rows */ 804 for ( i=m-1; i>-1; i-- ) { 805 n = A->i[i+1] - A->i[i]; 806 idx = A->j + A->i[i] + shift; 807 v = A->a + A->i[i] + shift; 808 sum = b[i]; 809 SPARSEDENSEMDOT(sum,xs,v,idx,n); 810 d = fshift + A->a[diag[i]+shift]; 811 n = B->i[i+1] - B->i[i]; 812 idx = B->j + B->i[i] + shift; 813 v = B->a + B->i[i] + shift; 814 SPARSEDENSEMDOT(sum,ls,v,idx,n); 815 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 816 } 817 } 818 } 819 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 820 if (flag & SOR_ZERO_INITIAL_GUESS) { 821 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 822 } 823 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 824 CHKERRQ(ierr); 825 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 826 CHKERRQ(ierr); 827 while (its--) { 828 for ( i=0; i<m; i++ ) { 829 n = A->i[i+1] - A->i[i]; 830 idx = A->j + A->i[i] + shift; 831 v = A->a + A->i[i] + shift; 832 sum = b[i]; 833 SPARSEDENSEMDOT(sum,xs,v,idx,n); 834 d = fshift + A->a[diag[i]+shift]; 835 n = B->i[i+1] - B->i[i]; 836 idx = B->j + B->i[i] + shift; 837 v = B->a + B->i[i] + shift; 838 SPARSEDENSEMDOT(sum,ls,v,idx,n); 839 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 840 } 841 } 842 } 843 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 844 if (flag & SOR_ZERO_INITIAL_GUESS) { 845 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 846 } 847 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 848 mat->Mvctx); CHKERRQ(ierr); 849 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 850 mat->Mvctx); CHKERRQ(ierr); 851 while (its--) { 852 for ( i=m-1; i>-1; i-- ) { 853 n = A->i[i+1] - A->i[i]; 854 idx = A->j + A->i[i] + shift; 855 v = A->a + A->i[i] + shift; 856 sum = b[i]; 857 SPARSEDENSEMDOT(sum,xs,v,idx,n); 858 d = fshift + A->a[diag[i]+shift]; 859 n = B->i[i+1] - B->i[i]; 860 idx = B->j + B->i[i] + shift; 861 v = B->a + B->i[i] + shift; 862 SPARSEDENSEMDOT(sum,ls,v,idx,n); 863 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 864 } 865 } 866 } 867 else { 868 SETERRQ(1,0,"Option not supported"); 869 } 870 return 0; 871 } 872 873 #undef __FUNCTION__ 874 #define __FUNCTION__ "MatGetInfo_MPIAIJ" 875 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 876 { 877 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 878 Mat A = mat->A, B = mat->B; 879 int ierr; 880 double isend[5], irecv[5]; 881 882 info->rows_global = (double)mat->M; 883 info->columns_global = (double)mat->N; 884 info->rows_local = (double)mat->m; 885 info->columns_local = (double)mat->N; 886 info->block_size = 1.0; 887 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 888 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 889 isend[3] = info->memory; isend[4] = info->mallocs; 890 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 891 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 892 isend[3] += info->memory; isend[4] += info->mallocs; 893 if (flag == MAT_LOCAL) { 894 info->nz_used = isend[0]; 895 info->nz_allocated = isend[1]; 896 info->nz_unneeded = isend[2]; 897 info->memory = isend[3]; 898 info->mallocs = isend[4]; 899 } else if (flag == MAT_GLOBAL_MAX) { 900 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 901 info->nz_used = irecv[0]; 902 info->nz_allocated = irecv[1]; 903 info->nz_unneeded = irecv[2]; 904 info->memory = irecv[3]; 905 info->mallocs = irecv[4]; 906 } else if (flag == MAT_GLOBAL_SUM) { 907 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 908 info->nz_used = irecv[0]; 909 info->nz_allocated = irecv[1]; 910 info->nz_unneeded = irecv[2]; 911 info->memory = irecv[3]; 912 info->mallocs = irecv[4]; 913 } 914 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 915 info->fill_ratio_needed = 0; 916 info->factor_mallocs = 0; 917 918 return 0; 919 } 920 921 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 922 extern int 
MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNCTION__
#define __FUNCTION__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies a MatOption to a parallel AIJ matrix.

   Options affecting nonzero structure or column ordering are forwarded to
   both sequential blocks (diagonal a->A and off-diagonal a->B); orientation
   options also update the flag consulted by MatSetValues_MPIAIJ; several
   row-ordering/symmetry options are silently ignored.
*/
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED) {
    /* forward to both local blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    /* MatSetValues_MPIAIJ will drop rows owned by other processes */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetSize_MPIAIJ"
/* MatGetSize_MPIAIJ - Returns the global dimensions M x N. */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetLocalSize_MPIAIJ"
/* MatGetLocalSize_MPIAIJ - Returns local row count m and, for columns,
   the GLOBAL width N (the local blocks A and B jointly span all columns). */
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ"
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart, rend): the half-open range
   of global rows stored on this process. */
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNCTION__
#define __FUNCTION__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned global row as merged
   (column, value) arrays in increasing global column order.

   The row is fetched from both the diagonal block A (local column numbering,
   offset by cstart) and the off-diagonal block B (compressed numbering mapped
   to global columns through garray), then merged into the per-matrix
   scratch buffers rowvalues/rowindices.  Only one row may be "active" at a
   time; MatRestoreRow_MPIAIJ must be called before the next MatGetRow.
   idx and/or v may be PETSC_NULL if the caller does not need them.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB, *v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
        A single allocation holds max Scalars followed by max ints;
        rowindices aliases the tail of rowvalues.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  /* no trailing semicolon: SETERRQ supplies its own braced statement */
  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* pass NULL sub-requests down when the caller wants neither idx nor v */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries with global column < cstart, i.e. the
         entries that precede the whole diagonal block in column order */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift by cstart to recover global numbering */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB);
  CHKERRQ(ierr);
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row handed out by MatGetRow_MPIAIJ.
   The scratch buffers are owned by the matrix, so this only clears the
   "row active" flag; the buffers are reused by the next MatGetRow call.
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a matrix norm of a parallel AIJ matrix.

   Supports NORM_FROBENIUS (sum of squares reduced with MPI_SUM, then sqrt),
   NORM_1 (max column sum: per-column absolute sums accumulated into a
   global-width work array, reduced with MPI_SUM), and NORM_INFINITY
   (max row sum, reduced with MPI_MAX).  The 2-norm is not supported.
   shift is the AIJ index shift (0- vs 1-based internal indexing).
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    /* uniprocessor: defer entirely to the sequential block */
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      /* non-scalable: work arrays are of global width aij->N */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A columns are local: offset by cstart to index global columns */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      /* B columns are compressed: map through garray */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* each local row spans both the A and B blocks */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   Builds a fresh N x M matrix B and inserts each local row of A (and of the
   off-diagonal block) as a COLUMN of B via MatSetValues, letting the stash
   machinery route entries to their owning processes.  If matout is
   PETSC_NULL the result replaces A "in place" (only legal for square
   matrices); otherwise *matout receives the new matrix.
*/
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's local column indices to global numbering ... */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row 'row' of A as column 'row' of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* ... and restore them afterwards */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array =
  Aloc->a;
  row = a->rstart;
  /* translate B's compressed column indices to global via garray,
     using a scratch copy so the original matrix is untouched */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  /* ct keeps the base pointer; cols was advanced through the buffer */
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    /* steal B's body into A's header, then discard B's header */
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).
   Either vector may be PETSC_NULL to skip that side.  ll must conform to
   the local row count; rr to the local column count.  The right scaling of
   the off-diagonal block needs ghosted values of rr, obtained through the
   matrix's scatter context (communication overlapped with the left scale
   and the diagonal-block scale).
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    /* conformance is checked against the local column count aij->n,
       not the s3 (= global N) returned by MatGetLocalSize above */
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation.
    */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNCTION__
#define __FUNCTION__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints AIJ option help, on process 0 only. */
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNCTION__
#define __FUNCTION__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNCTION__
#define __FUNCTION__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block
   (only the diagonal block ever holds a factorization here). */
static int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}


extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Dispatch table for MATMPIAIJ.  The initializers are POSITIONAL and must
   match the field order of struct _MatOps exactly; 0 marks an unsupported
   operation. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNCTION__
#define __FUNCTION__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    /* uniprocessor case: the diagonal block holds the whole matrix, so
       sequential factorization/solve operations can be exposed */
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* per-row preallocation needs a definite local row count */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* fill in missing global sizes from the sum of the local ones ... */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* ... or missing local sizes by splitting the global ones evenly,
     giving the first (M % size) processes one extra row */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
b->N = N; B->N = N; 1463 b->M = M; B->M = M; 1464 1465 /* build local table of row and column ownerships */ 1466 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1467 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1468 b->cowners = b->rowners + b->size + 2; 1469 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1470 b->rowners[0] = 0; 1471 for ( i=2; i<=b->size; i++ ) { 1472 b->rowners[i] += b->rowners[i-1]; 1473 } 1474 b->rstart = b->rowners[b->rank]; 1475 b->rend = b->rowners[b->rank+1]; 1476 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1477 b->cowners[0] = 0; 1478 for ( i=2; i<=b->size; i++ ) { 1479 b->cowners[i] += b->cowners[i-1]; 1480 } 1481 b->cstart = b->cowners[b->rank]; 1482 b->cend = b->cowners[b->rank+1]; 1483 1484 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1485 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1486 PLogObjectParent(B,b->A); 1487 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1488 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1489 PLogObjectParent(B,b->B); 1490 1491 /* build cache for off array entries formed */ 1492 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1493 b->donotstash = 0; 1494 b->colmap = 0; 1495 b->garray = 0; 1496 b->roworiented = 1; 1497 1498 /* stuff used for matrix vector multiply */ 1499 b->lvec = 0; 1500 b->Mvctx = 0; 1501 1502 /* stuff for MatGetRow() */ 1503 b->rowindices = 0; 1504 b->rowvalues = 0; 1505 b->getrowactive = PETSC_FALSE; 1506 1507 *A = B; 1508 return 0; 1509 } 1510 1511 #undef __FUNCTION__ 1512 #define __FUNCTION__ "MatConvertSameType_MPIAIJ" 1513 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1514 { 1515 Mat mat; 1516 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1517 int ierr, len=0, flg; 1518 1519 *newmat = 0; 1520 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1521 PLogObjectCreate(mat); 1522 mat->data = (void *) (a 
= PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1523 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1524 mat->destroy = MatDestroy_MPIAIJ; 1525 mat->view = MatView_MPIAIJ; 1526 mat->factor = matin->factor; 1527 mat->assembled = PETSC_TRUE; 1528 1529 a->m = mat->m = oldmat->m; 1530 a->n = mat->n = oldmat->n; 1531 a->M = mat->M = oldmat->M; 1532 a->N = mat->N = oldmat->N; 1533 1534 a->rstart = oldmat->rstart; 1535 a->rend = oldmat->rend; 1536 a->cstart = oldmat->cstart; 1537 a->cend = oldmat->cend; 1538 a->size = oldmat->size; 1539 a->rank = oldmat->rank; 1540 a->insertmode = NOT_SET_VALUES; 1541 a->rowvalues = 0; 1542 a->getrowactive = PETSC_FALSE; 1543 1544 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1545 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1546 a->cowners = a->rowners + a->size + 2; 1547 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1548 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1549 if (oldmat->colmap) { 1550 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1551 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1552 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1553 } else a->colmap = 0; 1554 if (oldmat->garray) { 1555 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1556 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1557 PLogObjectMemory(mat,len*sizeof(int)); 1558 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1559 } else a->garray = 0; 1560 1561 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1562 PLogObjectParent(mat,a->lvec); 1563 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1564 PLogObjectParent(mat,a->Mvctx); 1565 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1566 PLogObjectParent(mat,a->A); 1567 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1568 PLogObjectParent(mat,a->B); 1569 ierr = 
  OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNCTION__
#define __FUNCTION__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a parallel AIJ matrix from a binary viewer.

   Process 0 reads the header (cookie, M, N, total nz), the row lengths,
   the column indices, and the values, and ships each process its share
   with point-to-point sends; the other processes receive only their own
   portion.  Rows are partitioned evenly (first M % size processes get one
   extra row).  Row lengths are split into diagonal/off-diagonal counts so
   the matrix can be exactly preallocated before insertion.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  /* prefix-sum local row counts into ownership offsets */
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* one allocation: ourlens followed by offlens, each of length m */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily turn ourlens into diagonal-only counts for preallocation */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  /* restore full row lengths for the insertion loops below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}