#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.203 1997/05/03 22:43:34 curfman Exp curfman $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non scalable way since the 
   length of colmap equals the global matrix length.
   colmap[] stores local index + 1; a stored value of 0 therefore means
   "this global column is not present in the off-diagonal part". */
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n    = B->n,i;

  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* garray[i] is the global column of local off-diagonal column i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/* MatGetRowIJ_MPIAIJ - returns the compressed row storage (i,j) arrays;
   simply forwards to the (sequential) diagonal block, hence only legal
   on a single processor. */
#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                       PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

/* MatRestoreRowIJ_MPIAIJ - companion to MatGetRowIJ_MPIAIJ() above;
   uniprocessor only. */
#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                           PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

/* number of extra nonzero slots added to a row when it overflows */
#define CHUNKSIZE 15

/* MatSetValues_SeqAIJ_A_Private - inlined (macro) insertion of a single
   (row,col,value) into the diagonal block aij->A.  This macro relies on
   many variables declared in the enclosing scope of MatSetValues_MPIAIJ():
   rp,ap,rmax,nrow,col1,_i,ii,N (scratch), a,aa,ai,aj,aimax,ailen (the
   SeqAIJ data of A), nonew, shift, and A itself.  If the row is full the
   entire matrix storage is reallocated with CHUNKSIZE extra slots for
   this row.  `shift' implements 0- vs 1-based index storage. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  /* scan the (sorted) row for the column; overwrite or add in place */ \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space: one allocation holds a, j and i arrays */ \
    len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j   = (int *) (new_a + new_nz); \
    new_i   = new_j + new_nz; \
 \
    /* copy over old data into new slots; rows past `row' shift by CHUNKSIZE */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}

/* MatSetValues_SeqAIJ_B_Private - identical to the A-variant above but
   operating on the off-diagonal block aij->B (variables b,ba,bi,bj,
   bimax,bilen,B from the enclosing scope). */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
 \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
    new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j   = (int *) (new_a + new_nz); \
    new_i   = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
    for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(b->a); \
    if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = 1; \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

/* MatSetValues_MPIAIJ - inserts/adds an m-by-n logically dense block of
   values.  Locally owned rows go directly into the A (diagonal) or B
   (off-diagonal) sequential blocks via the macros above; rows owned by
   other processors are stashed for communication at assembly time. */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {   /* row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){   /* diagonal-block column */
          col = in[j] - cstart;
          /* v is row- or column-major depending on roworiented */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {   /* off-diagonal-block column */
          if (mat->was_assembled) {
            /* after first assembly B uses compacted (local) column indices,
               so translate the global column through colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            /* a brand new off-diagonal nonzero forces expansion back to
               global indices (disassembly) before insertion */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {   /* row belongs to another processor: stash for assembly */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          /* column-oriented input: stash one value at a time */
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

/* MatGetValues_MPIAIJ - retrieves an m-by-n block of values (row-major
   into v).  Only locally owned rows are supported; columns not stored in
   the off-diagonal part are returned as 0.0. */
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block: shift to local column numbering */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          /* off-diagonal block: map global column to local via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

/* MatAssemblyBegin_MPIAIJ - starts communication of stashed off-processor
   values: counts contributions per destination, posts nonblocking
   receives and sends of (row,col,value) triples packed as Scalars, then
   records the communication state in aij for MatAssemblyEnd_MPIAIJ(). */
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor:
     nprocs[j] = #stashed values for proc j, procs[j] = 0/1 flag,
     owner[i]  = destination rank of stash entry i */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];   /* how many processors will send to me */
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];        /* longest message I will receive */
  PetscFree(work);

  /* post receives: 
       1) each message will consist of ordered pairs 
     (global index,value) we store the global index as a double 
     to simplify the message passing. 
       2) since we don't know how long each individual message is we 
     allocate the largest needed buffer for each receive. Potentially 
     this is a lot of wasted space. 

       This could be done better. 
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row,col,value) triples grouped by destination processor */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (the packing loop above advanced it) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/* MatAssemblyEnd_MPIAIJ - completes the communication started in
   MatAssemblyBegin_MPIAIJ(): drains the receives, inserting each
   (row,col,value) triple into the A or B block, waits on the sends,
   assembles the sequential blocks, and sets up the scatter for
   matrix-vector products on final assembly. */
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;   /* number of (row,col,value) triples actually received */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      } 
      else {
        if (mat->was_assembled || mat->assembled) {
          /* B uses compacted column indices once assembled: translate */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal nonzero: revert to global indices */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached row workspace (used by MatGetRow presumably) is stale now */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

/* MatZeroEntries_MPIAIJ - zeros both the diagonal and off-diagonal
   sequential blocks, retaining the nonzero structure. */
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access 
   aij->A and aij->B directly and not through the MatZeroRows() 
   routine. 
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/* MatZeroRows_MPIAIJ - zeros the (globally numbered) rows listed in `is',
   optionally placing `diag' on the diagonal.  Each processor may list any
   rows; the lists are routed to the owning processors with the same
   count/Irecv/Isend rendezvous pattern used in assembly, then the local
   rows are zeroed through MatZeroRows() on the sequential blocks. */
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: one buffer slot of nmax ints per expected message */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (advanced by the packing loop above) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];   /* first globally numbered row owned locally */

  /* wait on receives; record per-message length and source */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);
  
  /* move the data into the send scatter: collect all locally owned rows
     to zero, converted to local numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);
    
  /* actually zap the local rows; only the diagonal block gets `diag'
     (see the BUG note above this routine) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

/* MatMult_MPIAIJ - y = A*x.  Overlaps the scatter of the ghost values
   (into a->lvec) with the diagonal-block product, then adds in the
   off-diagonal-block contribution. */
#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

/* MatMultAdd_MPIAIJ - z = y + A*x, same overlap strategy as MatMult. */
#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

/* MatMultTrans_MPIAIJ - y = A'*x.  The off-diagonal transpose product is
   computed into a->lvec and scattered back (reverse, additive) while the
   local transpose product proceeds. */
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this
assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/* MatMultTransAdd_MPIAIJ - z = y + A'*x, same overlap strategy as
   MatMultTrans_MPIAIJ() above. */
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the 
   diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N) 
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");  }
  return MatGetDiagonal(a->A,v);
}

/* MatScale_MPIAIJ - scales the matrix by *aa (both blocks). */
#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

/* MatDestroy_MPIAIJ - frees the sequential blocks, the column map, the
   ghost-value scatter context, and the header itself. */
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners); 
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij); 
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

/* MatView_MPIAIJ_Binary - binary viewing; only implemented for one
   processor (forwards to the diagonal block). */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

/* MatView_MPIAIJ_ASCIIorDraworMatlab - handles ASCII info formats, draw
   and Matlab viewing.  For the general case the entire matrix is
   assembled (via MatSetValues with global column indices recovered from
   garray) onto processor 0 and viewed there. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      /* print one processor at a time */
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* each processor prints its own rows in turn */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      /* processor 0 holds all M rows of the gathered copy */
      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: temporarily convert column indices to
         global numbering in place, then restore them */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      } 
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: recover global columns through garray */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      } 
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

/* MatView_MPIAIJ - dispatches to the ASCII/draw/Matlab or binary viewer
   depending on the viewer type. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;
 
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER
|| vtype == MATLAB_VIEWER) { 887 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 888 } 889 else if (vtype == BINARY_FILE_VIEWER) { 890 return MatView_MPIAIJ_Binary(mat,viewer); 891 } 892 return 0; 893 } 894 895 /* 896 This has to provide several versions. 897 898 2) a) use only local smoothing updating outer values only once. 899 b) local smoothing updating outer values each inner iteration 900 3) color updating out values betwen colors. 901 */ 902 #undef __FUNC__ 903 #define __FUNC__ "MatRelax_MPIAIJ" 904 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 905 double fshift,int its,Vec xx) 906 { 907 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 908 Mat AA = mat->A, BB = mat->B; 909 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 910 Scalar *b,*x,*xs,*ls,d,*v,sum; 911 int ierr,*idx, *diag; 912 int n = mat->n, m = mat->m, i,shift = A->indexshift; 913 914 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 915 xs = x + shift; /* shift by one for index start of 1 */ 916 ls = ls + shift; 917 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 918 diag = A->diag; 919 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 920 if (flag & SOR_ZERO_INITIAL_GUESS) { 921 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 922 } 923 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 924 CHKERRQ(ierr); 925 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 926 CHKERRQ(ierr); 927 while (its--) { 928 /* go down through the rows */ 929 for ( i=0; i<m; i++ ) { 930 n = A->i[i+1] - A->i[i]; 931 idx = A->j + A->i[i] + shift; 932 v = A->a + A->i[i] + shift; 933 sum = b[i]; 934 SPARSEDENSEMDOT(sum,xs,v,idx,n); 935 d = fshift + A->a[diag[i]+shift]; 936 n = B->i[i+1] - B->i[i]; 937 idx = B->j + B->i[i] + shift; 938 v = B->a + B->i[i] + shift; 939 SPARSEDENSEMDOT(sum,ls,v,idx,n); 940 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    /* gather off-process x values once; they are held fixed for all its
       inner iterations of this sweep */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and
   off-diagonal (B) sequential blocks, then reduces across the
   communicator for the MAT_GLOBAL_MAX / MAT_GLOBAL_SUM flavors.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N,
     matching MatGetLocalSize_MPIAIJ below -- confirm this is the
     intended convention for this PETSc version */
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix;
   most options are simply forwarded to both sequential blocks A and B.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    /* forward to both local blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
/* Returns the global dimensions M x N. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
/*
   Returns the local row count m; the column value returned is the
   GLOBAL column count N (consistent with MatGetInfo_MPIAIJ above).
*/
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
/* Returns [rstart,rend), the range of rows owned by this process. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row, merging the entries
   of the diagonal (A) and off-diagonal (B) blocks into global column
   order.  Only one row may be "gotten" at a time (getrowactive guard).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx
|| v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one buffer holds both the values and (behind them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first ... */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all of A, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA);
  CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
/* Releases the single-row access acquired by MatGetRow_MPIAIJ. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, 1- and infinity-norms of the parallel
   matrix; local block contributions are combined with MPI_Allreduce.
   The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      /* non-scalable: one accumulator per GLOBAL column */
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ )
{
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local
   entry (row,col) as (col,row) into a new parallel matrix B.
   If matout is PETSC_NULL (in-place, square only) the old data
   structures are destroyed and B's header is copied over A's.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part: shift diagonal-block column indices to global
     numbering, insert them as ROWS of B, then restore */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part; garray maps compressed columns to global */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).
   The left scaling applies to both local blocks; the right scaling of
   the off-diagonal block needs the scattered (ghosted) rr values.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;  /* use the true local column count, not the value above */
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
/* Prints the SeqAIJ option help, on process 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else          return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
/* MPIAIJ is a point (non-blocked) format: block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
/* Resets the factored state of the (sequential) diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; zero entries mark
   operations that are not supported. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
     This value should be the same as the local size used in creating the
     y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
     x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
     calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
     (same for all local rows)
.  d_nzz - array containing the number of nonzeros in the various rows of the
     diagonal portion of the local submatrix (possibly different for each row)
     or PETSC_NULL.  You must leave room for the diagonal entry even if
     it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
     submatrix (same for all local rows).
.  o_nzz - array containing the number of nonzeros in the various rows of the
     off-diagonal portion of the local submatrix (possibly different for
     each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$   -mat_aij_no_inode  - Do not use inodes
$   -mat_aij_inode_limit <limit> - Set inode limit.
$       (max limit=5)
$   -mat_aij_oneindex - Internally use indexing starting at 1
$       rather than 0.  Note: When calling MatSetValues(),
$       the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    /* on one process the "parallel" matrix can also support the
       sequential factorization and solve operations */
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* derive missing global sizes by summing the local sizes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* derive missing local sizes by splitting the global ones evenly */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
b->N = N; B->N = N; 1602 b->M = M; B->M = M; 1603 1604 /* build local table of row and column ownerships */ 1605 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1606 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1607 b->cowners = b->rowners + b->size + 2; 1608 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1609 b->rowners[0] = 0; 1610 for ( i=2; i<=b->size; i++ ) { 1611 b->rowners[i] += b->rowners[i-1]; 1612 } 1613 b->rstart = b->rowners[b->rank]; 1614 b->rend = b->rowners[b->rank+1]; 1615 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1616 b->cowners[0] = 0; 1617 for ( i=2; i<=b->size; i++ ) { 1618 b->cowners[i] += b->cowners[i-1]; 1619 } 1620 b->cstart = b->cowners[b->rank]; 1621 b->cend = b->cowners[b->rank+1]; 1622 1623 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1624 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1625 PLogObjectParent(B,b->A); 1626 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1627 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1628 PLogObjectParent(B,b->B); 1629 1630 /* build cache for off array entries formed */ 1631 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1632 b->donotstash = 0; 1633 b->colmap = 0; 1634 b->garray = 0; 1635 b->roworiented = 1; 1636 1637 /* stuff used for matrix vector multiply */ 1638 b->lvec = 0; 1639 b->Mvctx = 0; 1640 1641 /* stuff for MatGetRow() */ 1642 b->rowindices = 0; 1643 b->rowvalues = 0; 1644 b->getrowactive = PETSC_FALSE; 1645 1646 *A = B; 1647 return 0; 1648 } 1649 1650 #undef __FUNC__ 1651 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1652 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1653 { 1654 Mat mat; 1655 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1656 int ierr, len=0, flg; 1657 1658 *newmat = 0; 1659 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1660 PLogObjectCreate(mat); 1661 mat->data = (void *) (a = 
PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1662 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1663 mat->destroy = MatDestroy_MPIAIJ; 1664 mat->view = MatView_MPIAIJ; 1665 mat->factor = matin->factor; 1666 mat->assembled = PETSC_TRUE; 1667 1668 a->m = mat->m = oldmat->m; 1669 a->n = mat->n = oldmat->n; 1670 a->M = mat->M = oldmat->M; 1671 a->N = mat->N = oldmat->N; 1672 1673 a->rstart = oldmat->rstart; 1674 a->rend = oldmat->rend; 1675 a->cstart = oldmat->cstart; 1676 a->cend = oldmat->cend; 1677 a->size = oldmat->size; 1678 a->rank = oldmat->rank; 1679 mat->insertmode = NOT_SET_VALUES; 1680 a->rowvalues = 0; 1681 a->getrowactive = PETSC_FALSE; 1682 1683 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1684 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1685 a->cowners = a->rowners + a->size + 2; 1686 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1687 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1688 if (oldmat->colmap) { 1689 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1690 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1691 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1692 } else a->colmap = 0; 1693 if (oldmat->garray) { 1694 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1695 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1696 PLogObjectMemory(mat,len*sizeof(int)); 1697 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1698 } else a->garray = 0; 1699 1700 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1701 PLogObjectParent(mat,a->lvec); 1702 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1703 PLogObjectParent(mat,a->Mvctx); 1704 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1705 PLogObjectParent(mat,a->A); 1706 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1707 PLogObjectParent(mat,a->B); 1708 ierr = 
OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1709 if (flg) { 1710 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1711 } 1712 *newmat = mat; 1713 return 0; 1714 } 1715 1716 #include "sys.h" 1717 1718 #undef __FUNC__ 1719 #define __FUNC__ "MatLoad_MPIAIJ" 1720 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1721 { 1722 Mat A; 1723 int i, nz, ierr, j,rstart, rend, fd; 1724 Scalar *vals,*svals; 1725 MPI_Comm comm = ((PetscObject)viewer)->comm; 1726 MPI_Status status; 1727 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1728 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1729 int tag = ((PetscObject)viewer)->tag; 1730 1731 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1732 if (!rank) { 1733 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1734 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1735 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1736 } 1737 1738 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1739 M = header[1]; N = header[2]; 1740 /* determine ownership of all rows */ 1741 m = M/size + ((M % size) > rank); 1742 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1743 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1744 rowners[0] = 0; 1745 for ( i=2; i<=size; i++ ) { 1746 rowners[i] += rowners[i-1]; 1747 } 1748 rstart = rowners[rank]; 1749 rend = rowners[rank+1]; 1750 1751 /* distribute row lengths to all processors */ 1752 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1753 offlens = ourlens + (rend-rstart); 1754 if (!rank) { 1755 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1756 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1757 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1758 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1759 
MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: preallocate with the diagonal-only lengths,
     then restore ourlens to the full per-row counts for insertion */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}