#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.211 1997/07/29 14:09:26 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix (column) length.

   On exit aij->colmap[g] == local_index+1 for every global column g that
   appears in the off-diagonal block B (garray is B's local->global column
   map); entries left 0 mean "column not present locally".
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  /* O(N) storage: one int per *global* column (see note above) */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* store local index + 1 so that 0 can mean "absent" */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/* number of extra nonzero slots added to a row when it must be enlarged */
#define CHUNKSIZE 15

/* Insert/add one value at (row,col) of the diagonal block aij->A,
   growing the row's storage by CHUNKSIZE if it is full.

   This macro reads and WRITES the ambient locals of the enclosing
   function: rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N and the cached
   matrix pointers aa,ai,aj (which are re-pointed after a realloc).
   `nonew` controls behavior on a new nonzero: 1 = silently drop,
   -1/-2 = error, otherwise insert.

   NOTE(review): the bisection below leaves its result in `low` but the
   linear scan that follows starts at _i=0, so the search result is
   unused; it also compares rp[t] against `col` rather than `col1`
   (they differ when shift != 0). Harmless — the linear scan is
   authoritative — but worth checking against a later revision. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space: one buffer holding values, then j, then i */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots; rows after `row` shift by CHUNKSIZE */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}

/* Identical to MatSetValues_SeqAIJ_A_Private but operating on the
   off-diagonal block aij->B (ambient locals ba,bi,bj,bimax,bilen,b,B).
   The same review note about the unused bisection applies. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
    for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(b->a); \
    if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = 1; \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/* Set/add an m x n logically-dense block of values.
   Locally-owned rows go straight into the diagonal (A) or off-diagonal (B)
   sequential blocks via the macros above (inlined for speed; the
   MatSetValues_SeqAIJ calls they replace are kept commented out).
   Rows owned by other processes are stashed for communication at
   assembly time, unless aij->donotstash is set. */
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1; /* -1 means column not yet present */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column after assembly: fall back to
                 global indexing by disassembling the matrix */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(),
                 since DisAssemble replaced aij->B */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j]; /* before first assembly B uses global column indices */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* off-processor row: stash for MatAssemblyBegin/End */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/* Retrieve an m x n block of values; only locally-owned rows are
   supported. Columns in the diagonal range come from A; other columns
   are looked up through colmap/garray in B, with absent entries
   returned as 0.0. */
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* garray check guards against a stale colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/* Begin assembly: route stashed off-processor entries to their owners.
   Each entry travels as a triple of Scalars (row, col, value) — indices
   are converted to Scalar to keep the message a single MPI type.
   Receives are posted with MPI_ANY_SOURCE and matched in
   MatAssemblyEnd_MPIAIJ; the request arrays and buffers are parked in
   the Mat_MPIAIJ struct between Begin and End. */
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE;
     the bitwise OR makes a mix detectable as ADD_VALUES|INSERT_VALUES */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    /* pack (row,col,value) triples; starts[] is advanced as we fill */
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the in-flight communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/* Complete assembly: drain the receives posted in MatAssemblyBegin_MPIAIJ
   (each message is a sequence of (row,col,value) Scalar triples), insert
   the values locally, assemble the A and B blocks, and set up the
   scatter for MatMult on final assembly. */
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3; /* number of (row,col,value) triples in this message */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal column: disassemble and use global index */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* row-access work array is invalidated by new assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* Zero all stored entries in both the diagonal and off-diagonal blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/* Zero the (globally-indexed) rows listed in `is`, optionally setting
   the diagonal to *diag. Each process forwards the row indices it
   holds to the owning process (plain MPI_INT messages this time),
   gathers the rows it owns, and applies MatZeroRows to its local
   blocks. See the BUG note above about diagonal handling. */
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base; /* convert to local row index */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  /* diagonal applies only to the A block; B gets a plain zeroing */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
/* y = A*x: overlap the scatter of off-processor x entries into lvec
   with the local (diagonal-block) product, then add B*lvec. */
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* z = y + A*x, with the same scatter/compute overlap as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
/* y = A'*x: B' contributions are computed into lvec and scattered back
   (reverse, additive) while A'*x is computed locally. */
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* z = y + A'*x, structured exactly like MatMultTrans_MPIAIJ. */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition"); }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
/* Scale the whole matrix by *aa (applied to both local blocks). */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
/* Release all resources owned by the parallel matrix: stash, local
   blocks, colmap/garray, scatter context, and finally the header. */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
/* Binary viewing is only implemented for the uniprocessor case. */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
/* ASCII/Draw/Matlab viewing. INFO formats print per-process summaries;
   otherwise, for multi-process runs the entire matrix is gathered onto
   process 0 (via an auxiliary MPIAIJ matrix) and viewed there. */
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* single shared file: each process prints its ownership ranges and blocks in turn */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      /* process 0 owns the whole gathered matrix, everyone else 0 rows */
      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: temporarily convert column indices to global */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      /* restore the local column indices modified above */
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: map local columns to global through garray */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
/* Dispatch to the appropriate viewing routine based on viewer type. */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values betwen colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/* Local SOR/relaxation sweeps. Off-processor x values are scattered
   into mat->lvec once per call; the sweeps then run over the local
   rows, accumulating row sums from both the A and B blocks via
   SPARSEDENSEMDOT.

   NOTE(review): the update adds A->a[diag[i]]*x[i] back into `sum`
   before dividing by d — presumably because the row dot product also
   covered the diagonal entry; confirm against SPARSEDENSEMDOT in
   src/inline/spops.h. */
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-processor part contributes nothing;
         delegate entirely to the sequential diagonal block */
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
/* (Definition continues beyond this view; the visible prefix is kept
   unchanged.)
   NOTE(review): columns_local is set from mat->N (the GLOBAL column
   count); mat->n looks intended — verify against the struct definition
   and callers. */
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix, forwarding
   it to both sequential blocks where it is meaningful, recording the
   orientation/stash flags locally, and rejecting unsupported options.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
        /* options that apply verbatim to both sequential blocks */
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
        a->roworiented = 1;
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
/* MatGetSize_MPIAIJ - Returns the global dimensions M x N. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
/* MatGetLocalSize_MPIAIJ - Returns the local row count and the column count. */
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  /* NOTE(review): returns the GLOBAL column count N, not the local mat->n;
     consistent with MatGetInfo_MPIAIJ above -- verify intentional */
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart, rend), the owned rows. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one LOCAL row of the parallel matrix with
   global column indices, merging the row pieces held by the diagonal (A)
   and off-diagonal (B) blocks into the matrix-owned scratch buffers
   rowvalues/rowindices, sorted by increasing global column.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  /* only one row may be "gotten" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row;
       rowindices shares the single allocation with rowvalues */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  /* NOTE(review): no ';' after SETERRQ -- relies on the macro's block
     expansion; kept verbatim */
  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* request only the pieces the caller asked for from each block */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted:
         imark splits B's entries into those left of the diagonal block
         (global column < cstart) and those right of it */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB);
  CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
/*
   MatRestoreRow_MPIAIJ - Ends a MatGetRow() access.  The row buffers are
   owned by the matrix (freed at destroy time), so only the active flag
   needs clearing.
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, 1- (max column) and infinity- (max row)
   norms of the parallel matrix; the 2-norm is not supported.  Local
   contributions from the A and B blocks are combined with an Allreduce.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    /* uniprocessor: the diagonal block IS the matrix */
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      /* non-scalable: each process accumulates a full length-N column-sum
         vector, then a global sum + max gives the norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A stores local column indices: offset by cstart (and shift) */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B stores compressed indices: map to global through garray */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row as
   a column (global numbering) of a freshly created parallel matrix; used
   either out-of-place (matout) or, for square matrices, "in place" by
   swapping the new matrix into the old header.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part: temporarily globalize the diagonal block's
     column indices (undone below), then insert row i as column i of B */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* translate the off-diagonal block's compressed column indices to global
     numbering in a scratch array, then insert those rows as columns */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    /* graft B's contents onto A's header, then discard B's shell */
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).  The right
   scaling of the off-diagonal block needs off-process entries of rr, which
   are scattered into aij->lvec while the left scaling proceeds.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ options help, on process 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
/* MatGetBlockSize_MPIAIJ - AIJ is an unblocked format: block size is 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ: entries are POSITIONAL (must match the
   slot order of struct _MatOps); 0 marks an unimplemented operation. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.
d_nzz - array containing the number of nonzeros in the various rows of the 1464 diagonal portion of the local submatrix (possibly different for each row) 1465 or PETSC_NULL. You must leave room for the diagonal entry even if 1466 it is zero. 1467 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1468 submatrix (same for all local rows). 1469 . o_nzz - array containing the number of nonzeros in the various rows of the 1470 off-diagonal portion of the local submatrix (possibly different for 1471 each row) or PETSC_NULL. 1472 1473 Output Parameter: 1474 . A - the matrix 1475 1476 Notes: 1477 The AIJ format (also called the Yale sparse matrix format or 1478 compressed row storage), is fully compatible with standard Fortran 77 1479 storage. That is, the stored row and column indices can begin at 1480 either one (as in Fortran) or zero. See the users manual for details. 1481 1482 The user MUST specify either the local or global matrix dimensions 1483 (possibly both). 1484 1485 By default, this format uses inodes (identical nodes) when possible. 1486 We search for consecutive rows with the same nonzero structure, thereby 1487 reusing matrix information to achieve increased efficiency. 1488 1489 Options Database Keys: 1490 $ -mat_aij_no_inode - Do not use inodes 1491 $ -mat_aij_inode_limit <limit> - Set inode limit. 1492 $ (max limit=5) 1493 $ -mat_aij_oneindex - Internally use indexing starting at 1 1494 $ rather than 0. Note: When calling MatSetValues(), 1495 $ the user still MUST index entries starting at 0! 1496 1497 Storage Information: 1498 For a square global matrix we define each processor's diagonal portion 1499 to be its local rows and the corresponding columns (a square submatrix); 1500 each processor's off-diagonal portion encompasses the remainder of the 1501 local matrix (a rectangular submatrix). 1502 1503 The user can specify preallocated storage for the diagonal part of 1504 the local submatrix with either d_nz or d_nnz (not both). 
Set 1505 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1506 memory allocation. Likewise, specify preallocated storage for the 1507 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1508 1509 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1510 the figure below we depict these three local rows and all columns (0-11). 1511 1512 $ 0 1 2 3 4 5 6 7 8 9 10 11 1513 $ ------------------- 1514 $ row 3 | o o o d d d o o o o o o 1515 $ row 4 | o o o d d d o o o o o o 1516 $ row 5 | o o o d d d o o o o o o 1517 $ ------------------- 1518 $ 1519 1520 Thus, any entries in the d locations are stored in the d (diagonal) 1521 submatrix, and any entries in the o locations are stored in the 1522 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1523 stored simply in the MATSEQAIJ format for compressed row storage. 1524 1525 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1526 and o_nz should indicate the number of nonzeros per row in the o matrix. 1527 In general, for PDE problems in which most nonzeros are near the diagonal, 1528 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1529 or you will get TERRIBLE performance; see the users' manual chapter on 1530 matrices. 
1531 1532 .keywords: matrix, aij, compressed row, sparse, parallel 1533 1534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1535 @*/ 1536 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1537 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1538 { 1539 Mat B; 1540 Mat_MPIAIJ *b; 1541 int ierr, i,sum[2],work[2],size; 1542 1543 MPI_Comm_size(comm,&size); 1544 if (size == 1) { 1545 if (M == PETSC_DECIDE) M = m; 1546 if (N == PETSC_DECIDE) N = n; 1547 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1548 return 0; 1549 } 1550 1551 *A = 0; 1552 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1553 PLogObjectCreate(B); 1554 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1555 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1556 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1557 B->destroy = MatDestroy_MPIAIJ; 1558 B->view = MatView_MPIAIJ; 1559 B->factor = 0; 1560 B->assembled = PETSC_FALSE; 1561 B->mapping = 0; 1562 1563 B->insertmode = NOT_SET_VALUES; 1564 MPI_Comm_rank(comm,&b->rank); 1565 1566 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1567 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1568 1569 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1570 work[0] = m; work[1] = n; 1571 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1572 if (M == PETSC_DECIDE) M = sum[0]; 1573 if (N == PETSC_DECIDE) N = sum[1]; 1574 } 1575 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1576 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1577 b->m = m; B->m = m; 1578 b->n = n; B->n = n; 1579 b->N = N; B->N = N; 1580 b->M = M; B->M = M; 1581 1582 /* build local table of row and column ownerships */ 1583 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1584 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1585 b->cowners = b->rowners + b->size + 2; 1586 
MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1587 b->rowners[0] = 0; 1588 for ( i=2; i<=b->size; i++ ) { 1589 b->rowners[i] += b->rowners[i-1]; 1590 } 1591 b->rstart = b->rowners[b->rank]; 1592 b->rend = b->rowners[b->rank+1]; 1593 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1594 b->cowners[0] = 0; 1595 for ( i=2; i<=b->size; i++ ) { 1596 b->cowners[i] += b->cowners[i-1]; 1597 } 1598 b->cstart = b->cowners[b->rank]; 1599 b->cend = b->cowners[b->rank+1]; 1600 1601 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1602 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1603 PLogObjectParent(B,b->A); 1604 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1605 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1606 PLogObjectParent(B,b->B); 1607 1608 /* build cache for off array entries formed */ 1609 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1610 b->donotstash = 0; 1611 b->colmap = 0; 1612 b->garray = 0; 1613 b->roworiented = 1; 1614 1615 /* stuff used for matrix vector multiply */ 1616 b->lvec = 0; 1617 b->Mvctx = 0; 1618 1619 /* stuff for MatGetRow() */ 1620 b->rowindices = 0; 1621 b->rowvalues = 0; 1622 b->getrowactive = PETSC_FALSE; 1623 1624 *A = B; 1625 return 0; 1626 } 1627 1628 #undef __FUNC__ 1629 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1630 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1631 { 1632 Mat mat; 1633 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1634 int ierr, len=0, flg; 1635 1636 *newmat = 0; 1637 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1638 PLogObjectCreate(mat); 1639 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1640 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1641 mat->destroy = MatDestroy_MPIAIJ; 1642 mat->view = MatView_MPIAIJ; 1643 mat->factor = matin->factor; 1644 mat->assembled = PETSC_TRUE; 1645 1646 a->m = mat->m = oldmat->m; 1647 a->n = mat->n = oldmat->n; 1648 a->M = mat->M = oldmat->M; 1649 
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  /* NOTE(review): unlike MatCreateMPIAIJ, `a` is NOT PetscMemzero'd here,
     and fields such as a->donotstash, a->roworiented and a->rowindices are
     never assigned -- verify they are either set elsewhere or harmless */

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    /* garray length equals the column count of the off-diagonal block */
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix from a PETSc binary viewer.  Process 0
   reads the header, row lengths, column indices and values, keeps its own
   share and ships each other process its share with point-to-point
   messages; row lengths are pre-split into diagonal/off-diagonal counts
   so the matrix can be correctly preallocated before insertion.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: even split, earlier ranks get extras */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors (offlens shares the buffer) */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: split each row length into diagonal/off-diagonal
     preallocation counts (restored to totals right after creation) */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out (note: values use A->tag,
       column indices above used the viewer's tag -- matched on receive) */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}