#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.212 1997/08/07 14:39:24 bsmith Exp balay $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);
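/*
   Usage sketch (illustrative only, not library code): once colmap has been
   built, a global column number is translated to a local column index in
   the off-diagonal block B; a stored value of 0 means the column is absent.
   The variable names gcol and lcol below are hypothetical.

       int lcol = aij->colmap[gcol] - 1;
       if (lcol < 0) {
          ... column gcol has no entry in the off-diagonal part ...
       }
*/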
#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col1) high = t; \
    else              low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col1) high = t; \
    else              low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
    for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(b->a); \
    if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = 1; \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}
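/*
   Illustration (not library code) of what the two insertion macros above do,
   assuming zero-based indexing (shift == 0).  If a row currently holds

       column indices {1, 4, 7},      nrow = 3, rmax = 3

   then inserting a value at column 5 first searches the row; since column 5
   is absent and nrow == rmax, the CSR storage is enlarged by CHUNKSIZE
   slots, the row starts of all later rows are shifted by CHUNKSIZE, the
   entry for column 7 is moved up one slot, and the row becomes

       column indices {1, 4, 5, 7},   nrow = 4, rmax = 3 + CHUNKSIZE
*/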
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}
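/*
   Usage sketch (hedged; mat, row, col and one are hypothetical): any process
   may pass any global (row,column) pair to MatSetValues().  Rows owned by
   the calling process are inserted directly into the diagonal (A) or
   off-diagonal (B) block; rows owned elsewhere are stashed here and shipped
   during the assembly routines further below.

       int    row = 0, col = 0;
       Scalar one = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
*/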
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}
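/*
   Usage sketch (hedged; assumes the public MatGetValues() and
   MatGetOwnershipRange() interfaces match the type-specific routines in
   this file): only locally owned rows may be queried, so a caller typically
   restricts itself to its ownership range.

       int    rstart,rend,row,col = 0;
       Scalar val;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       row  = rstart;
       ierr = MatGetValues(mat,1,&row,1,&col,&val); CHKERRQ(ierr);
*/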
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index, global column index, value); we store the
          indices as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);
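/*
   Message layout used above (illustrative): each stashed entry travels as
   three consecutive Scalars, so the k-th triple of a message buffer buf is

       buf[3*k]   = (Scalar) global row index
       buf[3*k+1] = (Scalar) global column index
       buf[3*k+2] = the value itself

   and the receiver recovers the integer indices by casting back to int, as
   done in MatAssemblyEnd_MPIAIJ() below.
*/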
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}
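/*
   Usage sketch (hedged): assembly is a two-phase collective operation, so
   every process must call both routines with the same mode, even a process
   that contributed no off-processor values; independent work may be placed
   between the two calls.

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/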
#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
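/*
   Usage sketch (hedged; rows, is and one are hypothetical): the rows to be
   zeroed are supplied as an index set of global row numbers, and passing a
   non-null diag places that value on the diagonal of each zeroed row.

       IS     is;
       int    rows[2] = {0, 1};
       Scalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
       ierr = MatZeroRows(mat,is,&one); CHKERRQ(ierr);
       ierr = ISDestroy(is); CHKERRQ(ierr);
*/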
#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}
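/*
   Worked form of the products above (illustrative): with x_loc the locally
   owned part of x and x_ghost = lvec the scattered off-processor entries,
   each process computes its slice of the result as

       y_loc = A_diag * x_loc + B_offdiag * x_ghost

   where the forward scatter that fills x_ghost is overlapped with the local
   A_diag * x_loc product; the transpose routines run the same scatter in
   reverse with ADD_VALUES.
*/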
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}
/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}
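/*
   Usage sketch (hedged; assumes the public MatRelax() entry point has the
   same calling sequence as the routine above): one symmetric local SOR
   sweep with relaxation factor 1.0 and no diagonal shift would be

       ierr = MatRelax(mat,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,x);
       CHKERRQ(ierr);
*/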
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}
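/*
   Usage sketch (hedged): the options handled above are set through the
   generic interface, for example to lock the nonzero pattern after the
   first assembly and then query the global number of stored nonzeros:

       MatInfo info;
       MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info); CHKERRQ(ierr);
*/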
#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
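/*
   Relationship between the sizes reported above (illustrative): if process k
   owns m_k rows, then

       M        = m_0 + m_1 + ... + m_(size-1)
       rstart_k = m_0 + ... + m_(k-1)
       rend_k   = rstart_k + m_k

   which is exactly the rowners[] table assembled in MatCreateMPIAIJ() below.
*/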
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}
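/*
   Usage sketch (hedged; row must be a locally owned global row number):
   every MatGetRow() call must be matched by MatRestoreRow() before another
   row is requested, since the column and value arrays are scratch space
   reused for each row.

       int    row = 0,nz,*cols;
       Scalar *vals;
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... examine the nz column indices and values ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/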
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat, int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat, int, IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    return 0;
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
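/*
   Usage sketch (hedged; the sizes are made up for illustration): create a
   100 x 100 parallel AIJ matrix, letting PETSc choose the row distribution
   and preallocating roughly 5 diagonal-block and 2 off-diagonal-block
   nonzeros per row.

       Mat A;
       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,PETSC_NULL,2,PETSC_NULL,&A);
       CHKERRQ(ierr);
       ... MatSetValues() and assembly ...
       ierr = MatDestroy(A); CHKERRQ(ierr);
*/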
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries generated during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
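
/*
   Minimal usage sketch for the loader above (an assumed calling sequence,
   shown only for illustration; the binary viewer "viewer" is presumed to
   have been opened elsewhere, e.g. with ViewerFileOpenBinary()):

      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
*/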