1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.215 1997/09/11 20:39:34 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #define CHUNKSIZE 15 33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 34 { \ 35 \ 36 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 37 rmax = aimax[row]; nrow = ailen[row]; \ 38 col1 = col - shift; \ 39 \ 40 low = 0; high = nrow; \ 41 while (high-low > 5) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > col) high = t; \ 44 else low = t; \ 45 } \ 46 for ( _i=0; _i<nrow; _i++ ) { \ 47 if (rp[_i] > col1) break; \ 48 if (rp[_i] == col1) { \ 49 if (addv == ADD_VALUES) ap[_i] += value; \ 50 else ap[_i] = value; \ 51 goto a_noinsert; \ 52 } \ 53 } \ 54 if (nonew == 1) goto a_noinsert; \ 55 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 56 if (nrow >= rmax) { \ 57 /* there is no extra room in row, therefore enlarge */ \ 58 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 59 Scalar *new_a; \ 60 \ 61 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 
\ 62 \ 63 /* malloc new storage space */ \ 64 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 65 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 66 new_j = (int *) (new_a + new_nz); \ 67 new_i = new_j + new_nz; \ 68 \ 69 /* copy over old data into new slots */ \ 70 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 71 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 72 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 73 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 74 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 75 len*sizeof(int)); \ 76 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 77 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 78 len*sizeof(Scalar)); \ 79 /* free up old matrix storage */ \ 80 \ 81 PetscFree(a->a); \ 82 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 83 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 84 a->singlemalloc = 1; \ 85 \ 86 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 87 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 88 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 89 a->maxnz += CHUNKSIZE; \ 90 a->reallocs++; \ 91 } \ 92 N = nrow++ - 1; a->nz++; \ 93 /* shift up all the later entries in this row */ \ 94 for ( ii=N; ii>=_i; ii-- ) { \ 95 rp[ii+1] = rp[ii]; \ 96 ap[ii+1] = ap[ii]; \ 97 } \ 98 rp[_i] = col1; \ 99 ap[_i] = value; \ 100 a_noinsert: ; \ 101 ailen[row] = nrow; \ 102 } 103 104 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 105 { \ 106 \ 107 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 108 rmax = bimax[row]; nrow = bilen[row]; \ 109 col1 = col - shift; \ 110 \ 111 low = 0; high = nrow; \ 112 while (high-low > 5) { \ 113 t = (low+high)/2; \ 114 if (rp[t] > col) high = t; \ 115 else low = t; \ 116 } \ 117 for ( _i=0; _i<nrow; _i++ ) { \ 118 if (rp[_i] > col1) break; \ 119 if (rp[_i] == col1) { \ 120 if (addv == ADD_VALUES) 
ap[_i] += value; \ 121 else ap[_i] = value; \ 122 goto b_noinsert; \ 123 } \ 124 } \ 125 if (nonew == 1) goto b_noinsert; \ 126 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 130 Scalar *new_a; \ 131 \ 132 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 136 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 137 new_j = (int *) (new_a + new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 142 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 143 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 144 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 145 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 146 len*sizeof(int)); \ 147 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 148 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 149 len*sizeof(Scalar)); \ 150 /* free up old matrix storage */ \ 151 \ 152 PetscFree(b->a); \ 153 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = 1; \ 156 \ 157 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 158 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for ( ii=N; ii>=_i; ii-- ) { \ 166 rp[ii+1] = rp[ii]; \ 167 ap[ii+1] = ap[ii]; \ 168 } \ 169 rp[_i] = col1; \ 170 ap[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow; \ 
173 } 174 175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 176 #undef __FUNC__ 177 #define __FUNC__ "MatSetValues_MPIAIJ" 178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 179 { 180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 181 Scalar value; 182 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 183 int cstart = aij->cstart, cend = aij->cend,row,col; 184 int roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 189 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 190 Scalar *aa = a->a; 191 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 194 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 195 Scalar *ba = b->a; 196 197 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 198 int nonew = a->nonew,shift = a->indexshift; 199 Scalar *ap; 200 201 for ( i=0; i<m; i++ ) { 202 #if defined(PETSC_BOPT_g) 203 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 204 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 205 #endif 206 if (im[i] >= rstart && im[i] < rend) { 207 row = im[i] - rstart; 208 for ( j=0; j<n; j++ ) { 209 if (in[j] >= cstart && in[j] < cend){ 210 col = in[j] - cstart; 211 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 212 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 213 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 #if defined(PETSC_BOPT_g) 216 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 217 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 218 #endif 219 else { 220 if (mat->was_assembled) { 221 if (!aij->colmap) { 222 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 223 } 224 col = aij->colmap[in[j]] - 1; 225 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 226 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 227 col = in[j]; 228 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 229 B = aij->B; 230 b = (Mat_SeqAIJ *) B->data; 231 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 232 ba = b->a; 233 } 234 } 235 else col = in[j]; 236 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 237 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 238 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 239 } 240 } 241 } 242 else { 243 if (roworiented && !aij->donotstash) { 244 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 245 } 246 else { 247 if (!aij->donotstash) { 248 row = im[i]; 249 for ( j=0; j<n; j++ ) { 250 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 251 } 252 } 253 } 254 } 255 } 256 return 0; 257 } 258 259 #undef __FUNC__ 260 #define __FUNC__ "MatGetValues_MPIAIJ" 261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 262 { 263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 264 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 265 int cstart = aij->cstart, cend = aij->cend,row,col; 266 267 for ( i=0; i<m; i++ ) { 268 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 269 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 270 if (idxm[i] >= rstart && idxm[i] < rend) { 271 row = idxm[i] - rstart; 272 for ( j=0; j<n; j++ ) { 273 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 274 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 275 if (idxn[j] >= cstart && idxn[j] < cend){ 276 col = idxn[j] - cstart; 277 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 278 } 279 else { 280 if (!aij->colmap) { 281 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 col = aij->colmap[idxn[j]] - 1; 284 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 285 else { 286 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 287 } 288 } 289 } 290 } 291 else { 292 SETERRQ(1,0,"Only local values currently supported"); 293 } 294 } 295 return 0; 296 } 
297 298 #undef __FUNC__ 299 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 300 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 301 { 302 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 303 MPI_Comm comm = mat->comm; 304 int size = aij->size, *owners = aij->rowners; 305 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 306 MPI_Request *send_waits,*recv_waits; 307 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 308 InsertMode addv; 309 Scalar *rvalues,*svalues; 310 311 /* make sure all processors are either in INSERTMODE or ADDMODE */ 312 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 313 if (addv == (ADD_VALUES|INSERT_VALUES)) { 314 SETERRQ(1,0,"Some processors inserted others added"); 315 } 316 mat->insertmode = addv; /* in case this processor had no cache */ 317 318 /* first count number of contributors to each processor */ 319 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 320 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 321 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 322 for ( i=0; i<aij->stash.n; i++ ) { 323 idx = aij->stash.idx[i]; 324 for ( j=0; j<size; j++ ) { 325 if (idx >= owners[j] && idx < owners[j+1]) { 326 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 327 } 328 } 329 } 330 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 331 332 /* inform other processors of number of messages and max length*/ 333 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 334 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 335 nreceives = work[rank]; 336 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 337 nmax = work[rank]; 338 PetscFree(work); 339 340 /* post receives: 341 1) each message will consist of ordered pairs 342 (global index,value) we store the global index as a double 343 to simplify the message passing. 
344 2) since we don't know how long each individual message is we 345 allocate the largest needed buffer for each receive. Potentially 346 this is a lot of wasted space. 347 348 349 This could be done better. 350 */ 351 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 352 CHKPTRQ(rvalues); 353 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 354 CHKPTRQ(recv_waits); 355 for ( i=0; i<nreceives; i++ ) { 356 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 357 comm,recv_waits+i); 358 } 359 360 /* do sends: 361 1) starts[i] gives the starting index in svalues for stuff going to 362 the ith processor 363 */ 364 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 365 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 366 CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 382 comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 return 0; 397 } 
398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 /* wait on receives */ 412 while (count) { 413 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 414 /* unpack receives into our local space */ 415 values = aij->rvalues + 3*imdex*aij->rmax; 416 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 417 n = n/3; 418 for ( i=0; i<n; i++ ) { 419 row = (int) PetscReal(values[3*i]) - aij->rstart; 420 col = (int) PetscReal(values[3*i+1]); 421 val = values[3*i+2]; 422 if (col >= aij->cstart && col < aij->cend) { 423 col -= aij->cstart; 424 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 425 } 426 else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also 
disassemble ourselfs, in order that we may reassemble. */ 457 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 return 0; 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 479 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 480 return 0; 481 } 482 483 /* the code does not do the diagonal entries correctly unless the 484 matrix is square and the column and row owerships are identical. 485 This is a BUG. The only way to fix it seems to be to access 486 aij->A and aij->B directly and not through the MatZeroRows() 487 routine. 
488 */ 489 #undef __FUNC__ 490 #define __FUNC__ "MatZeroRows_MPIAIJ" 491 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 492 { 493 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 494 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 495 int *procs,*nprocs,j,found,idx,nsends,*work; 496 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 497 int *rvalues,tag = A->tag,count,base,slen,n,*source; 498 int *lens,imdex,*lrows,*values; 499 MPI_Comm comm = A->comm; 500 MPI_Request *send_waits,*recv_waits; 501 MPI_Status recv_status,*send_status; 502 IS istmp; 503 504 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 505 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 506 507 /* first count number of contributors to each processor */ 508 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 509 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 510 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 511 for ( i=0; i<N; i++ ) { 512 idx = rows[i]; 513 found = 0; 514 for ( j=0; j<size; j++ ) { 515 if (idx >= owners[j] && idx < owners[j+1]) { 516 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 517 } 518 } 519 if (!found) SETERRQ(1,0,"Index out of range"); 520 } 521 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 522 523 /* inform other processors of number of messages and max length*/ 524 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 525 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 526 nrecvs = work[rank]; 527 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 528 nmax = work[rank]; 529 PetscFree(work); 530 531 /* post receives: */ 532 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 533 CHKPTRQ(rvalues); 534 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 535 CHKPTRQ(recv_waits); 536 for ( i=0; i<nrecvs; i++ ) { 537 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 538 } 539 540 /* do sends: 541 1) 
starts[i] gives the starting index in svalues for stuff going to 542 the ith processor 543 */ 544 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 545 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 546 CHKPTRQ(send_waits); 547 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 548 starts[0] = 0; 549 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 550 for ( i=0; i<N; i++ ) { 551 svalues[starts[owner[i]]++] = rows[i]; 552 } 553 ISRestoreIndices(is,&rows); 554 555 starts[0] = 0; 556 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 557 count = 0; 558 for ( i=0; i<size; i++ ) { 559 if (procs[i]) { 560 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 561 } 562 } 563 PetscFree(starts); 564 565 base = owners[rank]; 566 567 /* wait on receives */ 568 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 569 source = lens + nrecvs; 570 count = nrecvs; slen = 0; 571 while (count) { 572 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 573 /* unpack receives into our local space */ 574 MPI_Get_count(&recv_status,MPI_INT,&n); 575 source[imdex] = recv_status.MPI_SOURCE; 576 lens[imdex] = n; 577 slen += n; 578 count--; 579 } 580 PetscFree(recv_waits); 581 582 /* move the data into the send scatter */ 583 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 584 count = 0; 585 for ( i=0; i<nrecvs; i++ ) { 586 values = rvalues + i*nmax; 587 for ( j=0; j<lens[i]; j++ ) { 588 lrows[count++] = values[j] - base; 589 } 590 } 591 PetscFree(rvalues); PetscFree(lens); 592 PetscFree(owner); PetscFree(nprocs); 593 594 /* actually zap the local rows */ 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 596 PLogObjectParent(A,istmp); 597 PetscFree(lrows); 598 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 599 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 600 ierr = ISDestroy(istmp); 
CHKERRQ(ierr); 601 602 /* wait on sends */ 603 if (nsends) { 604 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 605 CHKPTRQ(send_status); 606 MPI_Waitall(nsends,send_waits,send_status); 607 PetscFree(send_status); 608 } 609 PetscFree(send_waits); PetscFree(svalues); 610 611 return 0; 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatMult_MPIAIJ" 616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 617 { 618 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 619 int ierr,nt; 620 621 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 622 if (nt != a->n) { 623 SETERRQ(1,0,"Incompatible partition of A and xx"); 624 } 625 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 626 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 627 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 628 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 629 return 0; 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatMultAdd_MPIAIJ" 634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635 { 636 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 637 int ierr; 638 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 639 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 640 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 641 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 642 return 0; 643 } 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatMultTrans_MPIAIJ" 647 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 648 { 649 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 650 int ierr; 651 652 /* do nondiagonal part */ 653 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 654 /* send it on its way */ 655 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 656 /* do local part */ 657 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 658 /* receive remote parts: note this 
assumes the values are not actually */ 659 /* inserted in yy until the next line, which is true for my implementation*/ 660 /* but is not perhaps always true. */ 661 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 return 0; 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 667 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 /* do nondiagonal part */ 673 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 674 /* send it on its way */ 675 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 676 /* do local part */ 677 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 678 /* receive remote parts: note this assumes the values are not actually */ 679 /* inserted in yy until the next line, which is true for my implementation*/ 680 /* but is not perhaps always true. */ 681 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 return 0; 683 } 684 685 /* 686 This only works correctly for square matrices where the subblock A->A is the 687 diagonal block 688 */ 689 #undef __FUNC__ 690 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 691 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 692 { 693 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 694 if (a->M != a->N) 695 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(1,0,"row partition must equal col partition"); } 698 return MatGetDiagonal(a->A,v); 699 } 700 701 #undef __FUNC__ 702 #define __FUNC__ "MatScale_MPIAIJ" 703 int MatScale_MPIAIJ(Scalar *aa,Mat A) 704 { 705 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 706 int ierr; 707 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 708 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 709 return 0; 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatDestroy_MPIAIJ" 714 int MatDestroy_MPIAIJ(PetscObject 
obj) 715 { 716 Mat mat = (Mat) obj; 717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 718 int ierr; 719 720 #if defined(PETSC_LOG) 721 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 722 #endif 723 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 724 PetscFree(aij->rowners); 725 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 726 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 727 if (aij->colmap) PetscFree(aij->colmap); 728 if (aij->garray) PetscFree(aij->garray); 729 if (aij->lvec) VecDestroy(aij->lvec); 730 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 731 if (aij->rowvalues) PetscFree(aij->rowvalues); 732 PetscFree(aij); 733 PLogObjectDestroy(mat); 734 PetscHeaderDestroy(mat); 735 return 0; 736 } 737 738 #undef __FUNC__ 739 #define __FUNC__ "MatView_MPIAIJ_Binary" 740 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 743 int ierr; 744 745 if (aij->size == 1) { 746 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 747 } 748 else SETERRQ(1,0,"Only uniprocessor output supported"); 749 return 0; 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 754 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 757 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 758 int ierr, format,shift = C->indexshift,rank; 759 FILE *fd; 760 ViewerType vtype; 761 762 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 763 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 764 ierr = ViewerGetFormat(viewer,&format); 765 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 766 MatInfo info; 767 int flg; 768 MPI_Comm_rank(mat->comm,&rank); 769 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 770 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 771 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 772 PetscSequentialPhaseBegin(mat->comm,1); 773 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d 
mem %d, not using I-node routines\n", 774 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 775 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 776 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 777 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 778 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 779 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 780 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 781 fflush(fd); 782 PetscSequentialPhaseEnd(mat->comm,1); 783 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 784 return 0; 785 } 786 else if (format == VIEWER_FORMAT_ASCII_INFO) { 787 return 0; 788 } 789 } 790 791 if (vtype == DRAW_VIEWER) { 792 Draw draw; 793 PetscTruth isnull; 794 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 795 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 796 } 797 798 if (vtype == ASCII_FILE_VIEWER) { 799 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 800 PetscSequentialPhaseBegin(mat->comm,1); 801 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 802 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 803 aij->cend); 804 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 805 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 806 fflush(fd); 807 PetscSequentialPhaseEnd(mat->comm,1); 808 } 809 else { 810 int size = aij->size; 811 rank = aij->rank; 812 if (size == 1) { 813 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 814 } 815 else { 816 /* assemble the entire matrix onto first processor. 
*/ 817 Mat A; 818 Mat_SeqAIJ *Aloc; 819 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 820 Scalar *a; 821 822 if (!rank) { 823 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 824 CHKERRQ(ierr); 825 } 826 else { 827 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 828 CHKERRQ(ierr); 829 } 830 PLogObjectParent(mat,A); 831 832 /* copy over the A part */ 833 Aloc = (Mat_SeqAIJ*) aij->A->data; 834 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 835 row = aij->rstart; 836 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 837 for ( i=0; i<m; i++ ) { 838 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 839 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 840 } 841 aj = Aloc->j; 842 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 843 844 /* copy over the B part */ 845 Aloc = (Mat_SeqAIJ*) aij->B->data; 846 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 847 row = aij->rstart; 848 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 849 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 850 for ( i=0; i<m; i++ ) { 851 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 852 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 853 } 854 PetscFree(ct); 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 857 if (!rank) { 858 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 859 } 860 ierr = MatDestroy(A); CHKERRQ(ierr); 861 } 862 } 863 return 0; 864 } 865 866 #undef __FUNC__ 867 #define __FUNC__ "MatView_MPIAIJ" 868 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 869 { 870 Mat mat = (Mat) obj; 871 int ierr; 872 ViewerType vtype; 873 874 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 875 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 876 vtype == DRAW_VIEWER || vtype == 
MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR relaxation for the parallel AIJ format.

   Only the SOR_LOCAL_* sweep variants are supported: each process
   relaxes its own rows using the diagonal block A, while contributions
   of the off-diagonal block B use ghost values of xx gathered into
   mat->lvec through the scatter context mat->Mvctx.  With
   SOR_ZERO_INITIAL_GUESS the work is delegated entirely to the
   sequential relax of the diagonal block (B contributes nothing when
   the initial iterate is zero).

   NOTE(review): SPARSEDENSEMDOT comes from src/inline/spops.h; it
   appears to fold the (negated) sparse row dot product into sum --
   confirm against that header.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* make sure the diagonal-entry lookup table of A exists */
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    /* gather the ghost values of xx needed by the off-diagonal block */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
           mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - gathers matrix statistics by summing the info of
   the diagonal (A) and off-diagonal (B) sequential blocks, then
   reducing across the communicator for the MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM cases.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  /* NOTE(review): columns_local is the GLOBAL column count N; this
     matches the m x N storage of the off-diagonal block (see
     MatCreateMPIAIJ below) but confirm callers expect N, not mat->n */
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  =
isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* elementwise maximum of the five counters over all processes */
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* global totals over all processes */
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - applies an option to the parallel matrix,
   forwarding it to both sequential blocks where meaningful; ordering
   and symmetry hints are ignored (with a log message), and options
   that cannot be supported raise an error.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    /* handled identically by both sequential blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    /* values destined for other processes will be dropped, not stashed */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
/* MatGetSize_MPIAIJ - returns the global dimensions M x N */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
/*
   MatGetLocalSize_MPIAIJ - returns the local row count and the column
   count.  NOTE(review): *n is set to the GLOBAL column count mat->N,
   not mat->n; this mirrors the m x N off-diagonal storage scheme but
   should be confirmed against what callers expect.
*/
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  if (m) *m = mat->m;
  if (n) *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
/* MatGetOwnershipRange_MPIAIJ - first locally owned row and one past
   the last locally owned row */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - returns one locally owned global row, merging the
   diagonal block A (column indices offset by cstart) with the
   off-diagonal block B (column indices mapped to global numbers via
   garray) so the result is sorted by increasing global column.  Only
   rows in [rstart,rend) may be requested, and only one row may be
   outstanding at a time (getrowactive).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* a single buffer: max Scalars followed by max ints of workspace */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  /* even when indices are not wanted, B's indices are still needed to
     interleave the values in global column order */
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - releases the row returned by MatGetRow; the
   workspace buffers are kept for reuse, only the active flag is
   cleared */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - computes the Frobenius norm, the 1-norm (maximum
   column sum) or the infinity norm (maximum row sum) by combining the
   two sequential blocks and reducing over the communicator.  The
   2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    /* single process: defer to the sequential block */
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      /* accumulate per-column absolute sums over ALL global columns
         (length N workspace -- not scalable, like colmap above) */
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - forms the transpose by inserting each local
   row (with its column indices globalized) as a column of a new
   n x m parallel matrix.  With matout == PETSC_NULL the result
   replaces the guts of A itself, which therefore must be square.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily globalize the diagonal block's column indices ... */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* ... and restore them */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  /* map off-diagonal local column numbers to global ones via garray */
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - computes diag(ll)*mat*diag(rr).  The right
   scaling of the off-diagonal block needs ghost entries of rr, so that
   scatter is started first and completed only after the remaining
   scaling work is done.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - prints options help on process 0 only,
   delegating to the sequential AIJ help */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ is an unblocked format; block size is
   always 1 */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - clears the factored state of the diagonal
   block.  NOTE(review): a->B is not touched; presumably only A ever
   holds a factorization -- confirm. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - two parallel AIJ matrices are equal when, on every
   process, both the diagonal and off-diagonal blocks compare equal;
   the per-process results are combined with a logical-AND reduction.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal blocks when the diagonal ones match */
  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);
  return(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* operation table for the MPIAIJ type; zero entries are operations
   this format does not implement */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  |  o o o d d d o o o o o o
$   row 4  |  o o o d d d o o o o o o
$   row 5  |  o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  MPI_Comm_size(comm,&size);
  /* uniprocessor case: just build a sequential AIJ matrix */
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    return 0;
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* fill in any dimensions the caller left to PETSc */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* prefix-sum the gathered local sizes into ownership offsets */
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  /* the off-diagonal block is stored with ALL N global columns */
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
/*
   MatConvertSameType_MPIAIJ - duplicates an assembled MPIAIJ matrix,
   copying the ownership tables, the column maps and the two sequential
   blocks into a fresh parallel matrix.
*/
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat        = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M =
mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    /* garray has one entry per off-diagonal column actually used */
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  /* NOTE(review): cpvalues is not consulted here; both blocks are
     always copied with MatConvert(...,MATSAME,...) -- confirm intended */
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - loads a matrix stored in PETSc binary format.
   Process 0 reads the header, the row lengths, the column indices and
   the numerical values, scattering/shipping each process's share;
   every process then counts its diagonal/off-diagonal entries (for
   preallocation), inserts its own rows and assembles the parallel
   matrix.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix, preallocated from the diagonal/off-diagonal
     split just computed; ourlens is temporarily reduced to the
     diagonal-only counts and then restored */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}