1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.214 1997/08/22 15:13:39 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #define CHUNKSIZE 15 33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 34 { \ 35 \ 36 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 37 rmax = aimax[row]; nrow = ailen[row]; \ 38 col1 = col - shift; \ 39 \ 40 low = 0; high = nrow; \ 41 while (high-low > 5) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > col) high = t; \ 44 else low = t; \ 45 } \ 46 for ( _i=0; _i<nrow; _i++ ) { \ 47 if (rp[_i] > col1) break; \ 48 if (rp[_i] == col1) { \ 49 if (addv == ADD_VALUES) ap[_i] += value; \ 50 else ap[_i] = value; \ 51 goto a_noinsert; \ 52 } \ 53 } \ 54 if (nonew == 1) goto a_noinsert; \ 55 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 56 if (nrow >= rmax) { \ 57 /* there is no extra room in row, therefore enlarge */ \ 58 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 59 Scalar *new_a; \ 60 \ 61 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 62 \ 63 /* malloc new storage space */ \ 64 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 65 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 66 new_j = (int *) (new_a + new_nz); \ 67 new_i = new_j + new_nz; \ 68 \ 69 /* copy over old data into new slots */ \ 70 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 71 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 72 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 73 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 74 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 75 len*sizeof(int)); \ 76 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 77 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 78 len*sizeof(Scalar)); \ 79 /* free up old matrix storage */ \ 80 \ 81 PetscFree(a->a); \ 82 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 83 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 84 a->singlemalloc = 1; \ 85 \ 86 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 87 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 88 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 89 a->maxnz += CHUNKSIZE; \ 90 a->reallocs++; \ 91 } \ 92 N = nrow++ - 1; a->nz++; \ 93 /* shift up all the later entries in this row */ \ 94 for ( ii=N; ii>=_i; ii-- ) { \ 95 rp[ii+1] = rp[ii]; \ 96 ap[ii+1] = ap[ii]; \ 97 } \ 98 rp[_i] = col1; \ 99 ap[_i] = value; \ 100 a_noinsert: ; \ 101 ailen[row] = nrow; \ 102 } 103 104 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 105 { \ 106 \ 107 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 108 rmax = bimax[row]; nrow = bilen[row]; \ 109 col1 = col - shift; \ 110 \ 111 low = 0; high = nrow; \ 112 while (high-low > 5) { \ 113 t = (low+high)/2; \ 114 if (rp[t] > col) high = t; \ 115 else low = t; \ 116 } \ 117 for ( _i=0; _i<nrow; _i++ ) { \ 118 if (rp[_i] > col1) break; \ 119 if (rp[_i] == col1) { \ 120 if (addv == ADD_VALUES) ap[_i] += value; \ 121 else ap[_i] = value; \ 122 goto b_noinsert; \ 123 } \ 124 } \ 125 if (nonew == 1) goto b_noinsert; \ 126 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 130 Scalar *new_a; \ 131 \ 132 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 136 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 137 new_j = (int *) (new_a + new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 142 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 143 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 144 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 145 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 146 len*sizeof(int)); \ 147 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 148 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 149 len*sizeof(Scalar)); \ 150 /* free up old matrix storage */ \ 151 \ 152 PetscFree(b->a); \ 153 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = 1; \ 156 \ 157 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 158 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for ( ii=N; ii>=_i; ii-- ) { \ 166 rp[ii+1] = rp[ii]; \ 167 ap[ii+1] = ap[ii]; \ 168 } \ 169 rp[_i] = col1; \ 170 ap[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow; \ 173 } 174 175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 176 #undef __FUNC__ 177 #define __FUNC__ "MatSetValues_MPIAIJ" 178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 179 { 180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 181 Scalar value; 182 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 183 int cstart = aij->cstart, cend = aij->cend,row,col; 184 int roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 189 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 190 Scalar *aa = a->a; 191 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 194 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 195 Scalar *ba = b->a; 196 197 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 198 int nonew = a->nonew,shift = a->indexshift; 199 Scalar *ap; 200 201 for ( i=0; i<m; i++ ) { 202 #if defined(PETSC_BOPT_g) 203 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 204 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 205 #endif 206 if (im[i] >= rstart && im[i] < rend) { 207 row = im[i] - rstart; 208 for ( j=0; j<n; j++ ) { 209 if (in[j] >= cstart && in[j] < cend){ 210 col = in[j] - cstart; 211 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 212 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 213 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 #if defined(PETSC_BOPT_g) 216 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 217 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 218 #endif 219 else { 220 if (mat->was_assembled) { 221 if (!aij->colmap) { 222 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 223 } 224 col = aij->colmap[in[j]] - 1; 225 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 226 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 227 col = in[j]; 228 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 229 B = aij->B; 230 b = (Mat_SeqAIJ *) B->data; 231 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 232 ba = b->a; 233 } 234 } 235 else col = in[j]; 236 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 237 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 238 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 239 } 240 } 241 } 242 else { 243 if (roworiented && !aij->donotstash) { 244 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 245 } 246 else { 247 if (!aij->donotstash) { 248 row = im[i]; 249 for ( j=0; j<n; j++ ) { 250 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 251 } 252 } 253 } 254 } 255 } 256 return 0; 257 } 258 259 #undef __FUNC__ 260 #define __FUNC__ "MatGetValues_MPIAIJ" 261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 262 { 263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 264 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 265 int cstart = aij->cstart, cend = aij->cend,row,col; 266 267 for ( i=0; i<m; i++ ) { 268 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 269 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 270 if (idxm[i] >= rstart && idxm[i] < rend) { 271 row = idxm[i] - rstart; 272 for ( j=0; j<n; j++ ) { 273 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 274 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 275 if (idxn[j] >= cstart && idxn[j] < cend){ 276 col = idxn[j] - cstart; 277 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 278 } 279 else { 280 if (!aij->colmap) { 281 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 col = aij->colmap[idxn[j]] - 1; 284 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 285 else { 286 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 287 } 288 } 289 } 290 } 291 else { 292 SETERRQ(1,0,"Only local values currently supported"); 293 } 294 } 295 return 0; 296 } 297 298 #undef __FUNC__ 299 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 300 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 301 { 302 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 303 MPI_Comm comm = mat->comm; 304 int size = aij->size, *owners = aij->rowners; 305 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 306 MPI_Request *send_waits,*recv_waits; 307 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 308 InsertMode addv; 309 Scalar *rvalues,*svalues; 310 311 /* make sure all processors are either in INSERTMODE or ADDMODE */ 312 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 313 if (addv == (ADD_VALUES|INSERT_VALUES)) { 314 SETERRQ(1,0,"Some processors inserted others added"); 315 } 316 mat->insertmode = addv; /* in case this processor had no cache */ 317 318 /* first count number of contributors to each processor */ 319 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 320 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 321 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 322 for ( i=0; i<aij->stash.n; i++ ) { 323 idx = aij->stash.idx[i]; 324 for ( j=0; j<size; j++ ) { 325 if (idx >= owners[j] && idx < owners[j+1]) { 326 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 327 } 328 } 329 } 330 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 331 332 /* inform other processors of number of messages and max length*/ 333 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 334 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 335 nreceives = work[rank]; 336 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 337 nmax = work[rank]; 338 PetscFree(work); 339 340 /* post receives: 341 1) each message will consist of ordered pairs 342 (global index,value) we store the global index as a double 343 to simplify the message passing. 344 2) since we don't know how long each individual message is we 345 allocate the largest needed buffer for each receive. Potentially 346 this is a lot of wasted space. 347 348 349 This could be done better. 350 */ 351 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 352 CHKPTRQ(rvalues); 353 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 354 CHKPTRQ(recv_waits); 355 for ( i=0; i<nreceives; i++ ) { 356 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 357 comm,recv_waits+i); 358 } 359 360 /* do sends: 361 1) starts[i] gives the starting index in svalues for stuff going to 362 the ith processor 363 */ 364 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 365 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 366 CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 382 comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 return 0; 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 /* wait on receives */ 412 while (count) { 413 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 414 /* unpack receives into our local space */ 415 values = aij->rvalues + 3*imdex*aij->rmax; 416 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 417 n = n/3; 418 for ( i=0; i<n; i++ ) { 419 row = (int) PetscReal(values[3*i]) - aij->rstart; 420 col = (int) PetscReal(values[3*i+1]); 421 val = values[3*i+2]; 422 if (col >= aij->cstart && col < aij->cend) { 423 col -= aij->cstart; 424 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 425 } 426 else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also disassemble ourselfs, in order that we may reassemble. */ 457 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 return 0; 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 479 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 480 return 0; 481 } 482 483 /* the code does not do the diagonal entries correctly unless the 484 matrix is square and the column and row owerships are identical. 485 This is a BUG. The only way to fix it seems to be to access 486 aij->A and aij->B directly and not through the MatZeroRows() 487 routine. 488 */ 489 #undef __FUNC__ 490 #define __FUNC__ "MatZeroRows_MPIAIJ" 491 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 492 { 493 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 494 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 495 int *procs,*nprocs,j,found,idx,nsends,*work; 496 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 497 int *rvalues,tag = A->tag,count,base,slen,n,*source; 498 int *lens,imdex,*lrows,*values; 499 MPI_Comm comm = A->comm; 500 MPI_Request *send_waits,*recv_waits; 501 MPI_Status recv_status,*send_status; 502 IS istmp; 503 504 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 505 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 506 507 /* first count number of contributors to each processor */ 508 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 509 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 510 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 511 for ( i=0; i<N; i++ ) { 512 idx = rows[i]; 513 found = 0; 514 for ( j=0; j<size; j++ ) { 515 if (idx >= owners[j] && idx < owners[j+1]) { 516 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 517 } 518 } 519 if (!found) SETERRQ(1,0,"Index out of range"); 520 } 521 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 522 523 /* inform other processors of number of messages and max length*/ 524 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 525 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 526 nrecvs = work[rank]; 527 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 528 nmax = work[rank]; 529 PetscFree(work); 530 531 /* post receives: */ 532 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 533 CHKPTRQ(rvalues); 534 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 535 CHKPTRQ(recv_waits); 536 for ( i=0; i<nrecvs; i++ ) { 537 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 538 } 539 540 /* do sends: 541 1) starts[i] gives the starting index in svalues for stuff going to 542 the ith processor 543 */ 544 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 545 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 546 CHKPTRQ(send_waits); 547 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 548 starts[0] = 0; 549 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 550 for ( i=0; i<N; i++ ) { 551 svalues[starts[owner[i]]++] = rows[i]; 552 } 553 ISRestoreIndices(is,&rows); 554 555 starts[0] = 0; 556 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 557 count = 0; 558 for ( i=0; i<size; i++ ) { 559 if (procs[i]) { 560 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 561 } 562 } 563 PetscFree(starts); 564 565 base = owners[rank]; 566 567 /* wait on receives */ 568 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 569 source = lens + nrecvs; 570 count = nrecvs; slen = 0; 571 while (count) { 572 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 573 /* unpack receives into our local space */ 574 MPI_Get_count(&recv_status,MPI_INT,&n); 575 source[imdex] = recv_status.MPI_SOURCE; 576 lens[imdex] = n; 577 slen += n; 578 count--; 579 } 580 PetscFree(recv_waits); 581 582 /* move the data into the send scatter */ 583 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 584 count = 0; 585 for ( i=0; i<nrecvs; i++ ) { 586 values = rvalues + i*nmax; 587 for ( j=0; j<lens[i]; j++ ) { 588 lrows[count++] = values[j] - base; 589 } 590 } 591 PetscFree(rvalues); PetscFree(lens); 592 PetscFree(owner); PetscFree(nprocs); 593 594 /* actually zap the local rows */ 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 596 PLogObjectParent(A,istmp); 597 PetscFree(lrows); 598 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 599 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 600 ierr = ISDestroy(istmp); CHKERRQ(ierr); 601 602 /* wait on sends */ 603 if (nsends) { 604 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 605 CHKPTRQ(send_status); 606 MPI_Waitall(nsends,send_waits,send_status); 607 PetscFree(send_status); 608 } 609 PetscFree(send_waits); PetscFree(svalues); 610 611 return 0; 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatMult_MPIAIJ" 616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 617 { 618 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 619 int ierr,nt; 620 621 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 622 if (nt != a->n) { 623 SETERRQ(1,0,"Incompatible partition of A and xx"); 624 } 625 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 626 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 627 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 628 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 629 return 0; 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatMultAdd_MPIAIJ" 634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635 { 636 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 637 int ierr; 638 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 639 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 640 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 641 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 642 return 0; 643 } 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatMultTrans_MPIAIJ" 647 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 648 { 649 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 650 int ierr; 651 652 /* do nondiagonal part */ 653 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 654 /* send it on its way */ 655 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 656 /* do local part */ 657 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 658 /* receive remote parts: note this assumes the values are not actually */ 659 /* inserted in yy until the next line, which is true for my implementation*/ 660 /* but is not perhaps always true. */ 661 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 return 0; 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 667 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 /* do nondiagonal part */ 673 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 674 /* send it on its way */ 675 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 676 /* do local part */ 677 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 678 /* receive remote parts: note this assumes the values are not actually */ 679 /* inserted in yy until the next line, which is true for my implementation*/ 680 /* but is not perhaps always true. */ 681 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 return 0; 683 } 684 685 /* 686 This only works correctly for square matrices where the subblock A->A is the 687 diagonal block 688 */ 689 #undef __FUNC__ 690 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 691 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 692 { 693 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 694 if (a->M != a->N) 695 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(1,0,"row partition must equal col partition"); } 698 return MatGetDiagonal(a->A,v); 699 } 700 701 #undef __FUNC__ 702 #define __FUNC__ "MatScale_MPIAIJ" 703 int MatScale_MPIAIJ(Scalar *aa,Mat A) 704 { 705 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 706 int ierr; 707 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 708 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 709 return 0; 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatDestroy_MPIAIJ" 714 int MatDestroy_MPIAIJ(PetscObject obj) 715 { 716 Mat mat = (Mat) obj; 717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 718 int ierr; 719 720 #if defined(PETSC_LOG) 721 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 722 #endif 723 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 724 PetscFree(aij->rowners); 725 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 726 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 727 if (aij->colmap) PetscFree(aij->colmap); 728 if (aij->garray) PetscFree(aij->garray); 729 if (aij->lvec) VecDestroy(aij->lvec); 730 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 731 if (aij->rowvalues) PetscFree(aij->rowvalues); 732 PetscFree(aij); 733 PLogObjectDestroy(mat); 734 PetscHeaderDestroy(mat); 735 return 0; 736 } 737 738 #undef __FUNC__ 739 #define __FUNC__ "MatView_MPIAIJ_Binary" 740 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 743 int ierr; 744 745 if (aij->size == 1) { 746 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 747 } 748 else SETERRQ(1,0,"Only uniprocessor output supported"); 749 return 0; 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 754 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 757 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 758 int ierr, format,shift = C->indexshift,rank; 759 FILE *fd; 760 ViewerType vtype; 761 762 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 763 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 764 ierr = ViewerGetFormat(viewer,&format); 765 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 766 MatInfo info; 767 int flg; 768 MPI_Comm_rank(mat->comm,&rank); 769 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 770 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 771 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 772 PetscSequentialPhaseBegin(mat->comm,1); 773 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 774 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 775 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 776 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 777 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 778 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 779 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 780 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 781 fflush(fd); 782 PetscSequentialPhaseEnd(mat->comm,1); 783 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 784 return 0; 785 } 786 else if (format == VIEWER_FORMAT_ASCII_INFO) { 787 return 0; 788 } 789 } 790 791 if (vtype == DRAW_VIEWER) { 792 Draw draw; 793 PetscTruth isnull; 794 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 795 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 796 } 797 798 if (vtype == ASCII_FILE_VIEWER) { 799 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 800 PetscSequentialPhaseBegin(mat->comm,1); 801 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 802 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 803 aij->cend); 804 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 805 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 806 fflush(fd); 807 PetscSequentialPhaseEnd(mat->comm,1); 808 } 809 else { 810 int size = aij->size; 811 rank = aij->rank; 812 if (size == 1) { 813 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 814 } 815 else { 816 /* assemble the entire matrix onto first processor. */ 817 Mat A; 818 Mat_SeqAIJ *Aloc; 819 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 820 Scalar *a; 821 822 if (!rank) { 823 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 824 CHKERRQ(ierr); 825 } 826 else { 827 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 828 CHKERRQ(ierr); 829 } 830 PLogObjectParent(mat,A); 831 832 /* copy over the A part */ 833 Aloc = (Mat_SeqAIJ*) aij->A->data; 834 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 835 row = aij->rstart; 836 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 837 for ( i=0; i<m; i++ ) { 838 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 839 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 840 } 841 aj = Aloc->j; 842 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 843 844 /* copy over the B part */ 845 Aloc = (Mat_SeqAIJ*) aij->B->data; 846 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 847 row = aij->rstart; 848 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 849 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 850 for ( i=0; i<m; i++ ) { 851 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 852 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 853 } 854 PetscFree(ct); 855 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 856 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 857 if (!rank) { 858 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 859 } 860 ierr = MatDestroy(A); CHKERRQ(ierr); 861 } 862 } 863 return 0; 864 } 865 866 #undef __FUNC__ 867 #define __FUNC__ "MatView_MPIAIJ" 868 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 869 { 870 Mat mat = (Mat) obj; 871 int ierr; 872 ViewerType vtype; 873 874 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 875 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 876 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 877 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 878 } 879 else if (vtype == BINARY_FILE_VIEWER) { 880 return MatView_MPIAIJ_Binary(mat,viewer); 881 } 882 return 0; 883 } 884 885 /* 886 This has to provide several versions. 887 888 2) a) use only local smoothing updating outer values only once. 889 b) local smoothing updating outer values each inner iteration 890 3) color updating out values betwen colors. 891 */ 892 #undef __FUNC__ 893 #define __FUNC__ "MatRelax_MPIAIJ" 894 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 895 double fshift,int its,Vec xx) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 898 Mat AA = mat->A, BB = mat->B; 899 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 900 Scalar *b,*x,*xs,*ls,d,*v,sum; 901 int ierr,*idx, *diag; 902 int n = mat->n, m = mat->m, i,shift = A->indexshift; 903 904 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 905 xs = x + shift; /* shift by one for index start of 1 */ 906 ls = ls + shift; 907 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 908 diag = A->diag; 909 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 910 if (flag & SOR_ZERO_INITIAL_GUESS) { 911 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 912 } 913 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 914 CHKERRQ(ierr); 915 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 916 CHKERRQ(ierr); 917 while (its--) { 918 /* go down through the rows */ 919 for ( i=0; i<m; i++ ) { 920 n = A->i[i+1] - A->i[i]; 921 idx = A->j + A->i[i] + shift; 922 v = A->a + A->i[i] + shift; 923 sum = b[i]; 924 SPARSEDENSEMDOT(sum,xs,v,idx,n); 925 d = fshift + A->a[diag[i]+shift]; 926 n = B->i[i+1] - B->i[i]; 927 idx = B->j + B->i[i] + shift; 928 v = B->a + B->i[i] + shift; 929 SPARSEDENSEMDOT(sum,ls,v,idx,n); 930 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 931 } 932 /* come up through the rows */ 933 for ( i=m-1; i>-1; i-- ) { 934 n = A->i[i+1] - A->i[i]; 935 idx = A->j + A->i[i] + shift; 936 v = A->a + A->i[i] + shift; 937 sum = b[i]; 938 SPARSEDENSEMDOT(sum,xs,v,idx,n); 939 d = fshift + A->a[diag[i]+shift]; 940 n = B->i[i+1] - B->i[i]; 941 idx = B->j + B->i[i] + shift; 942 v = B->a + B->i[i] + shift; 943 SPARSEDENSEMDOT(sum,ls,v,idx,n); 944 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 945 } 946 } 947 } 948 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 949 if (flag & SOR_ZERO_INITIAL_GUESS) { 950 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 951 } 952 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 953 CHKERRQ(ierr); 954 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 955 CHKERRQ(ierr); 956 while (its--) { 957 for ( i=0; i<m; i++ ) { 958 n = A->i[i+1] - A->i[i]; 959 idx = A->j + A->i[i] + shift; 960 v = A->a + A->i[i] + shift; 961 sum = b[i]; 962 SPARSEDENSEMDOT(sum,xs,v,idx,n); 963 d = fshift + A->a[diag[i]+shift]; 964 n = B->i[i+1] - B->i[i]; 965 idx = B->j + B->i[i] + shift; 966 v = B->a + B->i[i] + shift; 967 SPARSEDENSEMDOT(sum,ls,v,idx,n); 968 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 969 } 970 } 971 } 972 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 973 if (flag & SOR_ZERO_INITIAL_GUESS) { 974 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 975 } 976 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 977 mat->Mvctx); CHKERRQ(ierr); 978 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 979 mat->Mvctx); CHKERRQ(ierr); 980 while (its--) { 981 for ( i=m-1; i>-1; i-- ) { 982 n = A->i[i+1] - A->i[i]; 983 idx = A->j + A->i[i] + shift; 984 v = A->a + A->i[i] + shift; 985 sum = b[i]; 986 SPARSEDENSEMDOT(sum,xs,v,idx,n); 987 d = fshift + A->a[diag[i]+shift]; 988 n = B->i[i+1] - B->i[i]; 989 idx = B->j + B->i[i] + shift; 990 v = B->a + B->i[i] + shift; 991 SPARSEDENSEMDOT(sum,ls,v,idx,n); 992 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 993 } 994 } 995 } 996 else { 997 SETERRQ(1,0,"Option not supported"); 998 } 999 return 0; 1000 } 1001 1002 #undef __FUNC__ 1003 #define __FUNC__ "MatGetInfo_MPIAIJ" 1004 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1005 { 1006 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1007 Mat A = mat->A, B = mat->B; 1008 int ierr; 1009 double isend[5], irecv[5]; 1010 1011 info->rows_global = (double)mat->M; 1012 info->columns_global = (double)mat->N; 1013 info->rows_local = (double)mat->m; 1014 info->columns_local = (double)mat->N; 1015 info->block_size = 1.0; 1016 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1017 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1018 isend[3] = info->memory; isend[4] = info->mallocs; 1019 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1020 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1021 isend[3] += info->memory; isend[4] += info->mallocs; 1022 if (flag == MAT_LOCAL) { 1023 info->nz_used = isend[0]; 1024 info->nz_allocated = isend[1]; 1025 info->nz_unneeded = isend[2]; 1026 info->memory = isend[3]; 1027 info->mallocs = isend[4]; 1028 } else if (flag == MAT_GLOBAL_MAX) { 1029 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1030 info->nz_used = irecv[0]; 1031 info->nz_allocated = irecv[1]; 1032 info->nz_unneeded = irecv[2]; 1033 info->memory = irecv[3]; 1034 info->mallocs = irecv[4]; 1035 } else if (flag == MAT_GLOBAL_SUM) { 1036 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1037 info->nz_used = irecv[0]; 1038 info->nz_allocated = irecv[1]; 1039 info->nz_unneeded = irecv[2]; 1040 info->memory = irecv[3]; 1041 info->mallocs = irecv[4]; 1042 } 1043 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1044 info->fill_ratio_needed = 0; 1045 info->factor_mallocs = 0; 1046 1047 return 0; 1048 } 1049 1050 #undef __FUNC__ 1051 #define __FUNC__ "MatSetOption_MPIAIJ" 1052 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1055 1056 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1057 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1058 op == MAT_COLUMNS_UNSORTED || 1059 op == MAT_COLUMNS_SORTED || 1060 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1061 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1062 MatSetOption(a->A,op); 1063 MatSetOption(a->B,op); 1064 } else if (op == MAT_ROW_ORIENTED) { 1065 a->roworiented = 1; 1066 MatSetOption(a->A,op); 1067 MatSetOption(a->B,op); 1068 } else if (op == MAT_ROWS_SORTED || 1069 op == MAT_ROWS_UNSORTED || 1070 op == MAT_SYMMETRIC || 1071 op == MAT_STRUCTURALLY_SYMMETRIC || 1072 op == MAT_YES_NEW_DIAGONALS) 1073 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1074 else if (op == MAT_COLUMN_ORIENTED) { 1075 a->roworiented = 0; 1076 MatSetOption(a->A,op); 1077 MatSetOption(a->B,op); 1078 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1079 a->donotstash = 1; 1080 } else if (op == MAT_NO_NEW_DIAGONALS) 1081 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1082 else 1083 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1084 return 0; 1085 } 1086 1087 #undef __FUNC__ 1088 #define __FUNC__ "MatGetSize_MPIAIJ" 1089 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1090 { 1091 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1092 *m = mat->M; *n = mat->N; 1093 return 0; 1094 } 1095 1096 #undef __FUNC__ 1097 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1098 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1099 { 1100 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1101 *m = mat->m; *n = mat->N; 1102 return 0; 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1107 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1108 { 1109 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1110 *m = mat->rstart; *n = mat->rend; 1111 return 0; 1112 } 1113 1114 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1115 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatGetRow_MPIAIJ" 1119 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1120 { 1121 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1122 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1123 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1124 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1125 int *cmap, *idx_p; 1126 1127 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1128 mat->getrowactive = PETSC_TRUE; 1129 1130 if (!mat->rowvalues && (idx || v)) { 1131 /* 1132 allocate enough space to hold information from the longest row. 1133 */ 1134 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1135 int max = 1,m = mat->m,tmp; 1136 for ( i=0; i<m; i++ ) { 1137 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1138 if (max < tmp) { max = tmp; } 1139 } 1140 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1141 CHKPTRQ(mat->rowvalues); 1142 mat->rowindices = (int *) (mat->rowvalues + max); 1143 } 1144 1145 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1146 lrow = row - rstart; 1147 1148 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1149 if (!v) {pvA = 0; pvB = 0;} 1150 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1151 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1152 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1153 nztot = nzA + nzB; 1154 1155 cmap = mat->garray; 1156 if (v || idx) { 1157 if (nztot) { 1158 /* Sort by increasing column numbers, assuming A and B already sorted */ 1159 int imark = -1; 1160 if (v) { 1161 *v = v_p = mat->rowvalues; 1162 for ( i=0; i<nzB; i++ ) { 1163 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1164 else break; 1165 } 1166 imark = i; 1167 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1168 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1169 } 1170 if (idx) { 1171 *idx = idx_p = mat->rowindices; 1172 if (imark > -1) { 1173 for ( i=0; i<imark; i++ ) { 1174 idx_p[i] = cmap[cworkB[i]]; 1175 } 1176 } else { 1177 for ( i=0; i<nzB; i++ ) { 1178 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1179 else break; 1180 } 1181 imark = i; 1182 } 1183 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1184 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1185 } 1186 } 1187 else { 1188 if (idx) *idx = 0; 1189 if (v) *v = 0; 1190 } 1191 } 1192 *nz = nztot; 1193 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1194 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1195 return 0; 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1200 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1201 { 1202 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1203 if (aij->getrowactive == PETSC_FALSE) { 1204 SETERRQ(1,0,"MatGetRow not called"); 1205 } 1206 aij->getrowactive = PETSC_FALSE; 1207 return 0; 1208 } 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatNorm_MPIAIJ" 1212 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1213 { 1214 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1215 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1216 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1217 double sum = 0.0; 1218 Scalar *v; 1219 1220 if (aij->size == 1) { 1221 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1222 } else { 1223 if (type == NORM_FROBENIUS) { 1224 v = amat->a; 1225 for (i=0; i<amat->nz; i++ ) { 1226 #if defined(PETSC_COMPLEX) 1227 sum += real(conj(*v)*(*v)); v++; 1228 #else 1229 sum += (*v)*(*v); v++; 1230 #endif 1231 } 1232 v = bmat->a; 1233 for (i=0; i<bmat->nz; i++ ) { 1234 #if defined(PETSC_COMPLEX) 1235 sum += real(conj(*v)*(*v)); v++; 1236 #else 1237 sum += (*v)*(*v); v++; 1238 #endif 1239 } 1240 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1241 *norm = sqrt(*norm); 1242 } 1243 else if (type == NORM_1) { /* max column norm */ 1244 double *tmp, *tmp2; 1245 int *jj, *garray = aij->garray; 1246 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1247 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1248 PetscMemzero(tmp,aij->N*sizeof(double)); 1249 *norm = 0.0; 1250 v = amat->a; jj = amat->j; 1251 for ( j=0; j<amat->nz; j++ ) { 1252 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1253 } 1254 v = bmat->a; jj = bmat->j; 1255 for ( j=0; j<bmat->nz; j++ ) { 1256 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1257 } 1258 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1259 for ( j=0; j<aij->N; j++ ) { 1260 if (tmp2[j] > *norm) *norm = tmp2[j]; 1261 } 1262 PetscFree(tmp); PetscFree(tmp2); 1263 } 1264 else if (type == NORM_INFINITY) { /* max row norm */ 1265 double ntemp = 0.0; 1266 for ( j=0; j<amat->m; j++ ) { 1267 v = amat->a + amat->i[j] + shift; 1268 sum = 0.0; 1269 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1270 sum += PetscAbsScalar(*v); v++; 1271 } 1272 v = bmat->a + bmat->i[j] + shift; 1273 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1274 sum += PetscAbsScalar(*v); v++; 1275 } 1276 if (sum > ntemp) ntemp = sum; 1277 } 1278 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1279 } 1280 else { 1281 SETERRQ(1,0,"No support for two norm"); 1282 } 1283 } 1284 return 0; 1285 } 1286 1287 #undef __FUNC__ 1288 #define __FUNC__ "MatTranspose_MPIAIJ" 1289 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1290 { 1291 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1292 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1293 int ierr,shift = Aloc->indexshift; 1294 Mat B; 1295 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1296 Scalar *array; 1297 1298 if (matout == PETSC_NULL && M != N) { 1299 SETERRQ(1,0,"Square matrix only for in-place"); 1300 } 1301 1302 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1303 1304 /* copy over the A part */ 1305 Aloc = (Mat_SeqAIJ*) a->A->data; 1306 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1307 row = a->rstart; 1308 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1309 for ( i=0; i<m; i++ ) { 1310 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1311 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1312 } 1313 aj = Aloc->j; 1314 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1315 1316 /* copy over the B part */ 1317 Aloc = (Mat_SeqAIJ*) a->B->data; 1318 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1319 row = a->rstart; 1320 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1321 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1322 for ( i=0; i<m; i++ ) { 1323 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1324 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1325 } 1326 PetscFree(ct); 1327 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1328 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1329 if (matout != PETSC_NULL) { 1330 *matout = B; 1331 } else { 1332 /* This isn't really an in-place transpose .... but free data structures from a */ 1333 PetscFree(a->rowners); 1334 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1335 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1336 if (a->colmap) PetscFree(a->colmap); 1337 if (a->garray) PetscFree(a->garray); 1338 if (a->lvec) VecDestroy(a->lvec); 1339 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1340 PetscFree(a); 1341 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1342 PetscHeaderDestroy(B); 1343 } 1344 return 0; 1345 } 1346 1347 #undef __FUNC__ 1348 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1349 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1350 { 1351 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1352 Mat a = aij->A, b = aij->B; 1353 int ierr,s1,s2,s3; 1354 1355 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1356 if (rr) { 1357 s3 = aij->n; 1358 VecGetLocalSize_Fast(rr,s1); 1359 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1360 /* Overlap communication with computation. */ 1361 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1362 } 1363 if (ll) { 1364 VecGetLocalSize_Fast(ll,s1); 1365 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1366 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1367 } 1368 /* scale the diagonal block */ 1369 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1370 1371 if (rr) { 1372 /* Do a scatter end and then right scale the off-diagonal block */ 1373 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1374 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1375 } 1376 1377 return 0; 1378 } 1379 1380 1381 extern int MatPrintHelp_SeqAIJ(Mat); 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1384 int MatPrintHelp_MPIAIJ(Mat A) 1385 { 1386 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1387 1388 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1389 else return 0; 1390 } 1391 1392 #undef __FUNC__ 1393 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1394 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1395 { 1396 *bs = 1; 1397 return 0; 1398 } 1399 #undef __FUNC__ 1400 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1401 int MatSetUnfactored_MPIAIJ(Mat A) 1402 { 1403 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1404 int ierr; 1405 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1406 return 0; 1407 } 1408 1409 #undef __FUNC__ 1410 #define __FUNC__ "MatEqual_MPIAIJ" 1411 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1412 { 1413 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1414 Mat a, b, c, d; 1415 PetscTruth flg; 1416 int ierr; 1417 1418 if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type"); 1419 a = matA->A; b = matA->B; 1420 c = matB->A; d = matB->B; 1421 1422 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1423 if (flg == PETSC_TRUE) { 1424 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1425 } 1426 MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm); 1427 return(0); 1428 } 1429 1430 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1431 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1432 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1433 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1434 /* -------------------------------------------------------------------*/ 1435 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1436 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1437 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1438 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1439 0,0, 1440 0,0, 1441 0,0, 1442 MatRelax_MPIAIJ, 1443 MatTranspose_MPIAIJ, 1444 MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1445 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1446 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1447 0, 1448 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1449 0,0,0,0, 1450 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1451 0,0, 1452 0,0,MatConvertSameType_MPIAIJ,0,0, 1453 0,0,0, 1454 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1455 MatPrintHelp_MPIAIJ, 1456 MatScale_MPIAIJ,0,0,0, 1457 MatGetBlockSize_MPIAIJ,0,0,0,0, 1458 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 1459 1460 1461 #undef __FUNC__ 1462 #define __FUNC__ "MatCreateMPIAIJ" 1463 /*@C 1464 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1465 (the default parallel PETSc format). For good matrix assembly performance 1466 the user should preallocate the matrix storage by setting the parameters 1467 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1468 performance can be increased by more than a factor of 50. 1469 1470 Input Parameters: 1471 . comm - MPI communicator 1472 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1473 This value should be the same as the local size used in creating the 1474 y vector for the matrix-vector product y = Ax. 1475 . n - This value should be the same as the local size used in creating the 1476 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1477 calculated if N is given) For square matrices n is almost always m. 1478 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1479 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1480 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1481 (same for all local rows) 1482 . d_nzz - array containing the number of nonzeros in the various rows of the 1483 diagonal portion of the local submatrix (possibly different for each row) 1484 or PETSC_NULL. You must leave room for the diagonal entry even if 1485 it is zero. 1486 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1487 submatrix (same for all local rows). 1488 . o_nzz - array containing the number of nonzeros in the various rows of the 1489 off-diagonal portion of the local submatrix (possibly different for 1490 each row) or PETSC_NULL. 1491 1492 Output Parameter: 1493 . A - the matrix 1494 1495 Notes: 1496 The AIJ format (also called the Yale sparse matrix format or 1497 compressed row storage), is fully compatible with standard Fortran 77 1498 storage. That is, the stored row and column indices can begin at 1499 either one (as in Fortran) or zero. See the users manual for details. 1500 1501 The user MUST specify either the local or global matrix dimensions 1502 (possibly both). 1503 1504 By default, this format uses inodes (identical nodes) when possible. 1505 We search for consecutive rows with the same nonzero structure, thereby 1506 reusing matrix information to achieve increased efficiency. 1507 1508 Options Database Keys: 1509 $ -mat_aij_no_inode - Do not use inodes 1510 $ -mat_aij_inode_limit <limit> - Set inode limit. 1511 $ (max limit=5) 1512 $ -mat_aij_oneindex - Internally use indexing starting at 1 1513 $ rather than 0. Note: When calling MatSetValues(), 1514 $ the user still MUST index entries starting at 0! 1515 1516 Storage Information: 1517 For a square global matrix we define each processor's diagonal portion 1518 to be its local rows and the corresponding columns (a square submatrix); 1519 each processor's off-diagonal portion encompasses the remainder of the 1520 local matrix (a rectangular submatrix). 1521 1522 The user can specify preallocated storage for the diagonal part of 1523 the local submatrix with either d_nz or d_nnz (not both). Set 1524 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1525 memory allocation. Likewise, specify preallocated storage for the 1526 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1527 1528 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1529 the figure below we depict these three local rows and all columns (0-11). 1530 1531 $ 0 1 2 3 4 5 6 7 8 9 10 11 1532 $ ------------------- 1533 $ row 3 | o o o d d d o o o o o o 1534 $ row 4 | o o o d d d o o o o o o 1535 $ row 5 | o o o d d d o o o o o o 1536 $ ------------------- 1537 $ 1538 1539 Thus, any entries in the d locations are stored in the d (diagonal) 1540 submatrix, and any entries in the o locations are stored in the 1541 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1542 stored simply in the MATSEQAIJ format for compressed row storage. 1543 1544 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1545 and o_nz should indicate the number of nonzeros per row in the o matrix. 1546 In general, for PDE problems in which most nonzeros are near the diagonal, 1547 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1548 or you will get TERRIBLE performance; see the users' manual chapter on 1549 matrices. 1550 1551 .keywords: matrix, aij, compressed row, sparse, parallel 1552 1553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1554 @*/ 1555 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1556 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1557 { 1558 Mat B; 1559 Mat_MPIAIJ *b; 1560 int ierr, i,sum[2],work[2],size; 1561 1562 MPI_Comm_size(comm,&size); 1563 if (size == 1) { 1564 if (M == PETSC_DECIDE) M = m; 1565 if (N == PETSC_DECIDE) N = n; 1566 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1567 return 0; 1568 } 1569 1570 *A = 0; 1571 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1572 PLogObjectCreate(B); 1573 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1574 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1575 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1576 B->destroy = MatDestroy_MPIAIJ; 1577 B->view = MatView_MPIAIJ; 1578 B->factor = 0; 1579 B->assembled = PETSC_FALSE; 1580 B->mapping = 0; 1581 1582 B->insertmode = NOT_SET_VALUES; 1583 b->size = size; 1584 MPI_Comm_rank(comm,&b->rank); 1585 1586 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1587 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1588 1589 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1590 work[0] = m; work[1] = n; 1591 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1592 if (M == PETSC_DECIDE) M = sum[0]; 1593 if (N == PETSC_DECIDE) N = sum[1]; 1594 } 1595 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1596 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1597 b->m = m; B->m = m; 1598 b->n = n; B->n = n; 1599 b->N = N; B->N = N; 1600 b->M = M; B->M = M; 1601 1602 /* build local table of row and column ownerships */ 1603 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1604 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1605 b->cowners = b->rowners + b->size + 2; 1606 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1607 b->rowners[0] = 0; 1608 for ( i=2; i<=b->size; i++ ) { 1609 b->rowners[i] += b->rowners[i-1]; 1610 } 1611 b->rstart = b->rowners[b->rank]; 1612 b->rend = b->rowners[b->rank+1]; 1613 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1614 b->cowners[0] = 0; 1615 for ( i=2; i<=b->size; i++ ) { 1616 b->cowners[i] += b->cowners[i-1]; 1617 } 1618 b->cstart = b->cowners[b->rank]; 1619 b->cend = b->cowners[b->rank+1]; 1620 1621 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1622 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1623 PLogObjectParent(B,b->A); 1624 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1625 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1626 PLogObjectParent(B,b->B); 1627 1628 /* build cache for off array entries formed */ 1629 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1630 b->donotstash = 0; 1631 b->colmap = 0; 1632 b->garray = 0; 1633 b->roworiented = 1; 1634 1635 /* stuff used for matrix vector multiply */ 1636 b->lvec = 0; 1637 b->Mvctx = 0; 1638 1639 /* stuff for MatGetRow() */ 1640 b->rowindices = 0; 1641 b->rowvalues = 0; 1642 b->getrowactive = PETSC_FALSE; 1643 1644 *A = B; 1645 return 0; 1646 } 1647 1648 #undef __FUNC__ 1649 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1650 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1651 { 1652 Mat mat; 1653 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1654 int ierr, len=0, flg; 1655 1656 *newmat = 0; 1657 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1658 PLogObjectCreate(mat); 1659 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1660 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1661 mat->destroy = MatDestroy_MPIAIJ; 1662 mat->view = MatView_MPIAIJ; 1663 mat->factor = matin->factor; 1664 mat->assembled = PETSC_TRUE; 1665 1666 a->m = mat->m = oldmat->m; 1667 a->n = mat->n = oldmat->n; 1668 a->M = mat->M = oldmat->M; 1669 a->N = mat->N = oldmat->N; 1670 1671 a->rstart = oldmat->rstart; 1672 a->rend = oldmat->rend; 1673 a->cstart = oldmat->cstart; 1674 a->cend = oldmat->cend; 1675 a->size = oldmat->size; 1676 a->rank = oldmat->rank; 1677 mat->insertmode = NOT_SET_VALUES; 1678 a->rowvalues = 0; 1679 a->getrowactive = PETSC_FALSE; 1680 1681 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1682 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1683 a->cowners = a->rowners + a->size + 2; 1684 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1685 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1686 if (oldmat->colmap) { 1687 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1688 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1689 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1690 } else a->colmap = 0; 1691 if (oldmat->garray) { 1692 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1693 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1694 PLogObjectMemory(mat,len*sizeof(int)); 1695 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1696 } else a->garray = 0; 1697 1698 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1699 PLogObjectParent(mat,a->lvec); 1700 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1701 PLogObjectParent(mat,a->Mvctx); 1702 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1703 PLogObjectParent(mat,a->A); 1704 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1705 PLogObjectParent(mat,a->B); 1706 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1707 if (flg) { 1708 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1709 } 1710 *newmat = mat; 1711 return 0; 1712 } 1713 1714 #include "sys.h" 1715 1716 #undef __FUNC__ 1717 #define __FUNC__ "MatLoad_MPIAIJ" 1718 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1719 { 1720 Mat A; 1721 int i, nz, ierr, j,rstart, rend, fd; 1722 Scalar *vals,*svals; 1723 MPI_Comm comm = ((PetscObject)viewer)->comm; 1724 MPI_Status status; 1725 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1726 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1727 int tag = ((PetscObject)viewer)->tag; 1728 1729 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1730 if (!rank) { 1731 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1732 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1733 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1734 } 1735 1736 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1737 M = header[1]; N = header[2]; 1738 /* determine ownership of all rows */ 1739 m = M/size + ((M % size) > rank); 1740 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1741 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1742 rowners[0] = 0; 1743 for ( i=2; i<=size; i++ ) { 1744 rowners[i] += rowners[i-1]; 1745 } 1746 rstart = rowners[rank]; 1747 rend = rowners[rank+1]; 1748 1749 /* distribute row lengths to all processors */ 1750 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1751 offlens = ourlens + (rend-rstart); 1752 if (!rank) { 1753 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1754 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1755 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1756 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1757 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1758 PetscFree(sndcounts); 1759 } 1760 else { 1761 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1762 } 1763 1764 if (!rank) { 1765 /* calculate the number of nonzeros on each processor */ 1766 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1767 PetscMemzero(procsnz,size*sizeof(int)); 1768 for ( i=0; i<size; i++ ) { 1769 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1770 procsnz[i] += rowlengths[j]; 1771 } 1772 } 1773 PetscFree(rowlengths); 1774 1775 /* determine max buffer needed and allocate it */ 1776 maxnz = 0; 1777 for ( i=0; i<size; i++ ) { 1778 maxnz = PetscMax(maxnz,procsnz[i]); 1779 } 1780 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1781 1782 /* read in my part of the matrix column indices */ 1783 nz = procsnz[0]; 1784 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1785 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1786 1787 /* read in every one elses and ship off */ 1788 for ( i=1; i<size; i++ ) { 1789 nz = procsnz[i]; 1790 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1791 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1792 } 1793 PetscFree(cols); 1794 } 1795 else { 1796 /* determine buffer space needed for message */ 1797 nz = 0; 1798 for ( i=0; i<m; i++ ) { 1799 nz += ourlens[i]; 1800 } 1801 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1802 1803 /* receive message of column indices*/ 1804 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1805 MPI_Get_count(&status,MPI_INT,&maxnz); 1806 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1807 } 1808 1809 /* loop over local rows, determining number of off diagonal entries */ 1810 PetscMemzero(offlens,m*sizeof(int)); 1811 jj = 0; 1812 for ( i=0; i<m; i++ ) { 1813 for ( j=0; j<ourlens[i]; j++ ) { 1814 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1815 jj++; 1816 } 1817 } 1818 1819 /* create our matrix */ 1820 for ( i=0; i<m; i++ ) { 1821 ourlens[i] -= offlens[i]; 1822 } 1823 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1824 A = *newmat; 1825 MatSetOption(A,MAT_COLUMNS_SORTED); 1826 for ( i=0; i<m; i++ ) { 1827 ourlens[i] += offlens[i]; 1828 } 1829 1830 if (!rank) { 1831 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1832 1833 /* read in my part of the matrix numerical values */ 1834 nz = procsnz[0]; 1835 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1836 1837 /* insert into matrix */ 1838 jj = rstart; 1839 smycols = mycols; 1840 svals = vals; 1841 for ( i=0; i<m; i++ ) { 1842 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1843 smycols += ourlens[i]; 1844 svals += ourlens[i]; 1845 jj++; 1846 } 1847 1848 /* read in other processors and ship out */ 1849 for ( i=1; i<size; i++ ) { 1850 nz = procsnz[i]; 1851 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1852 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1853 } 1854 PetscFree(procsnz); 1855 } 1856 else { 1857 /* receive numeric values */ 1858 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1859 1860 /* receive message of values*/ 1861 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1862 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1863 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1864 1865 /* insert into matrix */ 1866 jj = rstart; 1867 smycols = mycols; 1868 svals = vals; 1869 for ( i=0; i<m; i++ ) { 1870 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1871 smycols += ourlens[i]; 1872 svals += ourlens[i]; 1873 jj++; 1874 } 1875 } 1876 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1877 1878 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1880 return 0; 1881 } 1882