1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.213 1997/08/08 18:44:38 balay Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #define CHUNKSIZE 15 33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 34 { \ 35 \ 36 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 37 rmax = aimax[row]; nrow = ailen[row]; \ 38 col1 = col - shift; \ 39 \ 40 low = 0; high = nrow; \ 41 while (high-low > 5) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > col) high = t; \ 44 else low = t; \ 45 } \ 46 for ( _i=0; _i<nrow; _i++ ) { \ 47 if (rp[_i] > col1) break; \ 48 if (rp[_i] == col1) { \ 49 if (addv == ADD_VALUES) ap[_i] += value; \ 50 else ap[_i] = value; \ 51 goto a_noinsert; \ 52 } \ 53 } \ 54 if (nonew == 1) goto a_noinsert; \ 55 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 56 if (nrow >= rmax) { \ 57 /* there is no extra room in row, therefore enlarge */ \ 58 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 59 Scalar *new_a; \ 60 \ 61 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 62 \ 63 /* malloc new storage space */ \ 64 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 65 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 66 new_j = (int *) (new_a + new_nz); \ 67 new_i = new_j + new_nz; \ 68 \ 69 /* copy over old data into new slots */ \ 70 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 71 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 72 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 73 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 74 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 75 len*sizeof(int)); \ 76 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 77 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 78 len*sizeof(Scalar)); \ 79 /* free up old matrix storage */ \ 80 \ 81 PetscFree(a->a); \ 82 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 83 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 84 a->singlemalloc = 1; \ 85 \ 86 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 87 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 88 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 89 a->maxnz += CHUNKSIZE; \ 90 a->reallocs++; \ 91 } \ 92 N = nrow++ - 1; a->nz++; \ 93 /* shift up all the later entries in this row */ \ 94 for ( ii=N; ii>=_i; ii-- ) { \ 95 rp[ii+1] = rp[ii]; \ 96 ap[ii+1] = ap[ii]; \ 97 } \ 98 rp[_i] = col1; \ 99 ap[_i] = value; \ 100 a_noinsert: ; \ 101 ailen[row] = nrow; \ 102 } 103 104 #define 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 105 { \ 106 \ 107 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 108 rmax = bimax[row]; nrow = bilen[row]; \ 109 col1 = col - shift; \ 110 \ 111 low = 0; high = nrow; \ 112 while (high-low > 5) { \ 113 t = (low+high)/2; \ 114 if (rp[t] > col) high = t; \ 115 else low = t; \ 116 } \ 117 for ( _i=0; _i<nrow; _i++ ) { \ 118 if (rp[_i] > col1) break; \ 119 if (rp[_i] == col1) { \ 120 if (addv == ADD_VALUES) ap[_i] += value; \ 121 else ap[_i] = value; \ 122 goto b_noinsert; \ 123 } \ 124 } \ 125 if (nonew == 1) goto b_noinsert; \ 126 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 130 Scalar *new_a; \ 131 \ 132 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 136 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 137 new_j = (int *) (new_a + new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 142 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 143 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 144 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 145 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 146 len*sizeof(int)); \ 147 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 148 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 149 len*sizeof(Scalar)); \ 150 /* free up old matrix storage */ \ 151 \ 152 PetscFree(b->a); \ 153 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = 1; \ 156 \ 157 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 158 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for ( ii=N; ii>=_i; ii-- ) { \ 166 rp[ii+1] = rp[ii]; \ 167 ap[ii+1] = ap[ii]; \ 168 } \ 169 rp[_i] = col1; \ 170 ap[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow; \ 173 } 174 175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 176 #undef __FUNC__ 177 #define __FUNC__ "MatSetValues_MPIAIJ" 178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 179 { 180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 181 Scalar value; 182 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 183 int cstart = aij->cstart, cend = aij->cend,row,col; 184 int roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 189 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 190 Scalar *aa = a->a; 191 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 194 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 195 Scalar *ba = b->a; 196 197 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 198 int nonew = a->nonew,shift = a->indexshift; 199 Scalar *ap; 200 201 for ( i=0; i<m; i++ ) { 202 #if defined(PETSC_BOPT_g) 203 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 204 if (im[i] >= aij->M) 
SETERRQ(1,0,"Row too large"); 205 #endif 206 if (im[i] >= rstart && im[i] < rend) { 207 row = im[i] - rstart; 208 for ( j=0; j<n; j++ ) { 209 if (in[j] >= cstart && in[j] < cend){ 210 col = in[j] - cstart; 211 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 212 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 213 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 #if defined(PETSC_BOPT_g) 216 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 217 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 218 #endif 219 else { 220 if (mat->was_assembled) { 221 if (!aij->colmap) { 222 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 223 } 224 col = aij->colmap[in[j]] - 1; 225 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 226 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 227 col = in[j]; 228 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 229 B = aij->B; 230 b = (Mat_SeqAIJ *) B->data; 231 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 232 ba = b->a; 233 } 234 } 235 else col = in[j]; 236 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 237 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 238 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 239 } 240 } 241 } 242 else { 243 if (roworiented && !aij->donotstash) { 244 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 245 } 246 else { 247 if (!aij->donotstash) { 248 row = im[i]; 249 for ( j=0; j<n; j++ ) { 250 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 251 } 252 } 253 } 254 } 255 } 256 return 0; 257 } 258 259 #undef __FUNC__ 260 #define __FUNC__ "MatGetValues_MPIAIJ" 261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 262 { 263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 264 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 265 int cstart = aij->cstart, cend = aij->cend,row,col; 266 267 for ( i=0; i<m; i++ ) { 268 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 269 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 270 if (idxm[i] >= rstart && idxm[i] < rend) { 271 row = idxm[i] - rstart; 272 for ( j=0; j<n; j++ ) { 273 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 274 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 275 if (idxn[j] >= cstart && idxn[j] < cend){ 276 col = idxn[j] - cstart; 277 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 278 } 279 else { 280 if (!aij->colmap) { 281 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 col = aij->colmap[idxn[j]] - 1; 284 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 285 else { 286 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 287 } 288 } 289 } 290 } 291 else { 292 SETERRQ(1,0,"Only local values currently supported"); 293 } 294 } 295 return 0; 296 } 297 298 #undef __FUNC__ 299 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 300 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 301 { 302 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 303 MPI_Comm comm = mat->comm; 304 int size = aij->size, *owners = aij->rowners; 305 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 306 MPI_Request *send_waits,*recv_waits; 307 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 308 InsertMode addv; 309 Scalar *rvalues,*svalues; 310 311 /* make sure all processors are either in INSERTMODE or ADDMODE */ 312 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 313 if (addv 
== (ADD_VALUES|INSERT_VALUES)) { 314 SETERRQ(1,0,"Some processors inserted others added"); 315 } 316 mat->insertmode = addv; /* in case this processor had no cache */ 317 318 /* first count number of contributors to each processor */ 319 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 320 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 321 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 322 for ( i=0; i<aij->stash.n; i++ ) { 323 idx = aij->stash.idx[i]; 324 for ( j=0; j<size; j++ ) { 325 if (idx >= owners[j] && idx < owners[j+1]) { 326 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 327 } 328 } 329 } 330 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 331 332 /* inform other processors of number of messages and max length*/ 333 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 334 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 335 nreceives = work[rank]; 336 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 337 nmax = work[rank]; 338 PetscFree(work); 339 340 /* post receives: 341 1) each message will consist of ordered pairs 342 (global index,value) we store the global index as a double 343 to simplify the message passing. 344 2) since we don't know how long each individual message is we 345 allocate the largest needed buffer for each receive. Potentially 346 this is a lot of wasted space. 347 348 349 This could be done better. 350 */ 351 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 352 CHKPTRQ(rvalues); 353 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 354 CHKPTRQ(recv_waits); 355 for ( i=0; i<nreceives; i++ ) { 356 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 357 comm,recv_waits+i); 358 } 359 360 /* do sends: 361 1) starts[i] gives the starting index in svalues for stuff going to 362 the ith processor 363 */ 364 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 365 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 366 CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 382 comm,send_waits+count++); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 return 0; 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count 
= nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
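
   One conceivable repair (a sketch only, not what the routine below does):
   zero the local rows of both blocks with a zero diagonal, exactly as is
   done below, and afterwards insert the requested diagonal value through
   MatSetValues() on the parallel matrix, which routes each entry to aij->A
   or aij->B as appropriate.  Using the lrows[] and slen gathered below,
   that might look like

      ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
      ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
      if (diag) {
        for ( i=0; i<slen; i++ ) {
          int gr = lrows[i] + l->rstart;
          ierr = MatSetValues(A,1,&gr,1,&gr,diag,INSERT_VALUES); CHKERRQ(ierr);
        }
        ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
        ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      }

   This assumes the matrix is already assembled when MatZeroRows() is called
   and has not been verified here.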
488 */ 489 #undef __FUNC__ 490 #define __FUNC__ "MatZeroRows_MPIAIJ" 491 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 492 { 493 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 494 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 495 int *procs,*nprocs,j,found,idx,nsends,*work; 496 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 497 int *rvalues,tag = A->tag,count,base,slen,n,*source; 498 int *lens,imdex,*lrows,*values; 499 MPI_Comm comm = A->comm; 500 MPI_Request *send_waits,*recv_waits; 501 MPI_Status recv_status,*send_status; 502 IS istmp; 503 504 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 505 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 506 507 /* first count number of contributors to each processor */ 508 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 509 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 510 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 511 for ( i=0; i<N; i++ ) { 512 idx = rows[i]; 513 found = 0; 514 for ( j=0; j<size; j++ ) { 515 if (idx >= owners[j] && idx < owners[j+1]) { 516 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 517 } 518 } 519 if (!found) SETERRQ(1,0,"Index out of range"); 520 } 521 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 522 523 /* inform other processors of number of messages and max length*/ 524 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 525 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 526 nrecvs = work[rank]; 527 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 528 nmax = work[rank]; 529 PetscFree(work); 530 531 /* post receives: */ 532 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 533 CHKPTRQ(rvalues); 534 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 535 CHKPTRQ(recv_waits); 536 for ( i=0; i<nrecvs; i++ ) { 537 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 538 } 539 540 /* do sends: 541 1) starts[i] gives the starting index in svalues for stuff going to 542 the ith processor 543 */ 544 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 545 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 546 CHKPTRQ(send_waits); 547 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 548 starts[0] = 0; 549 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 550 for ( i=0; i<N; i++ ) { 551 svalues[starts[owner[i]]++] = rows[i]; 552 } 553 ISRestoreIndices(is,&rows); 554 555 starts[0] = 0; 556 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 557 count = 0; 558 for ( i=0; i<size; i++ ) { 559 if (procs[i]) { 560 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 561 } 562 } 563 PetscFree(starts); 564 565 base = owners[rank]; 566 567 /* wait on receives */ 568 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 569 source = lens + nrecvs; 570 count = nrecvs; slen = 0; 571 while (count) { 572 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 573 /* unpack receives into our local space */ 574 MPI_Get_count(&recv_status,MPI_INT,&n); 575 source[imdex] = recv_status.MPI_SOURCE; 576 lens[imdex] = n; 577 slen += n; 578 count--; 579 } 580 PetscFree(recv_waits); 581 582 /* move the data into the send scatter */ 583 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 584 count = 0; 585 for ( i=0; i<nrecvs; i++ ) { 586 values = rvalues + i*nmax; 587 for ( j=0; j<lens[i]; j++ ) { 588 
lrows[count++] = values[j] - base; 589 } 590 } 591 PetscFree(rvalues); PetscFree(lens); 592 PetscFree(owner); PetscFree(nprocs); 593 594 /* actually zap the local rows */ 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 596 PLogObjectParent(A,istmp); 597 PetscFree(lrows); 598 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 599 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 600 ierr = ISDestroy(istmp); CHKERRQ(ierr); 601 602 /* wait on sends */ 603 if (nsends) { 604 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 605 CHKPTRQ(send_status); 606 MPI_Waitall(nsends,send_waits,send_status); 607 PetscFree(send_status); 608 } 609 PetscFree(send_waits); PetscFree(svalues); 610 611 return 0; 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatMult_MPIAIJ" 616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 617 { 618 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 619 int ierr,nt; 620 621 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 622 if (nt != a->n) { 623 SETERRQ(1,0,"Incompatible partition of A and xx"); 624 } 625 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 626 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 627 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 628 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 629 return 0; 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatMultAdd_MPIAIJ" 634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635 { 636 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 637 int ierr; 638 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 639 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 640 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 641 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 642 return 0; 643 } 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatMultTrans_MPIAIJ" 647 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 648 { 649 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 650 int ierr; 651 652 /* do nondiagonal part */ 653 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 654 /* send it on its way */ 655 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 656 /* do local part */ 657 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 658 /* receive remote parts: note this assumes the values are not actually */ 659 /* inserted in yy until the next line, which is true for my implementation*/ 660 /* but is not perhaps always true. */ 661 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 return 0; 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 667 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 /* do nondiagonal part */ 673 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 674 /* send it on its way */ 675 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 676 /* do local part */ 677 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 678 /* receive remote parts: note this assumes the values are not actually */ 679 /* inserted in yy until the next line, which is true for my implementation*/ 680 /* but is not perhaps always true. 
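   (In other words, the correctness of overlapping the local product with the
   reverse scatter relies on the ADD_VALUES contributions not being applied
   to the destination vector until VecScatterEnd() runs; a scatter
   implementation that applied them earlier would require the scatter to be
   completed before the local multiply-add.)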
*/ 681 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 return 0; 683 } 684 685 /* 686 This only works correctly for square matrices where the subblock A->A is the 687 diagonal block 688 */ 689 #undef __FUNC__ 690 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 691 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 692 { 693 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 694 if (a->M != a->N) 695 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(1,0,"row partition must equal col partition"); } 698 return MatGetDiagonal(a->A,v); 699 } 700 701 #undef __FUNC__ 702 #define __FUNC__ "MatScale_MPIAIJ" 703 int MatScale_MPIAIJ(Scalar *aa,Mat A) 704 { 705 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 706 int ierr; 707 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 708 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 709 return 0; 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatDestroy_MPIAIJ" 714 int MatDestroy_MPIAIJ(PetscObject obj) 715 { 716 Mat mat = (Mat) obj; 717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 718 int ierr; 719 720 #if defined(PETSC_LOG) 721 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 722 #endif 723 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 724 PetscFree(aij->rowners); 725 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 726 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 727 if (aij->colmap) PetscFree(aij->colmap); 728 if (aij->garray) PetscFree(aij->garray); 729 if (aij->lvec) VecDestroy(aij->lvec); 730 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 731 if (aij->rowvalues) PetscFree(aij->rowvalues); 732 PetscFree(aij); 733 if (mat->mapping) { 734 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 735 } 736 PLogObjectDestroy(mat); 737 PetscHeaderDestroy(mat); 738 return 0; 739 } 740 741 #undef __FUNC__ 742 #define __FUNC__ "MatView_MPIAIJ_Binary" 743 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 744 { 745 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 746 int ierr; 747 748 if (aij->size == 1) { 749 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 750 } 751 else SETERRQ(1,0,"Only uniprocessor output supported"); 752 return 0; 753 } 754 755 #undef __FUNC__ 756 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 757 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 758 { 759 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 760 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 761 int ierr, format,shift = C->indexshift,rank; 762 FILE *fd; 763 ViewerType vtype; 764 765 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 766 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 767 ierr = ViewerGetFormat(viewer,&format); 768 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 769 MatInfo info; 770 int flg; 771 MPI_Comm_rank(mat->comm,&rank); 772 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 773 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 774 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 775 PetscSequentialPhaseBegin(mat->comm,1); 776 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 777 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 778 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 779 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 780 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 781 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 782 ierr = 
MatGetInfo(aij->B,MAT_LOCAL,&info); 783 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 784 fflush(fd); 785 PetscSequentialPhaseEnd(mat->comm,1); 786 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 787 return 0; 788 } 789 else if (format == VIEWER_FORMAT_ASCII_INFO) { 790 return 0; 791 } 792 } 793 794 if (vtype == DRAW_VIEWER) { 795 Draw draw; 796 PetscTruth isnull; 797 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 798 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 799 } 800 801 if (vtype == ASCII_FILE_VIEWER) { 802 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 803 PetscSequentialPhaseBegin(mat->comm,1); 804 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 805 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 806 aij->cend); 807 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 808 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 809 fflush(fd); 810 PetscSequentialPhaseEnd(mat->comm,1); 811 } 812 else { 813 int size = aij->size; 814 rank = aij->rank; 815 if (size == 1) { 816 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 817 } 818 else { 819 /* assemble the entire matrix onto first processor. */ 820 Mat A; 821 Mat_SeqAIJ *Aloc; 822 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 823 Scalar *a; 824 825 if (!rank) { 826 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 827 CHKERRQ(ierr); 828 } 829 else { 830 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 831 CHKERRQ(ierr); 832 } 833 PLogObjectParent(mat,A); 834 835 /* copy over the A part */ 836 Aloc = (Mat_SeqAIJ*) aij->A->data; 837 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 838 row = aij->rstart; 839 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 840 for ( i=0; i<m; i++ ) { 841 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 842 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 843 } 844 aj = Aloc->j; 845 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 846 847 /* copy over the B part */ 848 Aloc = (Mat_SeqAIJ*) aij->B->data; 849 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 850 row = aij->rstart; 851 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 852 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 853 for ( i=0; i<m; i++ ) { 854 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 855 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 856 } 857 PetscFree(ct); 858 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 859 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 860 if (!rank) { 861 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 862 } 863 ierr = MatDestroy(A); CHKERRQ(ierr); 864 } 865 } 866 return 0; 867 } 868 869 #undef __FUNC__ 870 #define __FUNC__ "MatView_MPIAIJ" 871 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 872 { 873 Mat mat = (Mat) obj; 874 int ierr; 875 ViewerType vtype; 876 877 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 878 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 879 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 880 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 881 } 882 else if (vtype == BINARY_FILE_VIEWER) { 883 return MatView_MPIAIJ_Binary(mat,viewer); 884 } 885 return 0; 886 } 887 888 /* 889 This has to provide several versions. 890 891 2) a) use only local smoothing updating outer values only once. 
892 b) local smoothing updating outer values each inner iteration 893 3) color updating out values betwen colors. 894 */ 895 #undef __FUNC__ 896 #define __FUNC__ "MatRelax_MPIAIJ" 897 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 898 double fshift,int its,Vec xx) 899 { 900 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 901 Mat AA = mat->A, BB = mat->B; 902 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 903 Scalar *b,*x,*xs,*ls,d,*v,sum; 904 int ierr,*idx, *diag; 905 int n = mat->n, m = mat->m, i,shift = A->indexshift; 906 907 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 908 xs = x + shift; /* shift by one for index start of 1 */ 909 ls = ls + shift; 910 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 911 diag = A->diag; 912 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 913 if (flag & SOR_ZERO_INITIAL_GUESS) { 914 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 915 } 916 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 917 CHKERRQ(ierr); 918 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 919 CHKERRQ(ierr); 920 while (its--) { 921 /* go down through the rows */ 922 for ( i=0; i<m; i++ ) { 923 n = A->i[i+1] - A->i[i]; 924 idx = A->j + A->i[i] + shift; 925 v = A->a + A->i[i] + shift; 926 sum = b[i]; 927 SPARSEDENSEMDOT(sum,xs,v,idx,n); 928 d = fshift + A->a[diag[i]+shift]; 929 n = B->i[i+1] - B->i[i]; 930 idx = B->j + B->i[i] + shift; 931 v = B->a + B->i[i] + shift; 932 SPARSEDENSEMDOT(sum,ls,v,idx,n); 933 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 934 } 935 /* come up through the rows */ 936 for ( i=m-1; i>-1; i-- ) { 937 n = A->i[i+1] - A->i[i]; 938 idx = A->j + A->i[i] + shift; 939 v = A->a + A->i[i] + shift; 940 sum = b[i]; 941 SPARSEDENSEMDOT(sum,xs,v,idx,n); 942 d = fshift + A->a[diag[i]+shift]; 943 n = B->i[i+1] - B->i[i]; 944 idx = B->j + B->i[i] + shift; 945 v = B->a + B->i[i] + shift; 946 SPARSEDENSEMDOT(sum,ls,v,idx,n); 947 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 948 } 949 } 950 } 951 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 952 if (flag & SOR_ZERO_INITIAL_GUESS) { 953 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 954 } 955 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 956 CHKERRQ(ierr); 957 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 958 CHKERRQ(ierr); 959 while (its--) { 960 for ( i=0; i<m; i++ ) { 961 n = A->i[i+1] - A->i[i]; 962 idx = A->j + A->i[i] + shift; 963 v = A->a + A->i[i] + shift; 964 sum = b[i]; 965 SPARSEDENSEMDOT(sum,xs,v,idx,n); 966 d = fshift + A->a[diag[i]+shift]; 967 n = B->i[i+1] - B->i[i]; 968 idx = B->j + B->i[i] + shift; 969 v = B->a + B->i[i] + shift; 970 SPARSEDENSEMDOT(sum,ls,v,idx,n); 971 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 972 } 973 } 974 } 975 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 976 if (flag & SOR_ZERO_INITIAL_GUESS) { 977 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 978 } 979 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 980 mat->Mvctx); CHKERRQ(ierr); 981 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 982 mat->Mvctx); CHKERRQ(ierr); 983 while (its--) { 984 for ( i=m-1; i>-1; i-- ) { 985 n = A->i[i+1] - A->i[i]; 986 idx = A->j + A->i[i] + shift; 987 v = A->a + A->i[i] + shift; 988 sum = b[i]; 989 SPARSEDENSEMDOT(sum,xs,v,idx,n); 990 d = fshift + A->a[diag[i]+shift]; 991 n = B->i[i+1] - B->i[i]; 992 idx = B->j + B->i[i] + shift; 993 v = B->a + B->i[i] + shift; 994 SPARSEDENSEMDOT(sum,ls,v,idx,n); 995 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 996 } 997 } 998 } 999 else { 1000 SETERRQ(1,0,"Option not supported"); 1001 } 1002 return 0; 1003 } 1004 1005 #undef __FUNC__ 1006 #define __FUNC__ "MatGetInfo_MPIAIJ" 1007 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010 Mat A = mat->A, B = mat->B; 1011 int ierr; 1012 double isend[5], irecv[5]; 1013 1014 info->rows_global = (double)mat->M; 1015 info->columns_global = (double)mat->N; 1016 info->rows_local = (double)mat->m; 1017 info->columns_local = (double)mat->N; 1018 info->block_size = 1.0; 1019 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1020 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1021 isend[3] = info->memory; isend[4] = info->mallocs; 1022 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1023 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1024 isend[3] += info->memory; isend[4] += info->mallocs; 1025 if (flag == MAT_LOCAL) { 1026 info->nz_used = isend[0]; 1027 info->nz_allocated = isend[1]; 1028 info->nz_unneeded = isend[2]; 1029 info->memory = isend[3]; 1030 info->mallocs = isend[4]; 1031 } else if (flag == MAT_GLOBAL_MAX) { 1032 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1033 info->nz_used = irecv[0]; 1034 info->nz_allocated = irecv[1]; 1035 info->nz_unneeded = irecv[2]; 1036 info->memory = irecv[3]; 1037 info->mallocs = irecv[4]; 1038 } else if (flag == MAT_GLOBAL_SUM) { 1039 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1040 info->nz_used = irecv[0]; 1041 info->nz_allocated = irecv[1]; 1042 info->nz_unneeded = irecv[2]; 1043 info->memory = irecv[3]; 1044 info->mallocs = irecv[4]; 1045 } 1046 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1047 info->fill_ratio_needed = 0; 1048 info->factor_mallocs = 0; 1049 1050 return 0; 1051 } 1052 1053 #undef __FUNC__ 1054 #define __FUNC__ "MatSetOption_MPIAIJ" 1055 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1058 1059 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1060 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1061 op == MAT_COLUMNS_UNSORTED || 1062 op == MAT_COLUMNS_SORTED || 1063 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1064 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1065 MatSetOption(a->A,op); 1066 MatSetOption(a->B,op); 1067 } else if (op == MAT_ROW_ORIENTED) { 1068 a->roworiented = 1; 1069 MatSetOption(a->A,op); 1070 MatSetOption(a->B,op); 1071 } else if (op == MAT_ROWS_SORTED || 1072 op == MAT_ROWS_UNSORTED || 1073 op == MAT_SYMMETRIC || 1074 op == MAT_STRUCTURALLY_SYMMETRIC || 1075 op == 
MAT_YES_NEW_DIAGONALS) 1076 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1077 else if (op == MAT_COLUMN_ORIENTED) { 1078 a->roworiented = 0; 1079 MatSetOption(a->A,op); 1080 MatSetOption(a->B,op); 1081 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1082 a->donotstash = 1; 1083 } else if (op == MAT_NO_NEW_DIAGONALS) 1084 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1085 else 1086 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1087 return 0; 1088 } 1089 1090 #undef __FUNC__ 1091 #define __FUNC__ "MatGetSize_MPIAIJ" 1092 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1093 { 1094 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1095 *m = mat->M; *n = mat->N; 1096 return 0; 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1101 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 *m = mat->m; *n = mat->N; 1105 return 0; 1106 } 1107 1108 #undef __FUNC__ 1109 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1110 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1111 { 1112 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1113 *m = mat->rstart; *n = mat->rend; 1114 return 0; 1115 } 1116 1117 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1118 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1119 1120 #undef __FUNC__ 1121 #define __FUNC__ "MatGetRow_MPIAIJ" 1122 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1123 { 1124 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1125 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1126 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1127 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1128 int *cmap, *idx_p; 1129 1130 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1131 mat->getrowactive = PETSC_TRUE; 1132 1133 if (!mat->rowvalues && (idx || v)) { 1134 /* 1135 allocate enough space to hold information from the longest row. 
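       (The single PetscMalloc() below holds max Scalars followed by max ints;
       mat->rowindices is pointed just past mat->rowvalues, so one allocation
       serves as workspace for both the values and the column indices of a row.)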
1136 */ 1137 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1138 int max = 1,m = mat->m,tmp; 1139 for ( i=0; i<m; i++ ) { 1140 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1141 if (max < tmp) { max = tmp; } 1142 } 1143 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1144 CHKPTRQ(mat->rowvalues); 1145 mat->rowindices = (int *) (mat->rowvalues + max); 1146 } 1147 1148 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1149 lrow = row - rstart; 1150 1151 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1152 if (!v) {pvA = 0; pvB = 0;} 1153 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1154 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1155 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1156 nztot = nzA + nzB; 1157 1158 cmap = mat->garray; 1159 if (v || idx) { 1160 if (nztot) { 1161 /* Sort by increasing column numbers, assuming A and B already sorted */ 1162 int imark = -1; 1163 if (v) { 1164 *v = v_p = mat->rowvalues; 1165 for ( i=0; i<nzB; i++ ) { 1166 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1167 else break; 1168 } 1169 imark = i; 1170 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1171 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1172 } 1173 if (idx) { 1174 *idx = idx_p = mat->rowindices; 1175 if (imark > -1) { 1176 for ( i=0; i<imark; i++ ) { 1177 idx_p[i] = cmap[cworkB[i]]; 1178 } 1179 } else { 1180 for ( i=0; i<nzB; i++ ) { 1181 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1182 else break; 1183 } 1184 imark = i; 1185 } 1186 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1187 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1188 } 1189 } 1190 else { 1191 if (idx) *idx = 0; 1192 if (v) *v = 0; 1193 } 1194 } 1195 *nz = nztot; 1196 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1197 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1198 return 0; 1199 } 1200 1201 #undef __FUNC__ 1202 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1203 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1204 { 1205 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1206 if (aij->getrowactive == PETSC_FALSE) { 1207 SETERRQ(1,0,"MatGetRow not called"); 1208 } 1209 aij->getrowactive = PETSC_FALSE; 1210 return 0; 1211 } 1212 1213 #undef __FUNC__ 1214 #define __FUNC__ "MatNorm_MPIAIJ" 1215 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1216 { 1217 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1218 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1219 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1220 double sum = 0.0; 1221 Scalar *v; 1222 1223 if (aij->size == 1) { 1224 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1225 } else { 1226 if (type == NORM_FROBENIUS) { 1227 v = amat->a; 1228 for (i=0; i<amat->nz; i++ ) { 1229 #if defined(PETSC_COMPLEX) 1230 sum += real(conj(*v)*(*v)); v++; 1231 #else 1232 sum += (*v)*(*v); v++; 1233 #endif 1234 } 1235 v = bmat->a; 1236 for (i=0; i<bmat->nz; i++ ) { 1237 #if defined(PETSC_COMPLEX) 1238 sum += real(conj(*v)*(*v)); v++; 1239 #else 1240 sum += (*v)*(*v); v++; 1241 #endif 1242 } 1243 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1244 *norm = sqrt(*norm); 1245 } 1246 else if (type == NORM_1) { /* max column norm */ 1247 double *tmp, *tmp2; 1248 int *jj, *garray = aij->garray; 1249 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1250 
tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1251 PetscMemzero(tmp,aij->N*sizeof(double)); 1252 *norm = 0.0; 1253 v = amat->a; jj = amat->j; 1254 for ( j=0; j<amat->nz; j++ ) { 1255 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1256 } 1257 v = bmat->a; jj = bmat->j; 1258 for ( j=0; j<bmat->nz; j++ ) { 1259 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1260 } 1261 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1262 for ( j=0; j<aij->N; j++ ) { 1263 if (tmp2[j] > *norm) *norm = tmp2[j]; 1264 } 1265 PetscFree(tmp); PetscFree(tmp2); 1266 } 1267 else if (type == NORM_INFINITY) { /* max row norm */ 1268 double ntemp = 0.0; 1269 for ( j=0; j<amat->m; j++ ) { 1270 v = amat->a + amat->i[j] + shift; 1271 sum = 0.0; 1272 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1273 sum += PetscAbsScalar(*v); v++; 1274 } 1275 v = bmat->a + bmat->i[j] + shift; 1276 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1277 sum += PetscAbsScalar(*v); v++; 1278 } 1279 if (sum > ntemp) ntemp = sum; 1280 } 1281 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1282 } 1283 else { 1284 SETERRQ(1,0,"No support for two norm"); 1285 } 1286 } 1287 return 0; 1288 } 1289 1290 #undef __FUNC__ 1291 #define __FUNC__ "MatTranspose_MPIAIJ" 1292 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1293 { 1294 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1295 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1296 int ierr,shift = Aloc->indexshift; 1297 Mat B; 1298 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1299 Scalar *array; 1300 1301 if (matout == PETSC_NULL && M != N) { 1302 SETERRQ(1,0,"Square matrix only for in-place"); 1303 } 1304 1305 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1306 1307 /* copy over the A part */ 1308 Aloc = (Mat_SeqAIJ*) a->A->data; 1309 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1310 row = a->rstart; 1311 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1312 for ( i=0; i<m; i++ ) { 1313 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1314 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1315 } 1316 aj = Aloc->j; 1317 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1318 1319 /* copy over the B part */ 1320 Aloc = (Mat_SeqAIJ*) a->B->data; 1321 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1322 row = a->rstart; 1323 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1324 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1325 for ( i=0; i<m; i++ ) { 1326 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1327 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1328 } 1329 PetscFree(ct); 1330 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1331 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1332 if (matout != PETSC_NULL) { 1333 *matout = B; 1334 } else { 1335 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1336 PetscFree(a->rowners); 1337 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1338 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1339 if (a->colmap) PetscFree(a->colmap); 1340 if (a->garray) PetscFree(a->garray); 1341 if (a->lvec) VecDestroy(a->lvec); 1342 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1343 PetscFree(a); 1344 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1345 PetscHeaderDestroy(B); 1346 } 1347 return 0; 1348 } 1349 1350 #undef __FUNC__ 1351 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1352 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1353 { 1354 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1355 Mat a = aij->A, b = aij->B; 1356 int ierr,s1,s2,s3; 1357 1358 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1359 if (rr) { 1360 s3 = aij->n; 1361 VecGetLocalSize_Fast(rr,s1); 1362 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1363 /* Overlap communication with computation. */ 1364 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1365 } 1366 if (ll) { 1367 VecGetLocalSize_Fast(ll,s1); 1368 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1369 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1370 } 1371 /* scale the diagonal block */ 1372 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1373 1374 if (rr) { 1375 /* Do a scatter end and then right scale the off-diagonal block */ 1376 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1377 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1378 } 1379 1380 return 0; 1381 } 1382 1383 1384 extern int MatPrintHelp_SeqAIJ(Mat); 1385 #undef __FUNC__ 1386 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1387 int MatPrintHelp_MPIAIJ(Mat A) 1388 { 1389 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1390 1391 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1392 else return 0; 1393 } 1394 1395 #undef __FUNC__ 1396 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1397 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1398 { 1399 *bs = 1; 1400 return 0; 1401 } 1402 #undef __FUNC__ 1403 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1404 int MatSetUnfactored_MPIAIJ(Mat A) 1405 { 1406 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1407 int ierr; 1408 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1409 return 0; 1410 } 1411 1412 #undef __FUNC__ 1413 #define __FUNC__ "MatEqual_MPIAIJ" 1414 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1415 { 1416 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1417 Mat a, b, c, d; 1418 PetscTruth flg; 1419 int ierr; 1420 1421 if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type"); 1422 a = matA->A; b = matA->B; 1423 c = matB->A; d = matB->B; 1424 1425 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1426 if (flg == PETSC_TRUE) { 1427 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1428 } 1429 MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm); 1430 return(0); 1431 } 1432 1433 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1434 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1435 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1436 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1437 /* -------------------------------------------------------------------*/ 1438 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1439 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1440 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1441 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1442 0,0, 1443 0,0, 1444 0,0, 1445 
MatRelax_MPIAIJ,
                                MatTranspose_MPIAIJ,
                                MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
                                MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
                                MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
                                0,
                                MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
                                0,0,0,0,
                                MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
                                0,0,
                                0,0,MatConvertSameType_MPIAIJ,0,0,
                                0,0,0,
                                MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
                                MatPrintHelp_MPIAIJ,
                                MatScale_MPIAIJ,0,0,0,
                                MatGetBlockSize_MPIAIJ,0,0,0,0,
                                MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!
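
   Example:
   An illustrative sketch (assumptions: comm, M, and the per-row values of
   nc, cols[], and vals[] are supplied by the caller) that builds a matrix
   with at most 3 nonzeros per row in the diagonal block and 1 in the
   off-diagonal block might look like
$      Mat    A;
$      int    i, nc, Istart, Iend, cols[3], ierr;
$      Scalar vals[3];
$      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,
$                             1,PETSC_NULL,&A); CHKERRQ(ierr);
$      ierr = MatGetOwnershipRange(A,&Istart,&Iend); CHKERRQ(ierr);
$      for ( i=Istart; i<Iend; i++ ) {
$         ...set nc, cols[], and vals[] for global row i...
$         ierr = MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
$      }
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);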
1518 1519 Storage Information: 1520 For a square global matrix we define each processor's diagonal portion 1521 to be its local rows and the corresponding columns (a square submatrix); 1522 each processor's off-diagonal portion encompasses the remainder of the 1523 local matrix (a rectangular submatrix). 1524 1525 The user can specify preallocated storage for the diagonal part of 1526 the local submatrix with either d_nz or d_nnz (not both). Set 1527 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1528 memory allocation. Likewise, specify preallocated storage for the 1529 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1530 1531 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1532 the figure below we depict these three local rows and all columns (0-11). 1533 1534 $ 0 1 2 3 4 5 6 7 8 9 10 11 1535 $ ------------------- 1536 $ row 3 | o o o d d d o o o o o o 1537 $ row 4 | o o o d d d o o o o o o 1538 $ row 5 | o o o d d d o o o o o o 1539 $ ------------------- 1540 $ 1541 1542 Thus, any entries in the d locations are stored in the d (diagonal) 1543 submatrix, and any entries in the o locations are stored in the 1544 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1545 stored simply in the MATSEQAIJ format for compressed row storage. 1546 1547 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1548 and o_nz should indicate the number of nonzeros per row in the o matrix. 1549 In general, for PDE problems in which most nonzeros are near the diagonal, 1550 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1551 or you will get TERRIBLE performance; see the users' manual chapter on 1552 matrices. 1553 1554 .keywords: matrix, aij, compressed row, sparse, parallel 1555 1556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1557 @*/ 1558 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1559 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1560 { 1561 Mat B; 1562 Mat_MPIAIJ *b; 1563 int ierr, i,sum[2],work[2],size; 1564 1565 MPI_Comm_size(comm,&size); 1566 if (size == 1) { 1567 if (M == PETSC_DECIDE) M = m; 1568 if (N == PETSC_DECIDE) N = n; 1569 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1570 return 0; 1571 } 1572 1573 *A = 0; 1574 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1575 PLogObjectCreate(B); 1576 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1577 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1578 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1579 B->destroy = MatDestroy_MPIAIJ; 1580 B->view = MatView_MPIAIJ; 1581 B->factor = 0; 1582 B->assembled = PETSC_FALSE; 1583 B->mapping = 0; 1584 1585 B->insertmode = NOT_SET_VALUES; 1586 b->size = size; 1587 MPI_Comm_rank(comm,&b->rank); 1588 1589 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1590 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1591 1592 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1593 work[0] = m; work[1] = n; 1594 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1595 if (M == PETSC_DECIDE) M = sum[0]; 1596 if (N == PETSC_DECIDE) N = sum[1]; 1597 } 1598 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1599 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1600 b->m = m; B->m = m; 1601 b->n = n; B->n = n; 1602 b->N = N; B->N = N; 1603 b->M = M; B->M = M; 1604 1605 /* build local table of row and column ownerships */ 1606 b->rowners = (int *) 
PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1607 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1608 b->cowners = b->rowners + b->size + 2; 1609 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1610 b->rowners[0] = 0; 1611 for ( i=2; i<=b->size; i++ ) { 1612 b->rowners[i] += b->rowners[i-1]; 1613 } 1614 b->rstart = b->rowners[b->rank]; 1615 b->rend = b->rowners[b->rank+1]; 1616 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1617 b->cowners[0] = 0; 1618 for ( i=2; i<=b->size; i++ ) { 1619 b->cowners[i] += b->cowners[i-1]; 1620 } 1621 b->cstart = b->cowners[b->rank]; 1622 b->cend = b->cowners[b->rank+1]; 1623 1624 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1625 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1626 PLogObjectParent(B,b->A); 1627 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1628 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1629 PLogObjectParent(B,b->B); 1630 1631 /* build cache for off array entries formed */ 1632 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1633 b->donotstash = 0; 1634 b->colmap = 0; 1635 b->garray = 0; 1636 b->roworiented = 1; 1637 1638 /* stuff used for matrix vector multiply */ 1639 b->lvec = 0; 1640 b->Mvctx = 0; 1641 1642 /* stuff for MatGetRow() */ 1643 b->rowindices = 0; 1644 b->rowvalues = 0; 1645 b->getrowactive = PETSC_FALSE; 1646 1647 *A = B; 1648 return 0; 1649 } 1650 1651 #undef __FUNC__ 1652 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1653 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1654 { 1655 Mat mat; 1656 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1657 int ierr, len=0, flg; 1658 1659 *newmat = 0; 1660 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1661 PLogObjectCreate(mat); 1662 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1663 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1664 mat->destroy = MatDestroy_MPIAIJ; 1665 mat->view = MatView_MPIAIJ; 1666 mat->factor = matin->factor; 1667 mat->assembled = PETSC_TRUE; 1668 1669 a->m = mat->m = oldmat->m; 1670 a->n = mat->n = oldmat->n; 1671 a->M = mat->M = oldmat->M; 1672 a->N = mat->N = oldmat->N; 1673 1674 a->rstart = oldmat->rstart; 1675 a->rend = oldmat->rend; 1676 a->cstart = oldmat->cstart; 1677 a->cend = oldmat->cend; 1678 a->size = oldmat->size; 1679 a->rank = oldmat->rank; 1680 mat->insertmode = NOT_SET_VALUES; 1681 a->rowvalues = 0; 1682 a->getrowactive = PETSC_FALSE; 1683 1684 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1685 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1686 a->cowners = a->rowners + a->size + 2; 1687 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1688 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1689 if (oldmat->colmap) { 1690 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1691 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1692 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1693 } else a->colmap = 0; 1694 if (oldmat->garray) { 1695 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1696 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1697 PLogObjectMemory(mat,len*sizeof(int)); 1698 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1699 } else a->garray = 0; 1700 1701 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1702 PLogObjectParent(mat,a->lvec); 1703 ierr = 
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];

  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}