#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.289 1999/03/18 00:35:56 balay Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non scable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   CreateColmap_MPIAIJ_Private - Builds aij->colmap, mapping a global column
   index to 1 + its local position in the off-diagonal block B (0 means
   "not present locally").  The +1 offset lets 0 double as "missing" in both
   the table and array variants.

   With USE_CTABLE a hash table is used (memory ~ local off-diagonal width);
   otherwise a dense int array of length aij->N (global columns) is used.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;   /* n = number of columns actually used by B */
#if defined (USE_CTABLE)
  int ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* keys and values are stored shifted by +1 so that 0 can mean "absent" */
  ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense map: one int per GLOBAL column -- simple but not scalable */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* garray[i] is the global column of local B column i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

/*
   MatSetValues_SeqAIJ_A_Private - Inlined insertion of one (row,col,value)
   into the diagonal block aij->A.  Expects the caller (MatSetValues_MPIAIJ)
   to have the working variables (rp,ap,rmax,nrow,_i,ii,N,col1,low,high,t,
   nonew,shift, and the a*/aa/ai/aj aliases) in scope.  Grows the row by
   CHUNKSIZE entries via realloc-by-copy when it is full.
   NOTE(review): the binary search below only narrows low/high; the linear
   scan that follows restarts at _i=0, so the search result is unused --
   presumably a leftover/latent optimization, verify before changing.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        /* entry already exists: add or overwrite per addv */ \
        if (addv == ADD_VALUES) ap[_i] += value;   \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds new_a, new_j, new_i */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows past "row" shift by CHUNKSIZE */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a);  \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    /* N = index of current last entry; make room, then insert at _i */ \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   MatSetValues_SeqAIJ_B_Private - Identical to the _A_ variant above but
   operates on the off-diagonal block aij->B (b*/ba/bi/bj aliases).  Kept as
   a separate macro because the two blocks use distinct alias variables.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a);  \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts/adds a logically dense m-by-n subblock of
   values v at global rows im[] and columns in[].

   Locally owned rows are routed to the diagonal block (aij->A, columns in
   [cstart,cend)) or the off-diagonal block (aij->B) via the inlined macros
   above; rows owned by other processes are stashed for communication during
   assembly (unless aij->donotstash).  Negative row/column indices are
   silently skipped.  roworiented selects v's storage order.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative rows are ignored by convention */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned by this process */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;   /* negative columns are ignored */
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* after an assembly B uses LOCAL column numbering; translate
               the global column via colmap (built lazily) */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col--;   /* colmap stores position+1; 0 meant "absent" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* a brand-new off-diagonal column: revert B to global
                 numbering so the new nonzero can be inserted */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];   /* before first assembly B is in global numbering */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for exchange in MatAssemblyBegin/End */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves an m-by-n block of values (row-major into
   v) at global rows idxm[] / columns idxn[].  Only LOCALLY OWNED rows are
   supported; any other row raises an error.  Off-diagonal columns not
   present in the local matrix return 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal-block entry */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal: translate global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          } 
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* absent (or stale colmap slot) entries read as zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Starts assembly: verifies all ranks agree on
   ADD_VALUES vs INSERT_VALUES (via a bitwise-OR allreduce of insertmode)
   and begins scattering stashed off-process entries to their owners.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  /* OR of both flags means the ranks disagreed */
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners); CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash (inserting
   received rows via MatSetValues_MPIAIJ), assembles the A block, handles
   collective disassembly of B if any rank created new off-diagonal
   nonzeros, sets up the column scatter on final assembly, then assembles B.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{ 
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;   /* no more incoming messages */

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff any rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached row workspace is invalid after assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* MatZeroEntries_MPIAIJ - Zeros all entries by zeroing both local blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows in is, optionally
   setting the diagonal entry to *diag.  Rows listed by one process may be
   owned by another, so the global row list is first redistributed to the
   owning ranks with a point-to-point send/recv protocol, after which each
   rank zaps its own rows locally.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  /* single allocation: nprocs = per-rank counts, procs = 0/1 "sends to rank" flags */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs
+ size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear search over the ownership ranges to find the owning rank */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];} 

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  /* column sum of the procs flags = number of messages each rank will receive */
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank]; 
  /* max of the per-rank counts = largest message this rank must receive */
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0; 
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  /* bucket the row indices by destination rank (starts[] is consumed) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] since the bucketing pass above advanced it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];   /* first globally numbered row owned by this rank */

  /*  wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits); 
  
  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row number */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);
    
  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);   
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the sequential MatZeroRows can set the diagonal */
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: set diagonal entries one at a time */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;   /* back to global numbering for MatSetValues */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
/*
   MatMult_MPIAIJ - y = A*x.  Overlaps the scatter of the needed off-process
   x entries (into a->lvec) with the local product y = A_diag*x, then adds
   the off-diagonal contribution y += B*lvec.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);   /* overlaps the scatter */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* MatMultAdd_MPIAIJ - z = y + A*x, same overlap structure as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
/*
   MatMultTrans_MPIAIJ - y = A^T*x.  B^T*x produces contributions destined
   for other ranks; they are accumulated into y with a reverse ADD scatter
   while the local product A_diag^T*x proceeds.
*/
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* MatMultTransAdd_MPIAIJ - z = y + A^T*x, same structure as MatMultTrans. */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the 
   diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
/* MatGetDiagonal_MPIAIJ - Diagonal of the matrix; requires identical row
   and column partitionings so a->A is truly the diagonal block. */
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");  
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
/* MatScale_MPIAIJ - Scales the matrix by *aa (both local blocks). */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ"
/*
   MatDestroy_MPIAIJ - Releases all storage owned by the parallel matrix:
   mappings/maps on the Mat header, the stash, both sequential blocks, the
   column map, garray, the local work vector and its scatter context.
   Reference-counted: returns early while other references remain.
*/
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners); 
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij); 
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary"
/* MatView_MPIAIJ_Binary - Binary viewing; only implemented for one process. */
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewing for ASCII/Draw/Socket
   viewers.  ASCII "info" formats print per-rank statistics only.  For a
   full view with size > 1 the entire matrix is gathered onto rank 0 by
   building a temporary MPIAIJ matrix A that is M-by-N locally on rank 0
   and 0-by-0 elsewhere, then viewing its local block.
   NOTE(review): several MatGetInfo return codes here are assigned to ierr
   but not CHKERRQ-checked -- presumably intentional best-effort, verify.
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank ;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      /* serialize output so ranks print in order */
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0); 
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      /* every other rank contributes a 0-row local part */
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily shift A's column indices to global numbering ... */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    } 
    aj = Aloc->j;
    /* ... and restore them afterwards */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* B's local columns map to global via garray */
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    } 
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /* 
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
/* MatView_MPIAIJ - Dispatches to the viewer-type-specific view routine. */
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;
  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values betwen colors.
*/
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation sweeps.  Supports the SOR_LOCAL_*
   variants: off-process x values are scattered once into mat->lvec, then
   each rank sweeps its local rows (symmetric = down-then-up, forward,
   backward).  With SOR_ZERO_INITIAL_GUESS the work is delegated entirely to
   the sequential relax of the diagonal block.  In each row the update is
     x[i] = (1-omega)*x[i] + omega*(b[i] - sum_offdiag + d_ii*x[i])/d_ii
   where SPARSEDENSEMDOT subtracts the row dot-products from sum and
   d = fshift + diagonal entry.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero initial guess: pure local relax of the diagonal block suffices */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* fetch off-process x values once; they stay fixed during the sweeps */
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i]; 
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n); 
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i]; 
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n); 
        /* diagonal term was subtracted by the dot; add it back before scaling */
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i]; 
        /* NOTE(review): no trailing ';' here, unlike the other PLogFlops
           calls -- relies on the macro expansion supplying one; verify */
        PLogFlops(4*n+3)
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n); 
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i]; 
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n); 
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }    
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i]; 
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n); 
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i]; 
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n); 
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i]; 
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n); 
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i]; 
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n); 
        x[i] = (1.
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 964 } 965 } 966 } else { 967 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 968 } 969 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 970 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 971 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNC__ 976 #define __FUNC__ "MatGetInfo_MPIAIJ" 977 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 978 { 979 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 980 Mat A = mat->A, B = mat->B; 981 int ierr; 982 double isend[5], irecv[5]; 983 984 PetscFunctionBegin; 985 info->block_size = 1.0; 986 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 987 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 988 isend[3] = info->memory; isend[4] = info->mallocs; 989 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 990 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 991 isend[3] += info->memory; isend[4] += info->mallocs; 992 if (flag == MAT_LOCAL) { 993 info->nz_used = isend[0]; 994 info->nz_allocated = isend[1]; 995 info->nz_unneeded = isend[2]; 996 info->memory = isend[3]; 997 info->mallocs = isend[4]; 998 } else if (flag == MAT_GLOBAL_MAX) { 999 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1000 info->nz_used = irecv[0]; 1001 info->nz_allocated = irecv[1]; 1002 info->nz_unneeded = irecv[2]; 1003 info->memory = irecv[3]; 1004 info->mallocs = irecv[4]; 1005 } else if (flag == MAT_GLOBAL_SUM) { 1006 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1007 info->nz_used = irecv[0]; 1008 info->nz_allocated = irecv[1]; 1009 info->nz_unneeded = irecv[2]; 1010 info->memory = irecv[3]; 1011 info->mallocs = irecv[4]; 1012 } 1013 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1014 info->fill_ratio_needed = 0; 1015 info->factor_mallocs = 0; 1016 
info->rows_global = (double)mat->M; 1017 info->columns_global = (double)mat->N; 1018 info->rows_local = (double)mat->m; 1019 info->columns_local = (double)mat->N; 1020 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNC__ 1025 #define __FUNC__ "MatSetOption_MPIAIJ" 1026 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1027 { 1028 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1029 int ierr; 1030 1031 PetscFunctionBegin; 1032 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1033 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1034 op == MAT_COLUMNS_UNSORTED || 1035 op == MAT_COLUMNS_SORTED || 1036 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1037 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1038 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1039 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1040 } else if (op == MAT_ROW_ORIENTED) { 1041 a->roworiented = 1; 1042 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1043 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1044 } else if (op == MAT_ROWS_SORTED || 1045 op == MAT_ROWS_UNSORTED || 1046 op == MAT_SYMMETRIC || 1047 op == MAT_STRUCTURALLY_SYMMETRIC || 1048 op == MAT_YES_NEW_DIAGONALS) 1049 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1050 else if (op == MAT_COLUMN_ORIENTED) { 1051 a->roworiented = 0; 1052 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1053 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1054 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1055 a->donotstash = 1; 1056 } else if (op == MAT_NO_NEW_DIAGONALS){ 1057 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1058 } else { 1059 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNC__ 1065 #define __FUNC__ "MatGetSize_MPIAIJ" 1066 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1067 { 1068 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1069 1070 PetscFunctionBegin; 1071 if (m) *m = mat->M; 1072 if (n) *n = mat->N; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNC__ 1077 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1078 int 
MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1081 1082 PetscFunctionBegin; 1083 if (m) *m = mat->m; 1084 if (n) *n = mat->n; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNC__ 1089 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1090 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1093 1094 PetscFunctionBegin; 1095 *m = mat->rstart; *n = mat->rend; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatGetRow_MPIAIJ" 1101 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1105 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1106 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1107 int *cmap, *idx_p; 1108 1109 PetscFunctionBegin; 1110 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1111 mat->getrowactive = PETSC_TRUE; 1112 1113 if (!mat->rowvalues && (idx || v)) { 1114 /* 1115 allocate enough space to hold information from the longest row. 
1116 */ 1117 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1118 int max = 1,m = mat->m,tmp; 1119 for ( i=0; i<m; i++ ) { 1120 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1121 if (max < tmp) { max = tmp; } 1122 } 1123 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1124 mat->rowindices = (int *) (mat->rowvalues + max); 1125 } 1126 1127 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1128 lrow = row - rstart; 1129 1130 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1131 if (!v) {pvA = 0; pvB = 0;} 1132 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1133 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1134 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1135 nztot = nzA + nzB; 1136 1137 cmap = mat->garray; 1138 if (v || idx) { 1139 if (nztot) { 1140 /* Sort by increasing column numbers, assuming A and B already sorted */ 1141 int imark = -1; 1142 if (v) { 1143 *v = v_p = mat->rowvalues; 1144 for ( i=0; i<nzB; i++ ) { 1145 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1146 else break; 1147 } 1148 imark = i; 1149 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1150 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1151 } 1152 if (idx) { 1153 *idx = idx_p = mat->rowindices; 1154 if (imark > -1) { 1155 for ( i=0; i<imark; i++ ) { 1156 idx_p[i] = cmap[cworkB[i]]; 1157 } 1158 } else { 1159 for ( i=0; i<nzB; i++ ) { 1160 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1161 else break; 1162 } 1163 imark = i; 1164 } 1165 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1166 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1167 } 1168 } else { 1169 if (idx) *idx = 0; 1170 if (v) *v = 0; 1171 } 1172 } 1173 *nz = nztot; 1174 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1175 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* Companion to MatGetRow_MPIAIJ(); just clears the "row active" flag. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- or infinity-norm of the
   parallel matrix by combining the local A and B parts and reducing
   the partial results across the communicator.  NORM_2 is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: delegate to the sequential block */
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE(review): not scalable -- the work arrays have global column length */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* accumulate |a_ij| into the global column sums; A stores local columns */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      /* B stores compressed columns; garray maps them back to global numbers */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row
   as a column of a new matrix.  matout == PETSC_NULL requests an
   "in-place" transpose (square matrices only), implemented by destroying
   the old data structures and stealing the new matrix's header.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* note the swapped local/global dimensions for the transpose */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift the local column numbers to global numbering */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore the original (local) column numbering */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  /* expand the compressed column numbers to global numbers via garray */
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
        This is horrible, horrible code. We need to keep the 
      A pointers for the bops and ops but copy everything 
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr), overlapping the
   scatter of rr (needed for the off-diagonal columns) with the left scaling.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* Prints the runtime options help (first process only). */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* Resets the factorization state of the local diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Compares the local A and B blocks of the two matrices;
   the per-process results are combined with a logical-AND reduction so the
   answer is collective.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal blocks if the diagonal blocks match */
  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatCopy_MPIAIJ"
/* Copies A into B block-wise when both are MPIAIJ with the same nonzero
   pattern; otherwise falls back on the generic MatCopy_Basic(). */
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more 
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; entries are POSITIONAL,
   a 0 marks an operation this type does not support. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatStoreValues_MPIAIJ"
/* Saves the current numerical values of both local blocks (see MatStoreValues()). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues(). */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax.
 (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e 'm'. 

   Output Parameter:
.  A - the matrix 

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor than it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extraction the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to the this processor. This is a square mxm matrix. The remaining portion 
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)


   Example usage:
  
   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* on one process, unless overridden by -mat_mpiaij/-mat_mpi, build a sequential matrix */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* determine whichever of the local/global sizes the caller left unspecified */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather each process's local row count, then prefix-sum into ownership offsets */
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* A is the m x n diagonal block, B the m x N off-diagonal (column-compressed) block */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Duplicates the parallel matrix; cpvalues controls
   whether the numerical values are copied into the new matrix as well.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy over the scalar size/ownership information */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (USE_CTABLE)
    ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    /* garray length equals the number of columns of the off-diagonal block */
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; header = {cookie, M, N, nz-format flag} */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: rows are dealt out as evenly as
     possible, with the first M%size processes getting one extra */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation
     and later holds the per-row off-diagonal counts */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since it is the prefix sum */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    /* non-root send parameters of MPI_Scatterv are ignored */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries
     (columns outside [cstart,cend)) */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily make ourlens the diagonal-only counts
     for exact preallocation, then restore the full row lengths */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out; value messages use A->tag,
       presumably to keep them distinct from the index messages sent with
       the viewer's tag above -- NOTE(review): confirm */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  /* on reuse, the sequential submatrix saved on the output matrix by the
     previous call (composed under the name "SubMatrix") is refilled in place */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);  /* free only the length-1 array of Mat, keep the Mat itself */
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */
    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      /* default: split the columns as evenly as the rows of a load-balanced matrix */
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts gives this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* the last process sees the full sum; it must equal the global column count */
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths: dlens/olens share one allocation and
       count, per row, the entries inside resp. outside [rstart,rend) */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];  /* nonzeros in row i of the sequential submatrix */
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);  /* frees olens too (same allocation) */
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the sequential submatrix row by row; cwork/vwork walk the CSR
     index and value arrays in step */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; composing transfers
     the reference, so drop ours to avoid a leak */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}